学习代码-----
# coding: utf-8
# Script: run a Google "site:" query in Chrome and print the result titles/URLs.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time


class driver_1(object):
    """Wrapper around a Chrome WebDriver session used to run Google searches."""

    def __init__(self):
        # Raw string: the Windows path contains backslashes that must not be
        # interpreted as escape sequences.
        # NOTE(review): passing the driver path positionally is the Selenium 3
        # calling convention -- confirm against the installed Selenium version.
        self.driver = webdriver.Chrome(r'D:\chromedriver_win32\chromedriver.exe')

    def logload_Chrome(self):
        """Navigate the browser to the Google home page."""
        self.driver.get('https://www.google.com/')

    def Search(self):
        """Open Google and submit the fixed query ``aaaa``."""
        self.logload_Chrome()
        search_box = self.driver.find_element(By.NAME, 'q')
        search_box.send_keys('aaaa')
        search_box.send_keys(Keys.ENTER)

    def Search_site(self, site):
        """Submit a ``site:'<site>'`` query and collect the listed results.

        Args:
            site: Site name to search for; it is also used to recognise the
                result window by its title.

        Returns:
            A dict mapping each result title to its URL. The dict is empty
            when no open window has a title containing ``site``.
        """
        self.driver.implicitly_wait(10)
        self.site = 'site:' + '\'' + site + '\''
        self.logload_Chrome()
        search_box = self.driver.find_element(By.NAME, 'q')
        search_box.send_keys(self.site)
        search_box.send_keys(Keys.ENTER)

        # Look through every open window for the one showing the results.
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            # Match on the requested site instead of a hard-coded domain.
            if str(site) not in self.driver.title:
                continue
            print('当前位置:%s' % self.driver.title)
            titles = self.driver.find_elements(By.CSS_SELECTOR, '#search .g .r h3')
            links = self.driver.find_elements(By.CSS_SELECTOR, '#search .g .r>a[href]')
            results = {}
            # zip() stops at the shorter list, so a title without a matching
            # link can no longer raise IndexError.
            for index, (title, link) in enumerate(zip(titles, links)):
                print(index)
                results[str(title.text)] = str(link.get_attribute('href'))
            return results
        # No matching window: return an empty mapping rather than None so
        # callers can always treat the result as a dict.
        return {}


if __name__ == "__main__":
    site = str(input('输入需要搜索的站点:'))
    a = driver_1()
    name = a.Search_site(site)
    print(name.items())
https://pan.baidu.com/s/1JHzdTqfWMH_wqPJUjSA_cw
# coding: utf-8
# Script: run a Google "site:" query in Chrome, walk through several result
# pages, and gather the result titles/URLs (optionally storing them in MySQL).
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import pymysql


class driver_1(object):
    """Wrapper around a Chrome WebDriver session used to run Google searches."""

    def __init__(self):
        # Raw string: the Windows path contains backslashes that must not be
        # interpreted as escape sequences.
        # NOTE(review): passing the driver path positionally is the Selenium 3
        # calling convention -- confirm against the installed Selenium version.
        self.driver = webdriver.Chrome(r'D:\chromedriver_win32\chromedriver.exe')
        # Site most recently passed to Search_site(); lets Cyclic_Search()
        # work without relying on a module-level global.
        self._target_site = None

    def logload_Chrome(self):
        """Navigate the browser to the Google home page."""
        self.driver.get('https://www.google.com/')

    def Search_site(self, site):
        """Submit a ``site:'<site>'`` query on Google.

        Args:
            site: Site name to search for.
        """
        self.driver.implicitly_wait(10)
        self._target_site = site
        self.site = 'site:' + '\'' + site + '\''
        self.logload_Chrome()
        search_box = self.driver.find_element(By.NAME, 'q')
        search_box.send_keys(self.site)
        search_box.send_keys(Keys.ENTER)

    def Save_name_url(self, site):
        """Collect the results shown in the window whose title contains ``site``.

        Args:
            site: Text expected to appear in the result window's title.

        Returns:
            A dict mapping each result title to its URL. The dict is empty
            when no open window has a title containing ``site``.
        """
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            if str(site) not in self.driver.title:
                continue
            titles = self.driver.find_elements(By.CSS_SELECTOR, '#search .g .r h3')
            links = self.driver.find_elements(By.CSS_SELECTOR, '#search .g .r>a[href]')
            # zip() stops at the shorter list, so a title without a matching
            # link can no longer raise IndexError.
            return {
                str(title.text): str(link.get_attribute('href'))
                for title, link in zip(titles, links)
            }
        # No matching window: return an empty mapping rather than None so
        # callers can merge the result unconditionally.
        return {}

    def Cyclic_Search(self, num, site=None):
        """Advance through ``num`` further result pages and gather their results.

        Args:
            num: Number of additional pages to visit.
            site: Text used to recognise the result window. Defaults to the
                site last passed to ``Search_site``.

        Returns:
            A dict mapping result titles to URLs, accumulated over every page
            visited (empty when ``num`` is 0 or no further page is found).
        """
        if site is None:
            site = self._target_site
        collected = {}
        for _ in range(num):
            try:
                # NOTE(review): presumably the "next page" cell of the pager --
                # confirm against the live page markup.
                pager_cell = self.driver.find_element(
                    By.CSS_SELECTOR, '[valign=top] td:nth-last-child(1)')
            except NoSuchElementException:
                # Fewer pages than requested: keep what was gathered so far.
                break
            pager_cell.click()
            # Accumulate instead of overwriting, so intermediate pages are kept.
            collected.update(self.Save_name_url(site))
        return collected

    def Save_file(self):
        """Placeholder for saving titles/URLs to a file; not implemented."""
        pass


class db(object):
    """Stores scraped (title, URL) rows in the ``information_site`` table."""

    def connect(self, i, key, value):
        """Insert one row ``(i, key, value)`` into ``information_site``.

        Args:
            i: Row number; converted with ``int()``.
            key: First text column (the result title in this script).
            value: Second text column (the result URL in this script).
        """
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration. Keyword arguments are used because newer PyMySQL
        # releases no longer accept positional connect() arguments.
        conn = pymysql.connect(host='localhost', user='root',
                               password='Password@2020', database='suda')
        try:
            try:
                self._insert_row(conn, i, key, value)
            except (pymysql.err.OperationalError, pymysql.err.InterfaceError):
                # Connection-level failure: reconnect and retry once.
                conn.ping()
                self._insert_row(conn, i, key, value)
            # PyMySQL does not autocommit by default; without this the
            # insert is discarded when the connection closes.
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _insert_row(conn, i, key, value):
        """Execute the parameterized INSERT on a fresh cursor."""
        with conn.cursor() as cursor:
            # Let the driver quote the values: scraped titles may contain
            # quotes or other SQL metacharacters.
            cursor.execute(
                "insert into information_site values(%s,%s,%s);",
                (int(i), key, value),
            )


if __name__ == "__main__":
    site = str(input('输入需要搜索的站点:'))
    num = int(input('请输入需要搜索的页数:')) - 1

    # Gather results from the first page, then from the following pages.
    a = driver_1()
    a.Search_site(site)
    name = a.Save_name_url(site)
    name2 = a.Cyclic_Search(num, site)
    now_name = {**name, **name2}
    print('信息收集完成%s' % now_name.items())
    print('提取信息数目%s' % len(now_name))
    print('准备存入数据')
    # Database storage is currently disabled:
    # b = db()
    # i = 1
    # for key, value in now_name.items():
    #     b.connect(i, key, value)
    #     i += 1
    # print('数据存入完成')