第三十二节 selenium爬取拉勾网

 1 from selenium import webdriver
 2 from selenium.webdriver.chrome.options import Options
 3 from selenium.webdriver.common.by import By
 4 from lxml import etree
 5 import time
 6 
 7 
 8 class lagou():
 9 
10     def __init__(self):
11         self.browers_path = r'C:\Users\Administrator\AppData\Local\360Chrome\Chrome\Application\360chrome.exe'
12         self.chrome_option = Options()
13         self.chrome_option.binary_location = self.browers_path
14         self.driver = webdriver.Chrome(options=self.chrome_option)
15         self.url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="
16 
17     def run(self):
18         all_list = []
19         sourse = self.driver.get(self.url)
20         elementi= self.driver.find_element_by_xpath('//div[@class="body-btn"]')
21         elementi.click()
22         time.sleep(5)
23         sourse = self.driver.page_source
24         self.parse_path(sourse)
25         # all_list.extend(s)
26         # return all_list
27 
28     def parse_path(self,sourse):
29         position_list = []
30         html = etree.HTML(sourse)
31         links = html.xpath('//a[@class="position_link"]//@href')
32         for link in links:
33             self.xiangqingye(link)
34         #     position_list.append(h)
35         # return position_list
36 
37     def xiangqingye(self,ur):
38         # self.driver.execute_script("window.open('https://www.lagou.com/jobs/6232081.html?show=23decdaefd344a719f63cfa436b063b8')" )
39         self.driver.execute_script("window.open('%s')"%ur)
40         self.driver.switch_to.window(self.driver.window_handles[1])
41         time.sleep(2)
42         xiang_source = self.driver.page_source
43         html = etree.HTML(xiang_source)
44         job_resqust_spans = html.xpath('//dd[@class="job_request"]//span')
45         salary = job_resqust_spans[0].xpath('.//text()')[0].strip()
46         city = job_resqust_spans[1].xpath('.//text()')[0].strip()
47         workyears = job_resqust_spans[2].xpath('.//text()')[0].strip()
48         position = {
49             'salary':salary,
50             'city':city,
51             'workyears':workyears
52         }
53         print(position)
54         time.sleep(1)
55         self.driver.close()
56         self.driver.switch_to.window(self.driver.window_handles[0])
57         # print('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
58 
if __name__ == '__main__':
    # Script entry point: run the scraper once and print whatever run() returns.
    lagou_shili = lagou()
    try:
        q = lagou_shili.run()
        print(q)
    finally:
        # Always shut the browser and driver process down, even if scraping fails.
        lagou_shili.driver.quit()

    # Reference snippet: opening a second tab and switching to it.
    # browers_driver.execute_script('window.open("https://www.douban.com")')
    # window_list = browers_driver.window_handles  # get the list of window handles
    # browers_driver.switch_to.window(window_list[1])  # point browers_driver at the given window
    # print(browers_driver.current_url)  # print the URL of the window browers_driver now points at

 

posted @ 2020-03-16 21:58  kog_maw  阅读(226)  评论(0编辑  收藏  举报