selenium爬取拉勾网招聘url(demo)

import json
import re
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Single shared Chrome session used by search().
browser = webdriver.Chrome()


def search(max_pages=25):
    """Yield job-posting links from Lagou's search results for "爬虫".

    Drives the module-level ``browser``: the first results page is scraped
    as loaded, then the "next page" control is clicked to reach each
    following page.

    Args:
        max_pages: Upper bound on the number of result pages to scrape.

    Yields:
        dict: ``{'joblink': href}`` for every position link found, in page
        order.
    """
    scroll_to_bottom = 'window.scrollTo(0,document.body.scrollHeight)'
    next_selector = '#s_position_list > div.item_con_pager > div > span.pager_next'
    link_xpath = (
        '//ul[@class="item_con_list"]'
        '/li[@class="con_list_item default_list"]'
        '//a[@class="position_link"]/@href'
    )

    browser.get('https://www.lagou.com/jobs/list_%E7%88%AC%E8%99%AB?labelWords=&fromSearch=true&suginput=')
    # Fixed pause so the JavaScript-rendered result list has time to appear.
    time.sleep(5)

    for page in range(max_pages):
        if page:
            # The pager sits at the bottom of the page; scroll it into view
            # before clicking.
            browser.execute_script(scroll_to_bottom)
            button = browser.find_element(By.CSS_SELECTOR, next_selector)
            # NOTE(review): Lagou appears to mark the last page by adding
            # "pager_next_disabled" to this element - confirm against the
            # live markup. Stopping here avoids re-yielding the final page.
            if 'pager_next_disabled' in (button.get_attribute('class') or ''):
                break
            button.click()
            browser.execute_script(scroll_to_bottom)
            time.sleep(2)

        # Parse the rendered DOM rather than re-fetching the URL: the list
        # is filled in client-side.
        html = etree.HTML(browser.page_source)
        for link in html.xpath(link_xpath):
            yield {'joblink': link}

# Stream every scraped link into url.json as JSON Lines (one object per line).
# The file is opened once, in append mode, with an explicit UTF-8 encoding
# because ensure_ascii=False writes non-ASCII characters unescaped.
try:
    with open('url.json', 'a', encoding='utf-8') as f:
        for url in search():
            f.write(json.dumps(url, ensure_ascii=False) + '\n')
finally:
    # Always shut down the Chrome session, even if scraping fails midway.
    browser.quit()

 

posted @ 2017-12-04 12:01  不可叽叽歪歪  阅读(652)  评论(0)  编辑  收藏  举报