Python 爬虫:煎蛋网妹子图
使用 Headless Chrome 替代了 PhantomJS。
图片保存到指定文件夹中。
import os

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Start Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# `options=` is the supported keyword; the older `chrome_options=` spelling is
# deprecated and rejected by newer Selenium releases.
driver = webdriver.Chrome(options=chrome_options)

# Folder the images are written to (created on demand by DownloadImg).
save_dir = 'C:/spider-download/jandan-girls/'
# Seconds to wait on each image request before giving up on it.
REQUEST_TIMEOUT = 30
# Image URLs collected by GetImgUrl and consumed by DownloadImg.
img_urls = []
# range(5, 6) yields only page 5; widen the range to crawl more pages.
page_urls = ["http://jandan.net/ooxx/page-{}#comments".format(i) for i in range(5, 6)]


def GetImgUrl(u):
    """Collect the full-size image links found on page ``u`` into ``img_urls``.

    The page is loaded through the module-level ``driver`` and the rendered
    HTML is parsed for ``a.view_img_link`` anchors.

    Args:
        u: URL of the listing page to load.
    """
    driver.get(u)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for link in soup.select('a.view_img_link'):
        href = link.get('href')
        # Skip anchors without a target, and anything that looks like a GIF.
        if not href or 'gif' in href:
            continue
        # Protocol-relative links ("//host/path") need a scheme; links that
        # already carry one must not be prefixed a second time.
        if href.startswith('//'):
            href = 'http:' + href
        img_urls.append(href)


def DownloadImg():
    """Download every URL in ``img_urls`` into ``save_dir``.

    Progress is printed per image. An image whose request fails (network
    error, timeout, or HTTP error status) is reported and skipped so the
    remaining downloads still run.
    """
    os.makedirs(save_dir, exist_ok=True)
    for n, url in enumerate(img_urls, start=1):
        print('第 {} 张 ... '.format(n), end='')
        # Fetch before opening the file so a failed request does not leave an
        # empty file behind.
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('失败: {}'.format(exc))
            continue
        # Use the last 20 characters of the final path segment as the file
        # name; taking the segment first keeps '/' out of the name.
        file_name = url.rsplit('/', 1)[-1][-20:]
        with open(os.path.join(save_dir, file_name), 'wb') as f:
            f.write(response.content)
        print('OK!')


try:
    for u in page_urls:
        GetImgUrl(u)
finally:
    # The browser is only needed for scraping; always shut it down so no
    # headless Chrome process is left running.
    driver.quit()

print('*** 开始下载 ***')
DownloadImg()
print('*** 下载完成 ***')