一个爬取lativ诚衣网站上模特穿搭图片的爬虫
show the code:
# -*- coding:utf-8 -*-
# File: lativ.py
"""Download the model outfit photos linked from the lativ "Style" page.

The script fetches the style page, collects a fixed slice of its ``<a>``
tags, and saves the resource each selected link points at as a ``.jpg``
file in the current working directory.

Only single-argument ``print(...)`` calls are used, so the script runs
unchanged on both Python 2 and Python 3.
"""
import os  # not used by this script; kept from the original import list

import lxml  # not referenced directly; BeautifulSoup is asked for the 'lxml' parser
import requests
from bs4 import BeautifulSoup as sb

STYLE_URL = 'https://www.lativ.com/Style'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36'
}


def get_html():
    """Request the style page and return the response body as text."""
    # The headers must be passed by keyword: the second positional
    # parameter of requests.post() is `data`, so the original call sent the
    # User-Agent dict as a form body instead of as request headers.
    # NOTE(review): the POST method is kept from the original; a plain page
    # fetch would normally be a GET -- confirm which one the site expects.
    # NOTE(review): the anchor slice in soup_html() was written while the
    # User-Agent was not actually being sent; re-check it if the site serves
    # different markup to mobile browsers.
    response = requests.post(STYLE_URL, headers=HEADERS)
    return response.text


def soup_html(html):
    """Parse *html* and return the ``<a>`` tags at positions 12 to 189.

    The slice bounds are hard-coded; presumably they were chosen by
    inspecting the page so that only the photo links remain (TODO confirm
    against the current page layout).
    """
    soup = sb(html, 'lxml')
    return soup.find_all('a')[12:190]


def save_img(a):
    """Download the target of every anchor in *a* and save it as a JPEG.

    The file name is taken from characters ``[-14:-9]`` of the link, with
    ``.jpg`` appended, and the file is written to the current working
    directory. Anchors without an ``href`` and links answered with an HTTP
    error status are skipped.
    """
    for anchor in a:
        link = anchor.get('href')
        if not link:
            # <a> tags without an href have nothing to download; slicing
            # None below would raise TypeError.
            continue
        print(link)
        name = link[-14:-9]

        # Download before opening the file so a failed request does not
        # leave an empty .jpg behind.
        response = requests.get(link)
        if not response.ok:
            # Do not store an HTML error page under an image file name.
            print(str(name) + ' skipped: HTTP ' + str(response.status_code))
            continue

        # 'wb' is the valid binary-write mode; the original 'wrb' raises
        # ValueError on Python 3.
        with open(str(name) + '.jpg', 'wb') as f:
            f.write(response.content)
        print(str(name) + 'saved')


if __name__ == '__main__':
    html = get_html()
    a = soup_html(html)
    save_img(a)
技术总结、分享,欢迎交流!