一个爬取lativ诚衣网站上模特穿搭图片的爬虫

  show the code:

[peter@localhost savvy]$ vi lativ.py
# -*- coding:utf-8 -*-
import requests,lxml,os
from bs4 import BeautifulSoup as sb

def get_html():
        """Download the lativ 'Style' page and return its HTML as text.

        Returns:
            The response body decoded to text by requests.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within the timeout.
        """
        url = 'https://www.lativ.com/Style'
        headers = {'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36'}
        # headers must be passed by keyword: the second positional argument of
        # requests.post() is the form body, so the User-Agent was never sent
        # as a header.  Nothing is submitted here, so a plain GET is used.
        response = requests.get(url, headers=headers, timeout=30)
        return response.text

def soup_html(html):
        """Parse the page markup and return a fixed window of its <a> tags.

        html: the page source as text.

        Returns the anchors at positions 12 through 189 (in document order)
        of all <a> tags found by the lxml parser.
        NOTE(review): the 12:190 window presumably matches the outfit-image
        links on the page layout at the time of writing -- confirm against
        the live page before relying on it.
        """
        document = sb(html, 'lxml')
        anchors = document.find_all('a')
        return anchors[12:190]

def save_img(a):
        """Download every linked image and save it in the current directory.

        a: iterable of objects exposing ``get('href')`` (for example the
           anchors returned by soup_html).  Entries without an href are
           skipped.

        Each file is named after characters [-14:-9] of its URL plus '.jpg'.
        Returns None.

        Raises:
            requests.exceptions.RequestException: if a download request fails.
        """
        for anchor in a:
                link = anchor.get('href')
                if not link:
                        # Anchor without a target: nothing to download.
                        continue
                print(link)
                stem = link[-14:-9]
                # Fetch before opening the file so a failed request does not
                # leave an empty .jpg behind.
                img = requests.get(link)
                if not img.ok:
                        # Do not store an HTTP error page as if it were an image.
                        print(str(stem)+'skipped')
                        continue
                # 'wb', not 'wrb': combining 'w' and 'r' is not a valid mode.
                with open(str(stem)+'.jpg','wb') as f:
                        f.write(img.content)
                print(str(stem)+'saved')

if __name__ == '__main__':
        # Script entry point: fetch the page, extract the anchors, then
        # download the image behind each one.
        save_img(soup_html(get_html()))

 

posted @ 2017-08-07 22:36  安阳小栈-客官歇会吧  阅读(618)  评论(0)  编辑  收藏  举报