# s6tu

# -*- coding: utf-8 -*-
# @Time    : 2018/03/30 15:20
# @Author  : cxa
# @File    : liuuchnagtu.py
# @Software: PyCharm
import requests
from fake_useragent import UserAgent as UA
from lxml import html
import os
import threading
import traceback
import time
import random
class GetImage():
    """Crawl the s6tu.com "popular images" listing and save the images to disk.

    Each listing page is fetched, the ``src`` of every thumbnail matched by
    ``self.imgpath`` is collected, and the images are downloaded by a few
    worker threads into the ``OUTPUT_DIR`` directory.
    """

    # Seconds to wait on any single HTTP request; without a timeout a stalled
    # connection would block a worker thread (and the join on it) forever.
    REQUEST_TIMEOUT = 30
    # Directory, relative to the current working directory, receiving images.
    OUTPUT_DIR = "setu"
    # Maximum number of download threads started per listing page.
    WORKERS = 3

    def __init__(self):
        """Set up the listing URL template, the thumbnail XPath and the headers."""
        # "{}" is filled with the page number.
        self.url = "http://www.s6tu.com/explore/popular/?list=images&sort=likes_desc&page={}"
        # Selects the src attribute of each thumbnail <img> in the listing.
        self.imgpath = "//div[@class='list-item-image fixed-size']/a/img/@src"
        self.headers = {
            'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
            # Was 'Accept - Encoding' (with spaces), which is not the
            # Accept-Encoding header and is not a well-formed header name.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
            'Connection': 'Keep-Alive',
            # One random User-Agent string, chosen once per instance.
            'User-Agent': UA().random,
            'Host': 'www.s6tu.com',
        }

    @staticmethod
    def _chunk_ranges(total, workers):
        """Split ``range(total)`` into at most ``workers`` contiguous slices.

        Returns a list of ``(start, end)`` pairs with ``end`` exclusive.
        An empty list is returned when there is nothing to split, which avoids
        the zero step that ``range`` rejects for small ``total``.
        """
        if total <= 0 or workers <= 0:
            return []
        step = -(-total // workers)  # ceiling division, always >= 1 here
        return [(lo, min(lo + step, total)) for lo in range(0, total, step)]

    def _image_headers(self):
        """Return ``self.headers`` without the pinned ``Host`` entry.

        The pinned value is only valid for www.s6tu.com; image URLs are taken
        from page markup and may point at another host, so the HTTP library is
        left to derive ``Host`` from each URL.
        """
        return {k: v for k, v in self.headers.items() if k.lower() != "host"}

    def get_oen_page(self, start_page=1, stop_page=999):
        """Crawl listing pages ``start_page`` .. ``stop_page - 1`` and download them.

        Despite the name, this walks every page in the range. A page that
        fails at the network level or returns a non-OK status is reported and
        skipped instead of ending the crawl. Any other unexpected error is
        printed with its traceback and ends the crawl.

        Args:
            start_page: First page number to fetch (inclusive).
            stop_page: Page number to stop before (exclusive).
        """
        try:
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)
            # One session for the whole crawl so connections are reused and
            # released when the crawl finishes.
            with requests.Session() as session:
                for page in range(start_page, stop_page):
                    page_url = self.url.format(page)
                    try:
                        resp = session.get(page_url, headers=self.headers,
                                           timeout=self.REQUEST_TIMEOUT)
                    except requests.RequestException:
                        print("error,here  is details:{}".format(traceback.format_exc()))
                        continue

                    if resp.status_code != requests.codes.ok:
                        print("errro")
                        # Back off for a random 1-5 seconds before the next page.
                        time.sleep(random.randint(1, 5))
                        continue

                    root = html.fromstring(resp.text)
                    thumbs = root.xpath(self.imgpath)
                    # NOTE(review): presumably ".md." marks a medium-size
                    # thumbnail and removing it yields the full image URL --
                    # confirm against the site.
                    newlist = [src.replace(".md.", ".") for src in thumbs]

                    workers = [
                        threading.Thread(target=self.getimglist,
                                         args=(newlist, lo, hi))
                        for lo, hi in self._chunk_ranges(len(newlist), self.WORKERS)
                    ]
                    for worker in workers:
                        worker.start()
                    for worker in workers:
                        worker.join()
                    print("get one page over")
        except Exception:
            # Not a bare except, so Ctrl-C / SystemExit still propagate.
            print("error,here  is details:{}".format(traceback.format_exc()))

    def getimglist(self, newlist, start, end):
        """Download ``newlist[start:end]`` into ``OUTPUT_DIR``.

        ``end`` is clamped to ``len(newlist)``. Each file is named after the
        last path component of its URL. A failed download or write is
        reported and skipped so the rest of the slice is still processed.

        Args:
            newlist: List of image URLs.
            start: Index of the first URL to download.
            end: Index to stop before (exclusive).
        """
        end = min(end, len(newlist))
        img_headers = self._image_headers()
        for idx in range(start, end):
            imgurl = newlist[idx]
            print(imgurl)
            try:
                resp = requests.get(imgurl, headers=img_headers,
                                    timeout=self.REQUEST_TIMEOUT)
                # Do not save an HTTP error page under an image file name.
                resp.raise_for_status()
                target = os.path.join(self.OUTPUT_DIR, os.path.basename(imgurl))
                with open(target, "wb") as fs:
                    fs.write(resp.content)
            except (requests.RequestException, OSError) as exc:
                print("download failed for {}: {}".format(imgurl, exc))

if __name__ == "__main__":
    # Start the crawl only when run as a script, not when imported.
    crawler = GetImage()
    crawler.get_oen_page()
  

  

# posted @ 2018-03-30 15:56  公众号python学习开发  阅读(411)  评论(0)  编辑  收藏  举报