全自动爬取壁纸

如图

 

 代码:

 1 import requests
 2 import re
 3 from lxml import html
 4 import _thread
 5 
 6 
 7 class wallpaper:
 8     def __init__(self):
 9         '''
10         初始化
11         '''
12         self.url = "http://simpledesktops.com/"
13         self.req_header = {
14             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
15         }
16         self.S = requests.session()
17 
18     def home(self, num):
19         '''
20         访问首页
21         '''
22         self.numx = num
23         X = self.S.get(self.url+f"browse/{num}")
24         self.context = X.text
25 
26     def url_all(self):
27         '''
28         获取所有子页链接
29         '''
30         tree = html.fromstring(self.context)
31         self.res = tree.xpath('//div[@class="desktop"]/a/@href')
32         print(self.res)
33 
34     def Single(self, url):
35         '''
36         访问子页,下载图片
37         '''
38         url = url
39         O = self.S.get(self.url+url)
40         Otree = html.fromstring(O.text)
41         res = Otree.xpath('//div[@class="desktop"]/a/@href')
42         title = Otree.xpath('//h2/a/text()')
43         print(f"第{self.numx}页_标题:"+title[0])
44         cs = self.S.get(self.url+res[0])
45         print("链接:"+self.url+res[0])
46         with open(f'img/{self.numx}_{title[0]}.png', 'wb') as f:
47             f.write(cs.content)
48 
49     def dowlod(self):
50         '''
51         下载所有图片
52         '''
53         for i in self.res:
54             self.Single(url=i)
55 
56 
def job(IN, OUT):
    """Thread worker: download the listing pages ``IN`` .. ``OUT - 1``.

    One scraper, and therefore one HTTP session, is shared by all pages
    of the range so connections can be reused. A failure on one page is
    reported with its traceback and the worker moves on to the next page,
    instead of the whole range being lost to a single bad page.
    """
    import traceback

    if IN >= OUT:
        # Empty range: nothing to download, so do not build a session.
        return

    A = wallpaper()
    for num in range(IN, OUT):
        try:
            A.home(num=num)
            A.url_all()
            A.dowlod()
        except Exception:
            # Thread boundary: nothing above this frame can handle the
            # error, so log it here and keep the remaining pages going.
            print(f"page {num} failed:")
            traceback.print_exc()
66 
67 
def main():
    """Download several page ranges in parallel and wait for them to end.

    Each range is handled by its own thread running ``job``. The threads
    are joined, so the process exits once every download has finished
    instead of spinning forever in a busy loop.
    """
    import threading

    # NOTE(review): the ranges leave gaps (pages 10-13, 20-22, 30-31 and 40
    # are never fetched). Kept exactly as in the original; confirm whether
    # the gaps are intended.
    page_ranges = [(5, 10), (14, 20), (23, 30), (32, 40), (41, 51)]

    workers = []
    try:
        for first, stop in page_ranges:
            worker = threading.Thread(target=job, args=(first, stop))
            worker.start()
            workers.append(worker)
    except RuntimeError:
        # Thread.start() raises RuntimeError when no thread can be created.
        print("Error: 无法启动线程")

    # Block until the workers that did start are done; unlike a
    # ``while 1: pass`` loop this uses no CPU while waiting.
    for worker in workers:
        worker.join()


if __name__ == "__main__":
    main()

 

posted @ 2020-07-04 15:29  sunny开始学坏  阅读(232)  评论(0)  编辑  收藏  举报