










 1 import requests
 2 import os
 3 from hashlib import md5
 4 from urllib.parse import urlencode
 5 from multiprocessing.pool import Pool
 6 def get_data(offset):
 7     #构造URL,发送请求
 8     params = {
 9         'aid': '24',
10         'app_name': 'web_search',
11         'offset': offset,
12         'format': 'json',
13         'autoload': 'true',
14         'count': '20',
15         'en_qc': '1',
16         'cur_tab': '1',
17         'from': 'search_tab',
18     }
19     base_url = ''
20     url = base_url + urlencode(params)


1     try:
2         res = requests.get(url)
3         if res.status_code == 200:
4             return res.json()
5     except requests.ConnectionError:
6         return 'sorry.'


 1 def get_img(data):
 2     if data.get('data'):
 3         page_data = data.get('data')
 4         for item in page_data:
 5             title = item.get('title')
 6             imgs = item.get('image_list')
 7             for img in imgs:          #yield是一个生成器,具体的解释可以去博客:
 8                 yield {
 9                     'title': title,
10                     'img': img.get('url')
11                 }


 1 def save(item):
 2     img_path = 'img' + '/' + item.get('title')
 3     if not os.path.exists(img_path):
 4         os.makedirs(img_path)
 5     try:
 6         res = requests.get(item.get('img'))
 7         if res.status_code == 200:
 8             file_path = img_path + '/' + '{name}.{suffix}'.format(
 9                 name=md5(res.content).hexdigest(),
10                 suffix='jpg')
11             if not os.path.exists(file_path):
12                 with open(file_path, 'wb') as f:
13                     f.write(res.content)
14                 print('Successful')
15             else:
16                 print('Already Download')
17     except requests.ConnectionError:
18         print('Failed to save images')


 1 def main(offset):
 2     data = get_data(offset)
 3     for item in get_img(data):
 4         print(item)
 5         save(item)
 6 START = 0
 7 END = 10
 8 if __name__ == "__main__":
 9     pool = Pool()
10     offsets = ([n * 20 for n in range(START, END + 1)])
12     pool.close()
13     pool.join()



 1 import requests
 2 import os
 3 from hashlib import md5
 4 from urllib.parse import urlencode
 5 from multiprocessing.pool import Pool
 6 def get_data(offset):
 7     #构造URL,发送请求
 8     params = {
 9         'aid': '24',
10         'app_name': 'web_search',
11         'offset': offset,
12         'format': 'json',
13         'autoload': 'true',
14         'count': '20',
15         'en_qc': '1',
16         'cur_tab': '1',
17         'from': 'search_tab',
18     }
19     base_url = ''
20     url = base_url + urlencode(params)
21     try:
22         res = requests.get(url)
23         if res.status_code == 200:
24             return res.json()
25     except requests.ConnectionError:
26         return 'sorry.'
def get_img(data):
    """Yield one ``{'title', 'img'}`` record per image found in a result page.

    Args:
        data: Decoded page of results. Expected to be a dict whose ``'data'``
            entry is a list of item dicts, each with an optional ``'title'``
            and an optional ``'image_list'`` of dicts carrying a ``'url'``.
            Anything that is not a dict (for example ``None`` or an error
            string) produces no records instead of raising.

    Yields:
        dict: ``{'title': <item title>, 'img': <image url>}`` for every image
        of every item, in the order they appear.
    """
    if not isinstance(data, dict):
        return
    for item in data.get('data') or []:
        title = item.get('title')
        # Items without an 'image_list' are skipped rather than iterated
        # as None.
        for img in item.get('image_list') or []:
            yield {
                'title': title,
                'img': img.get('url'),
            }
def save(item, timeout=10):
    """Download one image and write it to ``img/<title>/<md5-of-content>.jpg``.

    Progress is reported with ``print``; nothing is returned.

    Args:
        item: Dict with a ``'title'`` (used as the directory name) and an
            ``'img'`` (the URL to download).
        timeout: Seconds to wait for the image server before giving up.
    """
    title = item.get('title')
    img_url = item.get('img')
    if not title or not img_url:
        # No title means no directory name; no URL means nothing to fetch.
        print('Failed to save images')
        return

    img_path = os.path.join('img', title)
    # exist_ok avoids the check-then-create race when this function runs in
    # several processes that target the same directory.
    os.makedirs(img_path, exist_ok=True)

    try:
        res = requests.get(img_url, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout):
        print('Failed to save images')
        return

    if res.status_code != 200:
        return

    # The file name is the MD5 of the content, so an identical image is
    # detected as already present.
    file_name = '{name}.{suffix}'.format(
        name=md5(res.content).hexdigest(),
        suffix='jpg')
    file_path = os.path.join(img_path, file_name)
    if os.path.exists(file_path):
        print('Already Download')
        return
    with open(file_path, 'wb') as f:
        f.write(res.content)
    print('Successful')
def main(offset):
    """Fetch the result page at ``offset`` and save every image it lists.

    Each image record is printed before it is handed to ``save``.
    """
    page = get_data(offset)
    records = get_img(page)
    for record in records:
        print(record)
        save(record)
# First and last page index to fetch (inclusive); each page covers 20 results.
START = 0
END = 10
if __name__ == "__main__":
    pool = Pool()
    offsets = [n * 20 for n in range(START, END + 1)]
    # Hand every offset to the workers; without this call the pool is closed
    # without ever running main.
    pool.map(main, offsets)
    pool.close()
    pool.join()
posted @ 2019-09-05 16:06  KangZP  阅读(531)  评论(0编辑  收藏  举报