爬取汽车之家

 

这个算是爬虫的老梗了.

 

就是用requests 和 beautifulsoup来操作一波.

 

 1 import requests
 2 from bs4 import BeautifulSoup
 3 
 4 ret = requests.get(url="https://www.autohome.com.cn/news/")
 5 code = ret.apparent_encoding
 6 ret.encoding = ret.apparent_encoding
 7 # print(ret.text)
 8 
 9 soup = BeautifulSoup(ret.text, 'html.parser')
10 # print(soup)
11 
12 div = soup.find(name='div', id='auto-channel-lazyload-article')
13 # print(div)
14 li_list = div.find_all(name='li')
15 # print(li_list)
16 for it in li_list:
17     h3 = it.find(name='h3')
18     if not h3:
19         continue
20     # print(h3.text)
21     p = it.find(name='p')
22     a = it.find(name='a')
23     img = it.find(name='img')
24     src = img.get('src')
25 
26     file_name = './image/' + src.rsplit('__', maxsplit=1)[1]
27 
28     ret_img = requests.get(
29         url='https:' + src
30     )
31 
32     with open(file_name, 'wb') as fw:
33         fw.write(ret_img.content)
34 
35     print(h3.text, a.get('href'))
36     print(p.text)
37     print('=' * 15)

 

posted @ 2018-12-26 16:48  #忘乎所以#  阅读(219)  评论(0)  编辑  收藏  举报