#!/usr/bin/env python
# Version = 3.5.2
# __auth__ = '无名小妖'
"""Scrape http://www.autohome.com.cn/news/ and download each article's cover image.

Every image is written to the current working directory under a random
UUID filename that keeps the image's original extension.
"""
import os
import uuid
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup


def _image_extension(img_url):
    """Return *img_url*'s file extension without the leading dot.

    Only the URL *path* is inspected, so a query string (e.g. '...jpg?v=2')
    can never leak into the extension — the original `split('.')[-1]` did
    exactly that. Falls back to 'jpg' when the path has no extension.
    """
    ext = os.path.splitext(urlsplit(img_url).path)[1]
    return ext.lstrip('.') or 'jpg'


def _download_image(img_url):
    """Fetch *img_url* and write its bytes to a '<uuid4>.<ext>' file.

    Returns the filename written, or None when the server did not answer
    with HTTP 200 (best-effort: a single failed image does not abort the run).
    """
    img_response = requests.get(url=img_url, timeout=10)  # timeout: never hang on a dead CDN
    if img_response.status_code != 200:
        return None
    jpg_name = '{}.{}'.format(uuid.uuid4(), _image_extension(img_url))
    with open(jpg_name, 'wb') as f:
        f.write(img_response.content)
    return jpg_name


def main():
    """Fetch the news page, locate every article entry, download its image."""
    response = requests.get(url='http://www.autohome.com.cn/news/', timeout=10)
    response.raise_for_status()  # fail fast instead of parsing an HTTP error page
    # Decode with the encoding detected from the page body before parsing.
    response.encoding = response.apparent_encoding
    # 'html.parser' ships with the stdlib; production code typically uses
    # 'lxml' for better performance.
    soup = BeautifulSoup(response.text, features='html.parser')
    # All article entries live under this lazy-load container.
    target = soup.find(id='auto-channel-lazyload-article')
    if target is None:
        return  # page layout changed; nothing to scrape
    for item in target.find_all('li'):
        a = item.find('a')
        if not a:
            continue  # separator/ad <li> without a link
        img = a.find('img')
        if img is None:
            continue  # BUGFIX: original raised AttributeError on <a> without <img>
        img_url = img.attrs.get('src')
        if not img_url:
            continue
        # Protocol-relative URLs ('//img...') need a scheme prepended.
        if not img_url.startswith("http:"):
            img_url = "http:" + img_url
        _download_image(img_url)


if __name__ == '__main__':
    main()