(学习日记) 爬取网站图片
#导入所需的库:requests 是第三方库,re 是 Python 标准库
# coding:utf-8
import requests,re
#找到需要爬取的网站'http://www.qqjia.com/sucai/sucai1210.htm'
#1>获取网站 2>正则表达式匹配不同图片的地址 3>找到所有图片的URL
#开发讲究见名知意(函数名和变量名要能直接说明用途)
#1.1 定义一个函数get到url
def get_urls(page_url='http://www.qqjia.com/sucai/sucai1210.htm', timeout=10):
    """Fetch a gallery page and return the image addresses found in it.

    Args:
        page_url: Address of the page to scan. Defaults to the page this
            script was originally written against.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``src`` value of every image tag matched by the
        pattern, in page order. The list is empty when nothing matches.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled
    # server.
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scanning an error page for images.
    response.raise_for_status()
    # The markup shared by every image tag is written out literally; only the
    # part that differs between tags (the address) is captured, non-greedily.
    url_add = r'<img border="0" alt="" src="(.*?)" /></p>'
    url_list = re.findall(url_add, response.text)
    # Printed so the extracted addresses can be checked by eye.
    print(url_list)
    return url_list
#第四步,下载网页数据
#再定义一个函数 目的:下载数据
def get_gif(url, name, dest_dir=r'D:\pygif', timeout=10):
    """Download one image and save it as ``<name>.gif`` inside ``dest_dir``.

    Args:
        url: Address of the image to download.
        name: Integer used as the file name; it is formatted with ``%d``.
        dest_dir: Folder that receives the file. Created when missing.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the folder cannot be created or the file cannot be
            written.
    """
    # Local import: the file's import line only brings in requests and re.
    import os

    response = requests.get(url, timeout=timeout)
    # Refuse to store an HTML error body under a .gif name.
    response.raise_for_status()

    # open() does not create missing folders, so make sure it exists first.
    os.makedirs(dest_dir, exist_ok=True)
    # The default folder is a raw string: '\p' is not a valid escape sequence
    # and newer interpreters warn about it in ordinary string literals.
    target = os.path.join(dest_dir, '%d.gif' % name)
    # response.content is the undecoded body, hence binary mode.
    with open(target, 'wb') as ft:
        ft.write(response.content)
# (5)启动这个程序
if __name__ == '__main__':
    # Save the images under consecutive numbers 1, 2, 3, ... in the order
    # their addresses were found on the page.
    for index, image_url in enumerate(get_urls(), start=1):
        get_gif(image_url, index)
response.text 是按编码解码后的网页源代码(字符串),response.content 是未解码的原始二进制数据(字节);保存图片时要用 content。
import os
import re

import requests


def url_get(timeout=10):
    """Fetch the emoticon page and return the image paths found in it.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``src`` value of every image tag matched by the
        pattern, in page order. The list is empty when nothing matches.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled
    # server.
    response = requests.get('http://qq.yh31.com/zjbq/2920180.html',
                            timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scanning an error page for images.
    response.raise_for_status()
    # Only the address inside src="..." is captured, non-greedily.
    url_add = r'<img border="0" alt="" src="(.*?)" />'
    url_list = re.findall(url_add, response.text)
    # Printed so the extracted paths can be checked by eye.
    print(url_list)
    return url_list


def download(url, name, timeout=10):
    """Download ``url`` and save the raw bytes under the number ``name``.

    Args:
        url: Full address of the file to download.
        name: Integer used as the file name; it is formatted with ``%d``.
            The file is written without an extension.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the folder cannot be created or the file cannot be
            written.
    """
    response = requests.get(url, timeout=timeout)
    # Refuse to store an HTML error body as if it were the image.
    response.raise_for_status()
    # open() does not create missing folders, so make sure it exists first.
    os.makedirs(r'D:\pygif', exist_ok=True)
    # Raw string: '\p' and '\%' are not valid escape sequences and newer
    # interpreters warn about them in ordinary string literals.
    with open(r'D:\pygif\%d' % name, 'wb') as ft:
        ft.write(response.content)


if __name__ == '__main__':
    # The page yields paths without a host, so each one is prefixed with the
    # image server before downloading. Files are numbered from 1.
    for a, url in enumerate(url_get(), start=1):
        com_url = 'http://mm.yh31.com:88' + url
        download(com_url, a)