久染

导航

(学习日记) 爬取网站图片

#导入所需的库(requests 是第三方库,re 是标准库)
# coding:utf-8
import requests,re

#找到需要爬取的网站'http://www.qqjia.com/sucai/sucai1210.htm'

#1>获取网站 2>正则表达式匹配不同图片的地址 3>找到所有图片的URL
#开发讲究见名知意

#1.1 定义一个函数get到url
def get_urls(page_url='http://www.qqjia.com/sucai/sucai1210.htm', timeout=10):
    """Fetch a gallery page and return the image addresses found in it.

    Args:
        page_url: Address of the page to scan. Defaults to the page this
            script was originally written for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list holding the ``src`` value of every ``<img>`` tag that matches
        the markup pattern below, in the order they appear in the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # (1) Download the page. Without a timeout the call may block forever.
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scanning an error page.
    response.raise_for_status()
    # (2) The fixed markup is matched literally; the part that differs
    # between images (the address) is captured lazily with (.*?).
    url_add = r'<img border="0" alt="" src="(.*?)" /></p>'
    # (3) Collect every captured address.
    url_list = re.findall(url_add, response.text)
    # (4) Print the result as a quick sanity check.
    print(url_list)
    return url_list
#第四步,下载网页数据
#再定义一个函数 目的:下载数据
def get_gif(url, name, timeout=10):
    r"""Download one image and store it as ``D:\pygif\<name>.gif``.

    Args:
        url: Address of the image to fetch.
        name: Integer used as the file name (formatted with ``%d``).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the target file cannot be opened for writing, e.g.
            because the target directory does not exist.
    """
    # Request the image. Without a timeout the call may block forever.
    response = requests.get(url, timeout=timeout)
    # Do not save an HTML error page under a .gif name.
    response.raise_for_status()
    # Raw string: "\p" and "\%" are invalid escape sequences in a normal
    # literal. The resulting path text is unchanged.
    target = r'D:\pygif\%d.gif' % name
    # response.content is the raw bytes of the body, hence binary mode.
    with open(target, 'wb') as ft:
        ft.write(response.content)

# (5)启动这个程序
if __name__ == '__main__':
    # enumerate(start=1) numbers the saved files 1, 2, 3, ... exactly as the
    # hand-maintained counter did.
    for index, url in enumerate(get_urls(), start=1):
        try:
            get_gif(url, index)
        except requests.RequestException as error:
            # One broken link should not abort the remaining downloads.
            print('skipped %s: %s' % (url, error))

response.text 是解码后的网页源代码(字符串),response.content 是未解码的原始二进制数据(字节),保存图片时要用 content。




import requests,re
def url_get(page_url='http://qq.yh31.com/zjbq/2920180.html', timeout=10):
    """Fetch a gallery page and return the image paths found in it.

    Args:
        page_url: Address of the page to scan. Defaults to the page this
            script was originally written for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list holding the ``src`` value of every ``<img>`` tag that matches
        the markup pattern below, in the order they appear in the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout the call may block forever.
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scanning an error page.
    response.raise_for_status()
    # Fixed markup is matched literally; (.*?) lazily captures the address.
    url_add = r'<img border="0" alt="" src="(.*?)" />'
    url_list = re.findall(url_add, response.text)
    # Print the result as a quick sanity check.
    print(url_list)
    return url_list
def download(url, name, timeout=10):
    r"""Download one file and store it as ``D:\pygif\<name>``.

    The file is written without an extension, as in the original script.

    Args:
        url: Address of the file to fetch.
        name: Integer used as the file name (formatted with ``%d``).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the target file cannot be opened for writing, e.g.
            because the target directory does not exist.
    """
    # Without a timeout the call may block forever.
    response = requests.get(url, timeout=timeout)
    # Do not save an HTML error page in place of the image.
    response.raise_for_status()
    # Raw string: "\p" and "\%" are invalid escape sequences in a normal
    # literal. The resulting path text is unchanged.
    target = r'D:\pygif\%d' % name
    # response.content is the raw bytes of the body, hence binary mode.
    with open(target, 'wb') as ft:
        ft.write(response.content)
if __name__ == '__main__':
    # The page lists image paths without a host, so each one is prefixed
    # with the image server's address before downloading.
    image_host = 'http://mm.yh31.com:88'
    # enumerate(start=1) numbers the saved files 1, 2, 3, ... exactly as the
    # hand-maintained counter did.
    for index, url in enumerate(url_get(), start=1):
        com_url = image_host + url
        try:
            download(com_url, index)
        except requests.RequestException as error:
            # One broken link should not abort the remaining downloads.
            print('skipped %s: %s' % (com_url, error))

 

posted on 2018-11-02 23:56  久染  阅读(489)  评论(0)  编辑  收藏  举报