Python爬虫之简单的图片获取

简单的静态网页的图片获取:

import os
import re
import urllib
import urllib.request

# Address of the page that is fetched and scanned for image links below.
url = 'http://www.toutiao.com/a6467889113046450702/'

def getHTML(url):
    """Fetch *url* and return the raw response body as bytes.

    The response is opened in a ``with`` block so the underlying
    connection is closed even if reading fails part-way through.

    Args:
        url: Address of the page to download.

    Returns:
        The undecoded body of the response; the caller is responsible
        for decoding it.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            request cannot be completed.
    """
    with urllib.request.urlopen(url) as response:
        return response.read()

def getIMG(html):
    """Return every image URL found in the decoded page text *html*.

    A URL is captured when it starts with ``http://`` directly after
    ``img src="`` and is terminated by the literal text ``&quot``.
    Matches are returned in document order; an empty list means no
    match was found.
    """
    return re.findall(r'img src="(http://.+?)&quot', html)
    
# Download the page, decode it as UTF-8 and collect the image URLs.
html = getHTML(url).decode('UTF-8')
imglist = getIMG(html)

# Destination folder for the downloaded pictures.
path = 'D:\\pic\\'
if not os.path.isdir(path):
    # makedirs also creates missing parent directories, where mkdir would fail.
    os.makedirs(path)
    print('创建文件夹%s'%path)

# Files are named 0.jpg, 1.jpg, ... in the order the links were found.
# The loop variable is deliberately not called `url`, so the page address
# stored in the module-level `url` is left intact.
for index, img_url in enumerate(imglist):
    filepath = path+'%d.jpg'%index
    print('正在下载第%d张照片...' % (index+1))
    urllib.request.urlretrieve(img_url, filepath)

print('下载完成!')

 

动态网页图片获取

posted @ 2017-09-21 13:45  君以沫  阅读(754)  评论(0)  编辑  收藏  举报