#!/usr/bin/env python
#coding:utf-8
import urllib
import re
def GetHtml(url):
    """Fetch the resource at ``url`` and return its raw body.

    Works on both Python 2 (``urllib.urlopen``) and Python 3
    (``urllib.request.urlopen``).

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` yields for the response, i.e. an undecoded
        byte string (``str`` on Python 2, ``bytes`` on Python 3).
    """
    # Resolved locally so this function does not depend on which urllib
    # layout the running interpreter provides.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the underlying connection even if read() raises.
        page.close()
def GetImg(html):
    """Download every ``.jpg`` image referenced by a ``src="..."`` attribute.

    Each matched URL is saved under the relative file name ``0.jpg``,
    ``1.jpg``, ... in order of appearance. Download progress is reported
    through ``cbx`` and one line is printed after each image completes.

    Args:
        html: Page markup, either as text or as undecoded bytes.

    Returns:
        None.
    """
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 3 a text pattern cannot be applied to bytes, so decode
    # first. On Python 2 ``bytes is str`` and the markup is left untouched.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # ``[^"]`` keeps each match inside a single attribute value. With ``.``
    # a match could begin at an earlier non-jpg ``src`` and swallow all the
    # markup up to a later ``.jpg"``.
    imglist = re.findall(r'src="([^"]+?\.jpg)"', html)
    for index, imgurl in enumerate(imglist):
        urlretrieve(imgurl, '%s.jpg' % index, cbx)
        # File names are 0-based; the message reports a 1-based count.
        print("img: %s is done!" % (index + 1))
def cbx(a, b, c):
    """Print download progress; suitable as a ``urlretrieve`` report hook.

    When the total size is positive, prints the completed percentage with
    two decimals, capped at 100. Otherwise prints the number of bytes
    accounted for so far, since no percentage can be computed.

    Args:
        a: Number of data blocks downloaded so far.
        b: Size of one data block.
        c: Size of the remote file; zero or negative when it is empty or
            not known.
    """
    if c <= 0:
        # Dividing by a zero size would raise, and a negative size would
        # yield a meaningless negative percentage.
        print("%d bytes" % (a * b))
        return
    # The last block is usually partial, so a * b can overshoot c.
    per = min(100.0, 100.0 * a * b / c)
    print("%.2f%%" % per)
# Script body: fetch the Baidu front page and save every .jpg it references.
# GetImg returns nothing, so its result is not printed (doing so only ever
# emitted the literal "None").
# NOTE(review): these statements run on import as well as on direct
# execution; consider an `if __name__ == "__main__":` guard if this module
# is ever imported from elsewhere.
html = GetHtml("http://www.baidu.com")
GetImg(html)