# My first little web crawler (kept as a memento).
import urllib.request
import re
def getHtml(url, encoding="GBK"):
    """Fetch *url* and return its body decoded to text.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.
        encoding: Codec used to decode the raw response bytes. Defaults to
            ``"GBK"``, the codec this script has always used.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The context manager closes the response even when read() raises, so the
    # underlying connection is never leaked.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    return raw.decode(encoding)
def getImg(html):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in *html*.

    Each image is written to the current working directory as ``0.jpg``,
    ``1.jpg``, ... numbered in the order the URLs appear in *html*.

    Args:
        html: Page markup (``str``) to scan for image URLs.

    Raises:
        urllib.error.URLError: If an image URL cannot be opened.

    NOTE(review): URLs are passed to ``urlretrieve`` exactly as found in the
    markup; relative or scheme-less ``src`` values are not resolved here.
    """
    # Non-greedy group so each match ends at the first `.jpg"` after `src="`.
    imgre = re.compile(r'src="(.*?\.jpg)"')
    imglist = imgre.findall(html)
    # enumerate supplies the running file number. The previous code formatted
    # the filename with an undefined name and raised NameError on the first hit.
    for i, imgurl in enumerate(imglist):
        urllib.request.urlretrieve(imgurl, '%s.jpg' % i)
# Script driver: fetch the gallery front page, then save every .jpg it
# references into the current working directory.
html = getHtml(url="http://tupian.zol.com.cn/")
getImg(html=html)