# python meizitu: download the numbered JPEG images linked from a meizitu.com gallery page
"""Download the numbered JPEG images linked from a meizitu.com gallery page.

Run as a script: the gallery page is fetched, decoded, scanned for image
URLs with a regular expression, and every match is saved to ``SAVE_DIR``
as ``0.jpg``, ``1.jpg``, ... in the order the URLs appear in the page.
"""
import os
import re
import urllib.request

# Gallery page to scrape; any static URL can be substituted here.
PAGE_URL = "http://www.meizitu.com/a/5485.html"

# Encoding used to decode the page bytes (use 'utf-8' for UTF-8 pages).
PAGE_ENCODING = 'gbk'

# Directory the images are written to.
SAVE_DIR = 'd://1/5485'

# Browser-like User-Agent sent with the image downloads.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')

# Matches http:// URLs whose file name is exactly two digits plus ".jpg".
# The dot is escaped so it only matches a literal "."; quotes and angle
# brackets are excluded so a match cannot run across neighbouring HTML
# attributes or tags that are not separated by whitespace.
IMAGE_URL_RE = re.compile(r'''http://[^\s"'<>]*/[0-9][0-9]\.jpg''')


def getHtml(url):
    """Fetch *url* and return the raw response body as bytes.

    The response is closed as soon as the body has been read.
    Errors raised by ``urllib.request.urlopen`` propagate to the caller.
    """
    with urllib.request.urlopen(url) as page:
        return page.read()


def find_image_urls(html):
    """Return every image URL found in the *html* text, in page order.

    Returns an empty list when nothing matches.
    """
    return IMAGE_URL_RE.findall(html)


def download_images(image_urls, save_dir=SAVE_DIR):
    """Save each URL in *image_urls* to ``<save_dir>/<index>.jpg``.

    Files are numbered from 0 in the order given.  Nothing is touched
    (no directory created, no opener installed) when *image_urls* is empty.
    """
    if not image_urls:
        return

    # urlretrieve does not create missing directories itself.
    os.makedirs(save_dir, exist_ok=True)

    # urlretrieve goes through the globally installed opener, so install
    # one carrying the User-Agent header once, before the loop.
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent', USER_AGENT)]
    urllib.request.install_opener(opener)

    for index, image_url in enumerate(image_urls):
        urllib.request.urlretrieve(image_url, '%s/%s.jpg' % (save_dir, index))


def main():
    """Scrape ``PAGE_URL`` and download every image it links to."""
    html = getHtml(PAGE_URL).decode(PAGE_ENCODING)
    download_images(find_image_urls(html))
    print("All Done!")


if __name__ == "__main__":
    main()