简单爬虫 爬特定链接里妹子的图片

 1 import urllib.request
 2 import re
 3   
 4 def get_html(url):  
 5     page = urllib.request.urlopen(url)  
 6     html = page.read()  
 7     return html  
 8   
 9 def get_img(html):  
10     reg = r'src="(.*?\.jpg)" bdwater='  
11     imgre = re.compile(reg)  
12     imglist = re.findall(imgre, html)  
13     i = 0  
14     for imgurl in imglist:  
15         urllib.request.urlretrieve(imgurl, '%s.jpg'%i)  
16         i+=1  
# Script entry: fetch the thread page, decode it, then download its images.
page_bytes = get_html('https://tieba.baidu.com/p/4395980124')
# Python 3: get_html returns bytes, but get_img applies a str regex, so the
# page has to be decoded to text first.
html = page_bytes.decode('utf-8')

# Print whatever get_img returns once the downloads have finished.
print(get_img(html))

 

posted @ 2017-04-20 11:35  幽灵飞舞  阅读(170)  评论(0)  编辑  收藏  举报