Python爬虫第一篇
#!/usr/bin/env python
# coding=utf-8
"""Download the JPEG images embedded in a Baidu Tieba thread page.

Example thread: http://tieba.baidu.com/p/3296647141

The script runs unchanged on Python 2 and Python 3: every ``print`` call
passes a single pre-formatted string, which both versions print identically.
"""
import contextlib
import re
import urllib

try:  # Python 3 moved these helpers into urllib.request.
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 keeps them on the top-level urllib module.
    from urllib import urlopen, urlretrieve

# Matches the inline post images; the groups are (width, height, src).
# Compiled once at import time instead of on every get_images() call.
_IMAGE_PATTERN = re.compile(
    r'class="BDE_Image" pic_type="0" width="(.+?)" height="(.+?)" '
    r'src="(.+?\.jpg)"'
)


def get_content(url):
    """Fetch *url* and return the raw response body.

    The response object is closed even when ``read()`` raises, so a failed
    download does not leak the underlying connection.
    """
    # contextlib.closing is used because the Python 2 response object does
    # not support the ``with`` statement on its own.
    with contextlib.closing(urlopen(url)) as response:
        return response.read()


def get_images(info):
    """Download every image referenced in the page source *info*.

    Each match of the image pattern is saved to the current working
    directory as ``0.jpg``, ``1.jpg``, ... in the order found, and the number
    of matches is printed at the end. A page without matching images simply
    reports ``count: 0``.

    *info* may be text or, on Python 3, the raw bytes returned by
    ``get_content``.
    """
    # On Python 3 the response body is bytes, which a text pattern cannot
    # search. On Python 2 ``bytes is str`` so this branch never runs.
    # NOTE(review): assumes the page is UTF-8 -- confirm. Undecodable bytes
    # are replaced rather than raising.
    if isinstance(info, bytes) and not isinstance(info, str):
        info = info.decode('utf-8', 'replace')

    images_code = _IMAGE_PATTERN.findall(info)

    # Debug output kept from the original script. It is guarded because an
    # unconditional images_code[2] raised IndexError on pages with fewer
    # than three images.
    if len(images_code) > 2:
        print(images_code[2])

    for index, image in enumerate(images_code):
        # The 1-tuple wrapper makes %s format the whole match tuple.
        print('image_url: %s' % (image,))
        urlretrieve(image[2], '%d.jpg' % index)

    print('count: %d' % len(images_code))


if __name__ == '__main__':
    info = get_content('http://tieba.baidu.com/p/3296647141')
    get_images(info)
夕阳的进步与发展,靠各位同仁的支持与鼓励!谢谢!!!!