Python爬虫第一篇

 1 #!/usr/bin/env python
 2 #coding=utf-8
 3 #http://tieba.baidu.com/p/3296647141
 4 import re
 5 import urllib
 6 
 7 def get_content(url):
 8     '''doc.'''
 9     html=urllib.urlopen(url)
10     content=html.read()
11     html.close()
12 
13     return content
def get_images(info):
    """Download every ``BDE_Image`` JPEG referenced in a page's HTML.

    Each matching ``<img>`` is saved in the current working directory as
    ``0.jpg``, ``1.jpg``, ... in order of appearance. Progress is printed
    as one ``image_url:`` line per image followed by a final ``count:`` line.

    Args:
        info: HTML source of the page. On Python 3 a ``bytes`` body (as
            returned by ``urlopen(...).read()``) is accepted and decoded
            before matching.

    Returns:
        None. The results are the files written to disk and the printed log.

    Raises:
        Any error raised by ``urlretrieve`` while downloading or writing
        a file propagates unchanged.
    """
    # Resolve urlretrieve locally so the function works on both Python 2
    # (urllib.urlretrieve) and Python 3 (urllib.request.urlretrieve).
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 2 ``bytes is str`` so this branch never runs; on Python 3 a
    # str pattern cannot search bytes, so decode first. The pattern only
    # targets ASCII attribute text, hence undecodable bytes are replaced
    # rather than treated as fatal.
    # NOTE(review): assumes the page is UTF-8 (or ASCII-compatible) - confirm.
    if isinstance(info, bytes) and not isinstance(info, str):
        info = info.decode('utf-8', 'replace')

    # Captures (width, height, src) for each image tag.
    pattern = re.compile(
        r'class="BDE_Image" pic_type="0" width="(.+?)" height="(.+?)" src="(.+?\.jpg)"'
    )
    matches = pattern.findall(info)

    # The former debug print of matches[2] was removed: it raised IndexError
    # whenever the page held fewer than three matching images.
    for index, match in enumerate(matches):
        print('image_url: %s' % (match,))
        urlretrieve(match[2], '%d.jpg' % index)
    print('count: %d' % len(matches))
27 
if __name__ == '__main__':
    # Script entry point: download the thread page, then save its images.
    get_images(get_content('http://tieba.baidu.com/p/3296647141'))

 

posted @ 2014-10-02 13:01  爱在夕阳下  阅读(112)  评论(0)  编辑  收藏  举报