# A simple web crawler implemented in Python

#coding=utf-8
import re
import urllib

def getHtml(url):
    """Download the resource at ``url`` and return its raw body.

    Args:
        url: Address to open; passed unchanged to ``urlopen``.

    Returns:
        The response body exactly as read, without decoding (``bytes`` on
        Python 3, ``str`` on Python 2).

    Raises:
        Whatever ``urlopen`` or ``read`` raises; nothing is caught here.
    """
    # Import locally so the function works on both major versions:
    # ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Python 2 response objects are not context managers, so close
        # explicitly instead of using ``with``.
        page.close()

def getImg(html):
    """Return the ``.jpg`` addresses of the image tags found in ``html``.

    A match is a ``src="..."`` attribute whose value ends in ``.jpg`` and is
    immediately followed by `` pic_ext``.

    Args:
        html: Page markup as text. A ``bytes``/``bytearray`` page is also
            accepted and decoded as UTF-8 with undecodable bytes replaced.

    Returns:
        A list of the captured ``src`` values in document order; an empty
        list when nothing matches.
    """
    # A page read from a Python 3 response is ``bytes`` and cannot be
    # searched with a text pattern. On Python 2 ``bytes`` is ``str``, so the
    # second check leaves such input untouched.
    if isinstance(html, (bytes, bytearray)) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    # ``[^"]*`` keeps the capture inside one quoted attribute value; an
    # unrestricted ``.*?`` could run past the closing quote and swallow
    # text from preceding attributes or tags.
    reg = r'src="([^"]*\.jpg)" pic_ext'
    return re.findall(reg, html)

if __name__ == "__main__":
    # Fetch one fixed forum thread and print the image addresses found in it.
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    # The parenthesised form is valid on both Python 2 and Python 3; the bare
    # ``print x`` statement is a syntax error on Python 3.
    print(getImg(html))

# Posted @ 2017-04-26 15:51 by joangaga | Views (296) | Comments (0) | Bookmark | Report
#
# joangaga