Python实现简单爬虫程序

#coding=utf-8
import re
import urllib

def getHtml(url):
    """Open ``url`` and return the raw response body.

    Runs on both Python 2 (``urllib.urlopen``) and Python 3
    (``urllib.request.urlopen``). No decoding is performed.

    Args:
        url: Address to open, e.g. ``"http://tieba.baidu.com/p/2460150866"``.

    Returns:
        The result of ``read()`` on the opened response: ``str`` on
        Python 2, ``bytes`` on Python 3.

    Errors raised by ``urlopen`` or ``read`` propagate to the caller.
    """
    # Imported locally so this function works on either major version
    # regardless of what the module imported at the top.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()

def getImg(html):
    """Return the ``.jpg`` image addresses found in ``html``.

    An address is the value of a ``src="..."`` attribute that ends in
    ``.jpg`` and is immediately followed by `` pic_ext``.

    Args:
        html: Page markup, as text or as bytes.

    Returns:
        A list of the captured ``src`` values in document order, of the
        same string type as ``html``. Empty when nothing matches.
    """
    # [^"]* keeps the capture inside one attribute value. The previous
    # ``.*?`` could begin at an earlier ``src="`` on the same line and
    # swallow everything up to the first ``.jpg" pic_ext``.
    reg = r'src="([^"]*\.jpg)" pic_ext'
    if isinstance(html, bytes):
        # A bytes subject needs a bytes pattern. On Python 2 ``bytes`` is
        # ``str`` and this encode is a no-op for the ASCII pattern.
        reg = reg.encode("ascii")
    return re.findall(reg, html)

if __name__ == "__main__":
    # Demo run: download one Baidu Tieba thread and print the image
    # addresses extracted from it.
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    # Parenthesised form: a print statement on Python 2 and a function
    # call on Python 3, with the same output for a single argument.
    print(getImg(html))

posted @ 2017-04-26 15:51  joangaga  阅读(266)  评论(0)  编辑  收藏  举报