Python 爬取贴吧图片


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/7/19 16:21
# @Author  : wqj
# @Contact : wqjhky@gmail.com
# @Site    :
# @File    : img.py
# @Software: PyCharm Community Edition
#
# Crawl a run of Baidu Tieba listing pages and download every .jpg image
# whose address matches IMG_PATTERN. Works on both Python 2 and Python 3.
import os
import re
import sys
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve

# Captures the address of each .jpg whose URL starts with "http://img".
# Compiled once at module level so every page reuses the same pattern object.
IMG_PATTERN = re.compile(r'src="(http://img.*?\.jpg)"')

# Listing URL without the page offset. The original query ended in
# "fr=search?pn=", which made the page number part of the "fr" value, so the
# same page was requested on every iteration; "pn" is now its own parameter.
BASE_URL = ("http://tieba.baidu.com/f?ie=utf-8"
            "&kw=%E6%B2%B3%E5%8D%97%E7%A7%91%E6%8A%80%E5%AD%A6%E9%99%A2"
            "&fr=search&pn=")

# Number of listing pages to visit (the original loop was range(1, 28)).
PAGE_COUNT = 27

# NOTE(review): Tieba is assumed to paginate by thread offset in steps of 50
# (pn=0, 50, 100, ...) -- confirm against the live site before relying on it.
PAGE_STEP = 50


def getHtml(url):
    """Fetch *url* and return the raw response body.

    The response object is closed as soon as the body has been read.
    """
    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url)) as page:
        return page.read()


def getImg(html, x, dirpath=None):
    """Download every image referenced in *html* and return the next index.

    Args:
        html: Page source, either text or UTF-8 encoded bytes.
        x: Index used to name the first downloaded file (``<x>.jpg``).
        dirpath: Directory to save into; ``None`` keeps the original
            behaviour of saving into the current working directory.

    Returns:
        The index to use for the next image, i.e. *x* plus the number of
        images that were downloaded successfully.
    """
    # On Python 3 the response body is bytes, which a text pattern cannot
    # search; on Python 2 ``bytes is str`` so this branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'ignore')

    imList = IMG_PATTERN.findall(html)
    print(imList)

    for img_url in imList:
        name = '%s.jpg' % x
        target = os.path.join(dirpath, name) if dirpath else name
        print(img_url)
        print(x)
        try:
            urlretrieve(img_url, target)
        except IOError as err:
            # One unreachable image should not abort the whole crawl; the
            # index is left untouched so the numbering stays gap-free.
            print('skipping %s: %s' % (img_url, err))
            continue
        x += 1
    return x


def main(dirpath=None):
    """Crawl PAGE_COUNT listing pages and save their images.

    Args:
        dirpath: Optional output directory, created when missing. ``None``
            saves into the current working directory.
    """
    if dirpath and not os.path.isdir(dirpath):
        os.makedirs(dirpath)

    x = 1
    for page in range(PAGE_COUNT):
        ul = BASE_URL + str(page * PAGE_STEP)
        print(ul)
        x = getImg(getHtml(ul), x, dirpath)


if __name__ == "__main__":
    # Optional first argument: the directory to save images into
    # (for example D:\img, the path the original script computed but
    # never used).
    main(sys.argv[1] if len(sys.argv) > 1 else None)

以上是代码，程序较为简单。

posted @ 2017-07-20 09:48  没时间看海  阅读(213)  评论(0)  编辑  收藏  举报