百度贴吧图片抓取
# -*- coding: utf-8 -*-
# Baidu Tieba image scraper: saves the images posted in a thread to ./images.
# Written for Python 2 (urllib2 / urllib.urlretrieve).
from bs4 import BeautifulSoup
import re
import os
import urllib2
import urllib


def _fetch_soup(url):
    """Fetch *url* and return its body parsed with the 'html.parser' backend."""
    page = urllib2.urlopen(url)
    try:
        html = page.read()
    finally:
        # Close the response explicitly so it is released even if read() fails.
        page.close()
    return BeautifulSoup(html, 'html.parser')


def _last_page_number(href):
    """Return the page number carried by the ``pn=N`` parameter of *href*.

    Raises:
        ValueError: if *href* contains no ``pn=<digits>`` parameter.
    """
    # Read the whole number, not just the final character, so that threads
    # with ten or more pages are handled correctly.
    match = re.search(r'pn=(\d+)', href)
    if match is None:
        raise ValueError('no page number in last-page link: %r' % (href,))
    return int(match.group(1))


def download_img(urls, k):
    """Download every ``img.BDE_Image`` picture of a multi-page thread.

    The thread page is fetched once to locate the "last page" anchor (link
    text 尾页); its href is assumed to carry the final page number as a
    ``pn=N`` query parameter, the same form used here to request each page.
    Every page from 1 to N inclusive is then fetched and each matching image
    is saved as ``./images/<k>-<page>-<index>.jpg`` (index starts at 1).

    Args:
        urls: Base URL of the thread, without a ``pn`` query parameter.
        k: Identifier used as the filename prefix (the script passes the
            thread id).

    Returns:
        False when the page has no "last page" link; None otherwise.

    Raises:
        ValueError: if the "last page" href carries no page number.
    """
    soup = _fetch_soup(urls)
    last_page_link = soup.find('a', string='尾页')
    if last_page_link is None:
        return False
    total_pages = _last_page_number(last_page_link['href'])

    # urlretrieve does not create missing directories.
    if not os.path.isdir('./images'):
        os.makedirs('./images')

    # +1 because range() excludes its upper bound and the last page counts.
    for j in range(1, total_pages + 1):
        url = urls + "?pn=" + str(j)
        soup = _fetch_soup(url)
        print(url)
        images = soup.find_all('img', class_="BDE_Image")
        for i, img in enumerate(images, 1):
            filename = str(k) + "-" + str(j) + '-' + str(i)
            print(filename)
            urllib.urlretrieve(img['src'], './images/%s.jpg' % filename)


# Script entry: scrape one hard-coded thread, using its id as the file prefix.
k = 4807867791
url = "http://tieba.baidu.com/p/4807867791"
download_img(url, k)