下载某个页面中的图片
用到了 BeautifulSoup 这个库,需要先下载安装。下载地址: http://www.crummy.com/software/BeautifulSoup/
config.py
# Page whose <img> tags are scanned for pictures to download.
url = "http://www.baidu.com"
# Local directory the downloaded pictures are saved into.
folder = "d:\\test"
downloadPictrues.py
import config from bs4 import BeautifulSoup import urlparse from urllib2 import urlopen from urllib import urlretrieve import os ########################################### #to resolve the fucking character encoding problem import sys reload(sys) sys.setdefaultencoding('utf8') def main(url, out_folder): """Downloads all the images at 'url' to out_folder""" pageFile = urlopen(url) #pageFile ---a file-liked object soup = BeautifulSoup(pageFile) #get a BeatifulSoup Object #print soup.prettify() # elements = urlparse.urlparse(url) #parse url into a 6-tuple print elements parsed = list(elements) #new list initialized from iterable items for image in soup.findAll("img"): #find all "img"tag #print "Image: %(src)s" % image print image,image['src'],type(image) image_url = urlparse.urljoin(url, image['src']) #construct a full url filename = image["src"].split("/")[-1] outpath = os.path.join(out_folder, filename) # #print out_folder,filename,outpath urlretrieve(image_url, outpath) #download pictrues if __name__ == "__main__": url = config.url folder = config.folder if os.path.exists(folder): print 'ok' main(url,folder) else: os.makedirs(folder) main(url,folder)
作者:sdu20112013
如果您觉得阅读本文对您有帮助,请点一下“推荐”按钮,您的“推荐”将是我最大的写作动力!欢迎转载,转载请注明出处.