# Image-scraping tool (实现抓图的工具)
#encoding:UTF-8
import urllib
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup
def GetUrlContent(url,path):
    """Download the images of every page linked from an index page.

    Fetches ``url``, parses it with BeautifulSoup and, for each ``<a>`` tag
    that has ``target="_blank"`` and no ``title`` attribute, prints the
    anchor's ``href`` and passes it to ``GetUrl`` together with ``path``.

    url  -- address of the index page,
            e.g. "http://www.2cto.com/meinv/sexmv/"
    path -- directory handed through to ``GetUrl`` as the download target

    Errors raised while fetching or parsing the index page itself are not
    caught here and propagate to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(content)
    # Every anchor that opens in a new window and carries no title attribute.
    anchors = soup.findAll('a', attrs={"target": "_blank"}, title=None)
    for anchor in anchors:
        page_url = anchor.get('href')
        if not page_url:
            # Indexing a missing href raises KeyError, which would abort the
            # whole crawl; there is nothing to follow, so skip the anchor.
            continue
        print(page_url)
        GetUrl(page_url, path)
        # Two blank-ish lines separate the output of consecutive pages.
        print(" ")
        print(" ")
def createFileWithFileName(localPathParam,fileName):
    """Return the path of ``fileName`` inside ``localPathParam``, creating
    an empty file there when nothing exists at that path yet.

    localPathParam -- directory that should contain the file
    fileName       -- name of the file within that directory

    Returns the joined path. An existing file is left untouched. Raises
    IOError/OSError when the file cannot be created (for example because
    the directory does not exist).
    """
    # os.path.join picks the platform's separator; a hard-coded backslash
    # would produce a file literally named "dir\name" outside Windows.
    totalPath = os.path.join(localPathParam, fileName)
    if not os.path.exists(totalPath):
        # Append mode creates the file without ever truncating content; the
        # with-block guarantees the handle is closed again.
        with open(totalPath, 'a+'):
            pass
    return totalPath
def GetFileName(url):
    """Return the file-name component of ``url``.

    The fragment ("#...") and query string ("?...") are removed before the
    last path segment is taken, so "http://host/a/b.png?size=1" yields
    "b.png". A URL whose path ends in "/" yields an empty string.
    """
    # Strip fragment, then query: neither belongs to the file name, and a
    # "/" inside the query would otherwise be mistaken for a path separator.
    without_fragment = url.split('#', 1)[0]
    path_part = without_fragment.split('?', 1)[0]
    return os.path.basename(path_part)
def GetUrl(myUrl,localPath):
    """Save every matching image found on one page into ``localPath``.

    Fetches ``myUrl`` with a 5 second timeout, collects the ``<img>`` tags
    whose ``src`` matches "uploads/allimg", and for each one prints the
    image URL and its file name, then downloads it to the path returned by
    ``createFileWithFileName(localPath, fileName)``.

    myUrl     -- address of the page to scan,
                 e.g. "http://www.2cto.com/meinv/sexmv/1819.html"
    localPath -- directory the images are written to

    Failures are reported on stdout and never raised: a page that cannot be
    loaded is skipped entirely, and an image that cannot be saved does not
    prevent the remaining images from being tried.
    """
    try:
        response = urllib2.urlopen(myUrl,None,5)
        try:
            content = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
        soup = BeautifulSoup(content)
        images = soup.findAll("img",attrs={"src": re.compile("(.*)uploads/allimg(.*)")})
    except Exception as e:
        # %r keeps the message ASCII-safe whatever the URL or error contains.
        print("Error: could not load page %r: %r" % (myUrl, e))
        return

    for image in images:
        imgUrl = image["src"]
        print(imgUrl)
        fileName = GetFileName(imgUrl)
        print(fileName)
        try:
            urllib.urlretrieve(imgUrl, createFileWithFileName(localPath, fileName))
        except Exception as e:
            # Best effort: report this image and carry on with the next one.
            print("Error: could not save image %r: %r" % (imgUrl, e))
if __name__ == '__main__':
    # Manual smoke check: print the file name derived from a sample image
    # URL. To scrape a real page instead, call GetUrl with a page address
    # such as "http://www.2cto.com/meinv/sexmv/1810.html" and a target
    # directory.
    sample_image_url = "http://www.2cto.com/meinv/sexmv/1810.jpg"
    print(GetFileName(sample_image_url))