# Python爬取百度图片 (scrape Baidu image search results with Python)
import urllib.request as urqt
import urllib.parse as urps
from urllib.parse import quote
import requests
import os
import re
import sys
def gethtml(url):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).

    Raises:
        requests.RequestException: If the request fails or the 10-second
            timeout expires.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    # The context manager closes the session (and its pooled connections)
    # on exit; previously a new session was leaked on every call.
    with requests.Session() as session:
        # Assignment replaces the session's default header set wholesale
        # with the dict above rather than merging into it.
        session.headers = header
        # Redirects are not followed: a 3xx answer's own body is returned.
        return session.get(url, timeout=10, allow_redirects=False).text
def getbyte(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the resource to download.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        OSError: If the request fails or times out
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    request = urqt.Request(url, headers=header)
    # ``with`` closes the response (and its socket) once the body is read.
    # The 10-second timeout matches gethtml() so a stalled server cannot
    # block the download loop forever.
    with urqt.urlopen(request, timeout=10) as response:
        return response.read()
def makejpg(url, f):
    """Download the resource at *url* into *f*, then close *f*.

    Args:
        url: Address of the image to download.
        f: File object opened for binary writing. It is closed before this
            function returns, whether or not the download succeeded.

    Raises:
        Exception: Whatever ``getbyte`` or ``f.write`` raises is propagated
            after *f* has been closed.
    """
    try:
        f.write(getbyte(url))
    finally:
        # Close on the failure path too; previously a failed download left
        # the handle open.
        f.close()
def getintofold(string, base_dir=r"D:\信息\python\一些成品\百度图片爬虫"):
    """Change the working directory to the image folder for *string*.

    The folder is named ``string + "图片"`` and lives directly under
    *base_dir*. It is created if it does not exist yet.

    Args:
        string: Search term the folder is named after.
        base_dir: Directory that holds the per-term folders. Defaults to the
            path the script was originally written for.

    Raises:
        OSError: If *base_dir* cannot be entered, or the target exists but
            is not a directory.
    """
    os.chdir(base_dir)
    want = string + "图片"
    try:
        os.mkdir(want)
    except FileExistsError:
        # Already there from an earlier run: reuse it. If the name belongs
        # to a regular file, the chdir below raises.
        pass
    os.chdir(want)
def getall(num, url):
    """Download up to *num* thumbnails listed on the result pages of *url*.

    Each page is fetched with ``gethtml`` and scanned for ``"thumbURL"``
    entries. Every image is saved in the current working directory as
    ``1.jpg``, ``2.jpg``, ... Further pages are requested by appending
    ``&pn=<offset>`` to *url*.

    Args:
        num: Maximum number of images to save; nothing happens if it is
            zero or negative.
        url: Search result URL of the first page.

    Returns:
        None.
    """
    key = re.compile(r'"thumbURL":"(.*?)"')
    tot = 0     # images saved so far
    offset = 0  # results consumed so far; used as the paging offset
    now = url
    while tot < num:
        links = key.findall(gethtml(now))
        if not links:
            # No more results. Without this guard the same empty page
            # would be requested forever.
            break
        for link in links:
            if tot >= num:
                break
            offset += 1
            name = str(tot + 1) + ".jpg"
            try:
                # ``with`` guarantees the handle is closed even if the
                # download fails part-way (a second close is a no-op).
                with open(name, "wb") as f:
                    makejpg(link, f)
            except Exception:
                # Not BaseException: Ctrl-C must still stop the program.
                print("错误")
                try:
                    # Do not leave an empty or partial file behind.
                    os.remove(name)
                except OSError:
                    pass
                continue
            tot += 1
            print("第 " + str(tot) + " 个已下载")
        # Page by results seen, not by successes, so a failed download does
        # not make the next page overlap the previous one.
        now = url + "&pn=" + str(offset)
def endd():
    """Print a farewell message and terminate the program.

    Raises:
        SystemExit: Always, via ``sys.exit()``.
    """
    # The previous ``g.msgbox(...)`` call referenced a name ``g`` that is
    # neither defined nor imported in this file, so it raised NameError.
    # The rest of the script talks to the user on the console, so the
    # message is printed instead.
    print("感谢使用")
    sys.exit()
def init():
    """Ask for a search term and a count, then download that many images.

    The images are stored in the folder selected by ``getintofold`` for the
    search term.

    Raises:
        ValueError: If the count entered is not an integer.
    """
    now = input("请输入想要的图片: ")
    # Parse the count before touching the file system, so a mistyped number
    # does not leave an empty folder behind.
    num = int(input("请输入想要的数量: "))
    getintofold(now)
    now = quote(now, encoding="utf-8")
    # "&copyright=" had been mangled into "©right=" (the "&copy" prefix was
    # decoded as an HTML entity), which corrupted the query string.
    url = (
        "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592"
        "&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1599885698346_R&pv=&ic=0"
        "&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height="
        "&face=0&istype=2&ie=utf-8&sid=&word="
        + now
    )
    getall(num, url)
# Start the interactive downloader only when run as a script, so importing
# this module does not prompt for input or change the working directory.
if __name__ == "__main__":
    init()
# ----olinr