Python爬取百度图片

import urllib.request as urqt
import urllib.parse as urps
from urllib.parse import quote
import requests
import os
import re
import sys
def gethtml(url):
    """Fetch *url* and return the response body decoded as text.

    The request is sent with a desktop Firefox User-Agent, does not follow
    redirects, and gives up after 10 seconds.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    # The context manager closes the session (and its pooled connections)
    # even when the request raises; previously the session was never closed.
    with requests.Session() as session:
        # Assignment replaces, rather than extends, the session's headers.
        session.headers = header
        return session.get(url, timeout=10, allow_redirects=False).text
def getbyte(url):
    """Download *url* and return the raw response body.

    The request carries a desktop Firefox User-Agent header and times out
    after 10 seconds, the same limit ``gethtml`` uses.

    Args:
        url: Address of the resource to download.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        OSError: On socket-level failures, including timeout.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    rep = urqt.Request(url, headers=header)
    # Close the response deterministically instead of leaving the socket to
    # the garbage collector, and never block forever on a stalled server.
    with urqt.urlopen(rep, timeout=10) as response:
        return response.read()
def makejpg(url, f):
    """Download *url* and write its bytes to the open binary file *f*.

    The file is always closed before this function returns, whether the
    download succeeds or raises.

    Args:
        url: Address of the image to download.
        f: Writable binary file object; ownership passes to this function.

    Raises:
        Whatever ``getbyte`` or ``f.write`` raises; the file is closed first.
    """
    try:
        f.write(getbyte(url))
    finally:
        # Previously a failed download skipped close() and leaked the handle.
        f.close()
def getintofold(string, base_dir=r"D:\信息\python\一些成品\百度图片爬虫"):
    """Change the working directory to the image folder for *string*.

    The folder is named ``string + "图片"`` and lives directly under
    *base_dir*; it is created if it does not exist yet.

    Args:
        string: Search keyword the folder is named after.
        base_dir: Existing directory that holds the per-keyword folders.
            Defaults to the location the script originally hard-coded.

    Raises:
        OSError: If *base_dir* cannot be entered, or the target exists but
            is not a directory.
    """
    os.chdir(base_dir)
    want = string + "图片"
    # makedirs(exist_ok=True) replaces the listdir() membership test, which
    # compared names case-sensitively (so mkdir could fail on
    # case-insensitive filesystems) and could race with other processes.
    os.makedirs(want, exist_ok=True)
    os.chdir(want)
def getall(num, url):
    """Download up to *num* thumbnails found via the search page *url*.

    Images are saved in the current working directory as ``1.jpg``,
    ``2.jpg``, ... in download order. Result pages are requested by appending
    ``&pn=<offset>`` to *url* until enough images are saved or a page
    contains no thumbnail links.

    Args:
        num: Maximum number of images to save.
        url: Search URL without a ``pn`` parameter.
    """
    key = re.compile(r'"thumbURL":"(.*?)"')
    tot = 0       # images saved so far
    offset = 0    # results consumed so far, including failed downloads
    page_url = url
    while tot < num:
        links = key.findall(gethtml(page_url))
        if not links:
            # Without this guard an empty page made the loop request the
            # same URL forever.
            print("没有更多图片了")
            break
        for link in links:
            if tot >= num:
                break
            offset += 1
            name = str(tot + 1) + ".jpg"
            with open(name, "wb") as f:
                try:
                    makejpg(link, f)
                    saved = True
                except Exception:
                    # Exception, not BaseException, so Ctrl-C still stops
                    # the program.
                    saved = False
            if not saved:
                print("错误")
                # Do not leave an empty or partial file behind.
                try:
                    os.remove(name)
                except OSError:
                    pass
                continue
            tot += 1
            print("第 " + str(tot) + " 个已下载")
        # Page by results consumed rather than by successes, so a failed
        # download does not cause the same results to be fetched again.
        page_url = url + "&pn=" + str(offset)
def endd():
    """Print a thank-you message and terminate the program.

    The original body called ``g.msgbox``, but no name ``g`` is defined or
    imported in this file, so the call always raised ``NameError``. The
    message is printed to the console instead.

    Raises:
        SystemExit: Always, via ``sys.exit()``.
    """
    print("感谢使用")
    sys.exit()
def init():
    """Prompt for a keyword and a count, then download that many images.

    Both inputs are validated before any directory is created, so bad input
    prints a message and returns instead of crashing with a traceback after
    the folder already exists.
    """
    keyword = input("请输入想要的图片: ").strip()
    raw_count = input("请输入想要的数量: ")
    if not keyword:
        print("关键词不能为空")
        return
    try:
        count = int(raw_count)
    except ValueError:
        print("数量必须是整数")
        return
    if count <= 0:
        print("数量必须大于 0")
        return
    getintofold(keyword)
    # Percent-encode the keyword so non-ASCII text is valid in the query.
    encoded = quote(keyword, encoding='utf-8')
    url = "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1599885698346_R&pv=&ic=0&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=" + encoded
    getall(count, url)
# Script entry point: this top-level call starts the interactive downloader
# whenever the file is executed (and also if it is imported as a module).
init()

posted @ 2020-09-16 14:18  olinr  阅读(282)  评论(1)  编辑  收藏  举报