Python 爬取 B 站专栏图片
当 olinr 学会了爬虫……
嘿嘿嘿
import urllib.request as urqt
import urllib.parse as urps
import sys
import os
import re
import shutil
# Number of images saved so far. getpng() increments it after each successful
# download, and the main loop keeps requesting pages while it is below `num`.
tot = 0
def gethtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a desktop Firefox ``User-Agent`` header rather
    than urllib's default one.

    Args:
        url: Address to fetch; handed unchanged to ``urllib.request.Request``.

    Returns:
        The complete response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exception: Anything ``urlopen`` raises is propagated to the caller.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    request = urqt.Request(url, headers=headers)
    # Close the response deterministically instead of leaving the connection
    # open until the object happens to be garbage-collected.
    with urqt.urlopen(request) as response:
        return response.read().decode("utf-8")
def GetIntoPlace(string, base=r"D:\信息\python\一些成品\b站专栏图片爬虫"):
    """Create a fresh, empty folder named *string* under *base* and chdir into it.

    Any existing entry called *string* inside *base* is deleted first, so the
    process always ends up inside an empty directory.

    Args:
        string: Name of the folder to (re)create inside *base*.
        base: Directory that holds the per-keyword folders. Defaults to the
            location the script originally hard-coded.

    Raises:
        OSError: If *base* cannot be entered or the folder cannot be
            removed or created.
    """
    os.chdir(base)
    if os.path.isdir(string) and not os.path.islink(string):
        shutil.rmtree(string)
    elif os.path.lexists(string):
        # A plain file or symlink with the same name would make rmtree()
        # fail and block mkdir(); remove it directly instead.
        os.remove(string)
    os.mkdir(string)
    os.chdir(string)
def getpng(url):
    """Download one image and save it as ``<count>.jpg`` in the current directory.

    On success the module-level counter ``tot`` is incremented, the bytes are
    written to ``str(tot) + '.jpg'`` and a progress line is printed. When the
    counter reaches the module-level ``num``, the process exits through
    ``sys.exit()``.

    Args:
        url: Address of the image to download.

    Returns:
        None. A failed download is skipped silently and leaves ``tot``
        unchanged.
    """
    global tot
    try:
        with urqt.urlopen(url) as response:
            data = response.read()
    except Exception:
        # Best-effort: skip images that cannot be fetched. Catching Exception
        # rather than BaseException lets Ctrl-C (KeyboardInterrupt) and
        # SystemExit still stop the crawl.
        return
    tot += 1
    with open(str(tot) + '.jpg', 'wb') as image_file:
        image_file.write(data)
    print("正在下载第 " + str(tot) + " 张")
    if tot == num:
        sys.exit()
# Matches lazy-loaded images: img data-src="//host/path.jpg. The group
# captures the protocol-relative address starting at "//". A raw string keeps
# "\." from being read as an (invalid) string escape sequence.
_IMG_DATA_SRC_RE = re.compile(r'img data-src="(//.+?\.jpg)')


def getans(html):
    """Download every ``.jpg`` referenced by an ``img data-src`` attribute in *html*.

    Each protocol-relative address found is prefixed with ``http:`` and handed
    to ``getpng`` in the order it appears in the page.

    Args:
        html: Page markup to scan.
    """
    for address in _IMG_DATA_SRC_RE.findall(html):
        getpng("http:" + address)
# An anchor fragment that has a title attribute followed by an href attribute.
_TITLED_ANCHOR_RE = re.compile(r'a title.+? href=".+?"')
# First protocol-relative address ("//...") inside such a fragment. The
# closing quote is matched but left outside the captured group.
_PROTOCOL_RELATIVE_RE = re.compile(r'(//.+?)"')


def work(html):
    """Visit every titled link found in *html* and download the images behind it.

    For each ``a title ... href="..."`` fragment, the first ``//...`` address
    in it is prefixed with ``http:``, fetched with ``gethtml`` and the
    resulting page is passed to ``getans``.

    Args:
        html: Markup of the page whose links should be followed.
    """
    for anchor in _TITLED_ANCHOR_RE.findall(html):
        found = _PROTOCOL_RELATIVE_RE.search(anchor)
        if found is None:
            # No "//" address in this link (e.g. a relative href): nothing to
            # fetch, so skip it instead of failing on a missing match.
            continue
        # group(1) excludes the trailing double quote, so it no longer ends up
        # inside the requested URL.
        getans(gethtml("http:" + found.group(1)))
# Script body: ask for a keyword, the number of images wanted and the first
# search page, then walk the search result pages until enough images are
# saved. The normal exit happens inside getpng(), which calls sys.exit() once
# `tot` reaches `num`.
now = input("请输入想要的图片:")
num = int(input("请输入想要爬取的图片数量:"))
frm = int(input("请输入爬取起始页码:"))
GetIntoPlace(now)
now = urps.quote(now, encoding="utf-8")

# Give up after this many consecutive result pages that add no image;
# otherwise the loop would run forever once the search results are exhausted.
MAX_EMPTY_PAGES = 3
empty_pages = 0
while tot < num:
    url = "https://search.bilibili.com/article?keyword=" + now + "&page=" + str(frm)
    saved_before = tot
    work(gethtml(url))
    frm += 1
    if tot > saved_before:
        empty_pages = 0
        continue
    empty_pages += 1
    if empty_pages >= MAX_EMPTY_PAGES:
        print("没有更多图片了, 共下载 " + str(tot) + " 张")
        break
----olinr