import os
import re

import requests
from bs4 import BeautifulSoup
# Regex pulling the href target out of an album-link anchor tag.
rp = r"<a href=(.*?) target"
# Regex pulling the src attribute out of an <img> tag.
rgm = r"<img src=(.*?) alt"
# Local file-name prefix for saved images (files land in ./Picture/).
path = r"./Picture/chengshilvyou"
# Site root, prepended to the relative album links scraped from index pages.
url_base = "https://www.ivsky.com"
# NOTE(review): these are HTTP request *headers*, but every requests.get()
# below passes them as `params=` (i.e. as a query string), so the User-Agent
# is never actually sent. The name also typos "params".
parmas = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Connection":"keep-alive"
}
class Picture():
    """Scraper for the ivsky.com city-travel galleries.

    Reads the module-level constants ``url_base``, ``parmas`` (HTTP headers),
    ``rp``/``rgm`` (regex patterns) and ``path`` (file-name prefix).  For
    backward compatibility with the original script, methods fall back to the
    module-level ``url`` global when no explicit URL argument is given.
    """

    def getHtml(self, url=None):
        """Fetch ``url`` and return it as a BeautifulSoup tree.

        :param url: page to fetch; defaults to the module-level ``url``
                    global (set by the ``__main__`` loop) for compatibility.
        """
        if url is None:
            # Original code read the module global directly; keep that
            # behavior when called with no argument.
            url = globals()["url"]
        # BUG FIX: the header dict was passed as `params` (query string);
        # it must go in `headers` for the User-Agent/Accept to be sent.
        html = requests.get(url, headers=parmas).text
        return BeautifulSoup(html, features="html.parser")

    def getPic_lis(self, url):
        """Return the list of album-page URLs linked from index page ``url``."""
        # BUG FIX: was `picture.getHtml()` — a global instance plus the
        # global url, silently ignoring this method's `url` parameter.
        html_soup = self.getHtml(url)
        pic_list = []
        for tile in html_soup.find_all("div", class_=re.compile("il_img")):
            href = re.findall(rp, str(tile))
            if href:  # skip tiles that don't match the link pattern (was IndexError)
                pic_list.append(url_base + str(href[0]).replace('"', ""))
        print("msgone" + str(pic_list))
        return pic_list

    def getImg_list(self, url=None):
        """Collect every image URL from all album pages of one index page.

        :param url: index page to crawl; defaults to the module-level ``url``.
        """
        if url is None:
            url = globals()["url"]
        img_list = []
        for page_url in self.getPic_lis(url):  # was: picture.getPic_lis(url)
            html_img = requests.get(page_url, headers=parmas).text
            print("metend" + str(html_img))
            matches = re.findall(rgm, html_img)
            print("msg" + str(matches))
            for src in matches:
                # src is quoted and protocol-relative, e.g. "//img.ivsky.com/..."
                img_list.append("https:" + str(src).strip('"'))
        print("msg2" + str(img_list))
        return img_list

    def download(self, num, url=None):
        """Download all images of index page ``num`` as ``<path><num><m>.jpg``.

        :param num: index-page number, embedded in the output file name.
        :param url: index page to crawl; defaults to the module-level ``url``.
        """
        imgs_url = self.getImg_list(url)  # was: picture.getImg_list()
        # Robustness: the original crashed with FileNotFoundError when the
        # ./Picture directory did not exist yet.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        for m, img_url in enumerate(imgs_url):  # manual counter -> enumerate
            print('***** ' + str(m) + '.jpg *****' + ' Downloading...')
            file_name = path + str(num) + str(m) + '.jpg'  # `dir` shadowed a builtin
            response = requests.get(img_url, headers=parmas)
            with open(file_name, "wb") as file:
                file.write(response.content)
            print('***** ' + str(m) + '.jpg *****' + 'Done.')
if __name__ == "__main__":
    # Crawl index pages 1..100 of the city-travel category.
    for num in range(1, 101):
        # `url` must stay a module-level global: the class methods read it
        # when no explicit URL is supplied.
        url = "https://www.ivsky.com/tupian/chengshilvyou/index_" + str(num) + ".html"
        picture = Picture()
        # BUG FIX: download() already runs getPic_lis()/getImg_list()
        # internally; the explicit calls here made every network request
        # three times per page.
        picture.download(num)