from urllib.parse import urljoin

import requests
from lxml import etree
# Cookies sent along with every request made by this script.
# NOTE(review): the values look like they were copied from a browser session
# (1703739850 resembles a Unix timestamp) and may be stale -- confirm whether
# the site actually requires them.
cookies = {
    "_pk_ref.2.90a9": (
        "%5B%22%22%2C%22%22%2C1703739850%2C"
        "%22https%3A%2F%2Fwww.google.com.hk%2F%22%5D"
    ),
    "_pk_id.2.90a9": "b87f72074fff4914.1703739850.",
    "_pk_ses.2.90a9": "1",
}
# Request headers imitating a desktop Chrome 120 browser on Windows, as
# declared by the user-agent and sec-ch-ua values below.
headers = {
    'authority': 'www.xiurenwang.cc',
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.7'
    ),
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    # No 'cookie' entry on purpose: the cookie values are handed to requests
    # through the separate `cookies=` argument rather than as a raw header.
    'pragma': 'no-cache',
    'referer': 'https://www.xiurenwang.cc/bang',
    # Client-hint headers.
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    # Fetch-metadata headers.
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
}
# Site root that hrefs scraped from the listing page are resolved against.
BASE_URL = 'https://www.xiurenwang.cc'
# Seconds to wait on the server. requests applies no timeout by default, so
# without this a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Step 1: download page 2 of the /bang listing and collect the detail-page
# links found under <div class="list">.
response = requests.get(
    'https://www.xiurenwang.cc/bang/page/2',
    cookies=cookies,
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)
# Surface HTTP errors (4xx/5xx) instead of silently parsing an error page.
response.raise_for_status()
html = etree.HTML(response.text)
next_url = html.xpath('//div[@class="list"]/li/a[@class="img"]/@href')

# Step 2: visit every detail page and print the hrefs found inside
# <div id="image"> (one list per detail page).
for item in next_url:
    # urljoin handles site-relative hrefs as well as hrefs that are already
    # absolute; plain string concatenation would corrupt the latter.
    detail_url = urljoin(BASE_URL, item)
    response = requests.get(
        detail_url,
        cookies=cookies,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    html = etree.HTML(response.text)
    img_url = html.xpath('//div[@id="image"]/a/@href')
    print(img_url)