某黑马magnet搜索接口

  
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Search the heimaai.top index for a keyword and print every result as
# "<title> <absolute url>", one result per line.

# Site root; relative result links are resolved against it.
BASE_URL = 'https://heimaai.top'

# Cookies sent with the request.
# NOTE(review): these look like values copied from one browser session
# (session id, analytics and challenge tokens); presumably they expire and
# must be refreshed before the script works again -- confirm.
cookies = {
    'JSESSIONID': 'ACC5D9245FC54596A57C4486CF2C0EA9',
    'HstCfa4602665': '1707702051444',
    'HstCla4602665': '1707702051444',
    'HstCmu4602665': '1707702051444',
    'HstPn4602665': '1',
    'HstPt4602665': '1',
    'HstCnv4602665': '1',
    'HstCns4602665': '1',
    'aywcUid': 'waMYJwndUl_20240212094110',
    'tet': '1707705671658',
    'tetm': 'E8F5E',
    'tcv': 'V4',
}

# Browser-like headers (Chrome 121 on Windows, per the user-agent string).
# The cookie header is intentionally absent: cookies go through `cookies=`.
headers = {
    'authority': 'heimaai.top',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'referer': 'https://heimaai.top/recaptcha/v4/challenge?url=https://heimaai.top&s=1',
    'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
}

# Query-string parameters; `keyword` is the search term.
params = {
    'keyword': '美女',
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page.
response = requests.get(
    BASE_URL + '/search',
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=10,
)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# Read the title and the href from the SAME <a> element so the two lists can
# never drift out of step (two independent queries with different matching
# rules could pair a title with the wrong link, or yield no links at all).
title = []
href = []
for anchor in soup.find_all('a', {'class': 'result-resource-title common-link'}):
    link = anchor.get('href')
    # Only result anchors that live inside an <li> and actually carry a link.
    if link is None or anchor.find_parent('li') is None:
        continue
    title.append(anchor.text)
    href.append(link)

# urljoin leaves absolute URLs untouched and resolves every relative form
# (with or without a leading slash, protocol-relative) against the site root.
for name, link in zip(title, href):
    print(name, urljoin(BASE_URL, link))

posted @ 2024-02-12 09:58 萧海~ 阅读(1427) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· korean sexy pet网站爬取

· py之秀人网详情链接

· Day 10 10.1 数据解析方法之-BS4

· 数据采集第一次作业

· 案例2 接口巡检项目

公告

侧边栏

昵称：萧海~
园龄： 4年11个月
粉丝： 4
关注： 88

+加关注

2025年3月

日

一

二

三

四

五

六

萧海~

记录自己和别人的博客

某黑马magnet搜索接口

公告

搜索

常用链接

随笔分类

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论


	from urllib.parse import urljoin

	import requests
	from bs4 import BeautifulSoup

	# Search the heimaai.top index for a keyword and print every result as
	# "<title> <absolute url>", one result per line.

	# Site root; relative result links are resolved against it.
	BASE_URL = 'https://heimaai.top'

	# Cookies sent with the request.
	# NOTE(review): these look like values copied from one browser session;
	# presumably they expire and must be refreshed -- confirm.
	cookies = {
	    'JSESSIONID': 'ACC5D9245FC54596A57C4486CF2C0EA9',
	    'HstCfa4602665': '1707702051444',
	    'HstCla4602665': '1707702051444',
	    'HstCmu4602665': '1707702051444',
	    'HstPn4602665': '1',
	    'HstPt4602665': '1',
	    'HstCnv4602665': '1',
	    'HstCns4602665': '1',
	    'aywcUid': 'waMYJwndUl_20240212094110',
	    'tet': '1707705671658',
	    'tetm': 'E8F5E',
	    'tcv': 'V4',
	}

	# Browser-like headers (Chrome 121 on Windows, per the user-agent string).
	# The cookie header is intentionally absent: cookies go through `cookies=`.
	headers = {
	    'authority': 'heimaai.top',
	    # The wildcard media range is "*/*"; the asterisks had been stripped.
	    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
	    'accept-language': 'zh-CN,zh;q=0.9',
	    'cache-control': 'no-cache',
	    'pragma': 'no-cache',
	    'referer': 'https://heimaai.top/recaptcha/v4/challenge?url=https://heimaai.top&s=1',
	    'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
	    'sec-ch-ua-mobile': '?0',
	    'sec-ch-ua-platform': '"Windows"',
	    'sec-fetch-dest': 'document',
	    'sec-fetch-mode': 'navigate',
	    'sec-fetch-site': 'same-origin',
	    'upgrade-insecure-requests': '1',
	    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
	}

	# Query-string parameters; `keyword` is the search term.
	params = {
	    'keyword': '美女',
	}

	# A timeout keeps the script from hanging forever on a stalled connection,
	# and raise_for_status() stops us from parsing an HTTP error page.
	response = requests.get(
	    BASE_URL + '/search',
	    params=params,
	    cookies=cookies,
	    headers=headers,
	    timeout=10,
	)
	response.raise_for_status()

	soup = BeautifulSoup(response.text, "html.parser")

	# Read the title and the href from the SAME <a> element so the two lists
	# can never drift out of step.
	title = []
	href = []
	for anchor in soup.find_all('a', {'class': 'result-resource-title common-link'}):
	    link = anchor.get('href')
	    # Only result anchors that live inside an <li> and actually carry a link.
	    if link is None or anchor.find_parent('li') is None:
	        continue
	    title.append(anchor.text)
	    href.append(link)

	# urljoin leaves absolute URLs untouched and resolves every relative form
	# against the site root.
	for name, link in zip(title, href):
	    print(name, urljoin(BASE_URL, link))