爬虫urllib中的Cookie反爬处理

1.通过对百度翻译的分析,可以发现其中有一个“详细翻译”接口

即请求地址:https://fanyi.baidu.com/v2transapi?from=en&to=zh

2.查找Request Headers

3.详细代码

复制代码
#百度详细翻译,反爬的第二种情况 Cookie
import urllib.request
import urllib.parse
import json

# 1. Request URL of the "detailed translation" endpoint.
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
# 2. Request headers (the Cookie is the header involved in the anti-scraping
#    check). Only 'Cookie' is actually sent; the other headers captured from
#    the browser are kept below, commented out, for reference.
# NOTE(review): the Cookie value is a captured browser session (it includes
# BDUSS entries) — presumably it expires and should be treated as a secret
# rather than committed or published; confirm and replace with your own.
headers = {
    #'Accept': '*/*',
    #'Accept-Encoding': 'gzip, deflate, br',
    # 'Accept-Language': 'zh-CN,zh;q=0.9',
    # 'Connection': 'keep-alive',
    # 'Content-Length': '136',
    # 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'BIDUPSID=A4B1E6FC0F9F2380A366E7D59492ABAE; PSTM=1642494695; __yjs_duid=1_161fb9fd9dd519015047a85692375cf91642555200799; BDUSS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; BDUSS_BFESS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; REALTIME_TRANS_SWITCH=1; SOUND_SPD_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_PREFER_SWITCH=1; MCITY=-48%3A; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; H_PS_PSSID=36428_36455_31254_34813_35914_36165_35979_36055_36234_26350_36469_36447; delPer=0; PSINO=2; BDSFRCVID_BFESS=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF_BFESS=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; BAIDUID=F35C07088578422B085A47B6C7D90E35:FG=1; BAIDUID_BFESS=F35C07088578422B085A47B6C7D90E35:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; ab_sr=1.0.1_MTRkNjA3OGMzZTY3N2Q3MzQ1ZTdmNGFmNTAwMTc4MmJkNjg4YzIyYmJlYWU0OWUyNmY2YTM1NGU2NzZhZjg4MDJhYWZhMzQwMjJjYjMzY2UwNjMxOGI3YmMyZWFkOTE5MTVmYzZjNGRhNjAxMDFjZGI4NDNkZmEzM2Y3ODE3NTI4ZmJkYjdlNGIyZjA3YmE0NzIyMWM1NDliYzJkNjU4NjU0YWRhZWNhNWZhNjVjMmRkMzMwMDZmMGZjYmEyNWU3',
    # 'Host': 'fanyi.baidu.com',
    # 'Origin': 'https://fanyi.baidu.com',
    # 'Referer': 'https://fanyi.baidu.com/',
    # 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
    # 'sec-ch-ua-mobile': '?0',
    # 'sec-ch-ua-platform': '"Windows"',
    # 'Sec-Fetch-Dest': 'empty',
    # 'Sec-Fetch-Mode': 'cors',
    # 'Sec-Fetch-Site': 'same-origin',
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
    # 'X-Requested-With': 'XMLHttpRequest'
}
# 3. Form fields of the POST body, URL-encoded and converted to bytes in one
#    step (urllib requires a bytes body for POST requests).
# NOTE(review): 'sign' and 'token' are values copied from a captured browser
# request — presumably tied to the query text and the session; confirm before
# changing 'query'.
data = urllib.parse.urlencode(
    (
        ('from', 'en'),
        ('to', 'zh'),
        ('query', 'love'),
        ('transtype', 'translang'),
        ('simple_means_flag', '3'),
        ('sign', '198772.518981'),
        ('token', '4f6dbf9201136c6f7280be67858fd77d'),
        ('domain', 'common'),
    )
).encode(encoding='utf-8')

# 4. Build the request object. Supplying `data` makes urllib send a POST, and
#    `headers` carries the Cookie defined earlier in the script.
request = urllib.request.Request(url=url, data=data, headers=headers)

# 5. Send the request the way a browser would. The `with` block guarantees the
#    HTTP response (and its underlying connection) is closed even if reading
#    or decoding fails; the timeout keeps the script from hanging forever on
#    an unresponsive server.
with urllib.request.urlopen(request, timeout=10) as response:
    # 6. Read the whole response body and decode it as UTF-8 text.
    content = response.read().decode('utf-8')

# Parse the JSON text into Python objects and show the result.
obj = json.loads(content)
print(obj)
复制代码

运行效果

 

posted @   创客未来  阅读(213)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!
点击右上角即可分享
微信分享提示