爬虫urllib中的Cookie反爬处理
1.通过对百度翻译的分析,可以发现其中有一个“详细翻译”接口,
其请求地址为:https://fanyi.baidu.com/v2transapi?from=en&to=zh
2.查找Request Headers
3.详细代码
# Baidu "detailed translation": the second anti-scraping case (Cookie).
#
# Sends one POST request to Baidu Translate's v2transapi endpoint with a
# Cookie header and prints the decoded JSON response.
import json
import urllib.parse
import urllib.request

# 1. Request URL.
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'

# 2. Request headers (the Cookie is what this anti-scraping case is about).
# Only the Cookie header is sent; every other header captured from the
# browser request was left disabled in the original script.
# NOTE(review): this Cookie is hard-coded and appears to include
# login-session values (BDUSS) -- presumably it expires and must be replaced
# with one from your own browser session; avoid committing real credentials.
headers = {
    'Cookie': (
        'BIDUPSID=A4B1E6FC0F9F2380A366E7D59492ABAE; '
        'PSTM=1642494695; '
        '__yjs_duid=1_161fb9fd9dd519015047a85692375cf91642555200799; '
        'BDUSS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; '
        'BDUSS_BFESS=l3eFV1alZaYko1MTBZTEV0QWRvcjlnQW5OaXA2b1VYOXdScG0wUDVGN09NdzlpRVFBQUFBJCQAAAAAAAAAAAEAAAD8njU2u9vIysu8xO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM6m52HOpudhOF; '
        'REALTIME_TRANS_SWITCH=1; '
        'SOUND_SPD_SWITCH=1; '
        'HISTORY_SWITCH=1; '
        'FANYI_WORD_SWITCH=1; '
        'SOUND_PREFER_SWITCH=1; '
        'MCITY=-48%3A; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BDSFRCVID=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
        'H_BDCLCKID_SF=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; '
        'H_PS_PSSID=36428_36455_31254_34813_35914_36165_35979_36055_36234_26350_36469_36447; '
        'delPer=0; '
        'PSINO=2; '
        'BDSFRCVID_BFESS=iw-OJeC62Rux4w6Dirv4tSzx1GDoHtnTH6aoRkJDrtALUT_5h8BIEG0PSf8g0Kubigy6ogKKB2OTHnFF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
        'H_BDCLCKID_SF_BFESS=tJKf_CIhtK-3f-op-P__jj_qhUKX5-RLfK5fsl7F5l8-hxoG25Objx0tKlDJ-pJEMn6xKn7dbnrxOKQphp5Oyx0X5-QrLl3dJC-O5qTN3KJmVnL9bT3v5tDtbpuq2-biW2uH2MbdaqQP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe4bK-Tr3jHtHJx5; '
        'BAIDUID=F35C07088578422B085A47B6C7D90E35:FG=1; '
        'BAIDUID_BFESS=F35C07088578422B085A47B6C7D90E35:FG=1; '
        'Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; '
        'Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1653269009; '
        'ab_sr=1.0.1_MTRkNjA3OGMzZTY3N2Q3MzQ1ZTdmNGFmNTAwMTc4MmJkNjg4YzIyYmJlYWU0OWUyNmY2YTM1NGU2NzZhZjg4MDJhYWZhMzQwMjJjYjMzY2UwNjMxOGI3YmMyZWFkOTE5MTVmYzZjNGRhNjAxMDFjZGI4NDNkZmEzM2Y3ODE3NTI4ZmJkYjdlNGIyZjA3YmE0NzIyMWM1NDliYzJkNjU4NjU0YWRhZWNhNWZhNjVjMmRkMzMwMDZmMGZjYmEyNWU3'
    ),
}

# 3. Request parameters (form fields of the POST body).
# NOTE(review): 'sign' and 'token' are fixed values here -- presumably they
# were captured together with the Cookie and only match the query 'love';
# verify before changing 'query'.
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'love',
    'transtype': 'translang',
    'simple_means_flag': '3',
    'sign': '198772.518981',
    'token': '4f6dbf9201136c6f7280be67858fd77d',
    'domain': 'common',
}


def build_request(target_url, form_fields, request_headers):
    """Build the POST request object for the translation endpoint.

    Args:
        target_url: URL the request is sent to.
        form_fields: Mapping of form field names to values.
        request_headers: Mapping of HTTP header names to values.

    Returns:
        A ``urllib.request.Request`` carrying the encoded form body.
    """
    # POST parameters must be URL-encoded and then converted to bytes;
    # supplying a body is what makes urllib issue a POST instead of a GET.
    body = urllib.parse.urlencode(form_fields).encode('utf-8')
    return urllib.request.Request(
        url=target_url, data=body, headers=request_headers
    )


def fetch_translation(request, timeout=10):
    """Send *request* and return the JSON-decoded response body.

    Args:
        request: A prepared ``urllib.request.Request``.
        timeout: Seconds to wait before giving up on the connection.

    Returns:
        The object produced by ``json.loads`` on the UTF-8 decoded body.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    # The context manager closes the connection even when decoding fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        content = response.read().decode('utf-8')
    return json.loads(content)


def main():
    """Run the demo: build the request, send it, print the parsed reply."""
    # 4. Customize the request object.
    request = build_request(url, data, headers)
    # 5./6. Send the request like a browser would and read the response.
    obj = fetch_translation(request)
    print(obj)


if __name__ == '__main__':
    main()
运行效果
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!