# boss

import subprocess
import re
import requests
from urllib.parse import urlparse, parse_qs

from functools import partial
# Rebind subprocess.Popen to a partial that supplies encoding="utf-8" by
# default, so any child process started through subprocess.Popen after this
# line has its pipes opened in text mode and decoded as UTF-8.
# NOTE(review): presumably this is for execjs, which is imported only after
# this patch; confirm before moving or removing it.
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")

import execjs

# Fetch the security-check URL

# City code; used both in the URL path and in the lastCity cookie below.
city = "101280600"

# Query-string parameters for the city search page.
params = {
    "query": "Java",
    "industry": "",
    "position": "",
    "ka": "hot-position-1",
}

# Headers reused by the module-level requests and by get_js().
headers = {
    "Cookie": f"lastCity={city}",
    "Referer": "https://www.zhipin.com/",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
}

# Redirects are not followed so the Location header of the response can be
# read directly; it is None when the response carries no such header.
res = requests.get(
    url=f"https://www.zhipin.com/c{city}/",
    params=params,
    headers=headers,
    allow_redirects=False,
)
security_check = res.headers.get("location")
print(security_check)

# Check (检测): https://www.zhipin.com/ https://www.zhipin.com/web/common/security-js/feb06034.js

# Request the site root with the redirect target appended after "?" and
# report the HTTP status code.
# NOTE(review): the Location value is appended verbatim after "?"; confirm
# this produces the URL the site actually expects.
security_check_url = "".join(("https://www.zhipin.com/?", security_check))
sec_res = requests.get(security_check_url, headers=headers)
print(sec_res.status_code)

def get_js_name(url, val):
    """Return the first value of query parameter ``val`` found in ``url``.

    Despite its name, the function is a generic query-string reader: it
    parses ``url`` and looks up whichever parameter name is passed in.

    :param url: absolute or relative URL containing a query string
    :param val: name of the query parameter to read
    :return: the first value of the parameter, URL-decoded, as a string
    :raises ValueError: if ``url`` is empty/None or the parameter is absent
        or blank (blank values are dropped by ``parse_qs``)
    """
    if not url:
        raise ValueError(
            "cannot read query parameter {!r}: no URL given".format(val)
        )
    values = parse_qs(urlparse(url).query).get(val)
    if not values:
        raise ValueError(
            "query parameter {!r} not found in URL {!r}".format(val, url)
        )
    return values[0]

def get_js():
    """Download the security script named in the security-check URL.

    The script name is the ``name`` query parameter of the module-level
    ``security_check`` value; the request reuses the module-level
    ``headers``.

    :return: the ``requests`` response for the script URL
    """
    script_name = get_js_name(security_check, "name")
    return requests.get(
        url=f'https://www.zhipin.com/web/common/security-js/{script_name}.js',
        headers=headers,
    )

# Get ts and seed

# Read the ts and seed query parameters from the security-check URL and
# download the text of the security script.
ts = get_js_name(security_check, "ts")
seed = get_js_name(security_check, "seed")
js_code = get_js().text
# Print the values in the order the label announces (seed first, then ts).
print("获取seed,ts", seed, ts)

def process_js_code(js_code):
    """Rewrite the script's wrapper function and expose it as ``window.loader``.

    Looks for the first assignment of the form
    ``X=function(){return N.apply(null,[<digits>].concat(Array.prototype.slice.call(arguments)));};``
    and replaces it with a two-argument version
    ``X=function(t,n){return N.apply(this,[<digits>,t,n]);};window.loader=X;``.
    Only the first occurrence is rewritten.

    :param js_code: source text of the security script
    :return: the modified script text
    :raises ValueError: if the expected wrapper function is not present
    """
    # Every regex metacharacter of the literal JS text is escaped; unescaped
    # parentheses/brackets/dots would be read as groups, a character class
    # and wildcards, and the pattern could never match the real source.
    pattern = (
        r"(\w+)=function\(\)\{return (N)\.apply\(null,\[(\d+)\]"
        r"\.concat\(Array\.prototype\.slice\.call\(arguments\)\)\);\};"
    )
    match = re.search(pattern, js_code)
    if match is None:
        raise ValueError("wrapper function not found in the security script")

    function_name, apply_function, number = match.groups()
    replacement = (
        "{}=function(t,n){{return {}.apply(this,[{},t,n]);}};window.loader={};"
    ).format(function_name, apply_function, number, function_name)

    # Splice by position rather than re.sub so the replacement text is
    # inserted verbatim, with no backslash/group-reference processing.
    return js_code[:match.start()] + replacement + js_code[match.end():]

def js_all_code(js_code):
    """Assemble the complete script that is handed to the JS runtime.

    The result is three pieces joined in order: a prelude aliasing
    ``window`` to ``global``, the script as rewritten by
    ``process_js_code``, and a ``sign(n, t)`` function that forwards to
    ``window.loader``.

    :param js_code: source text of the security script
    :return: the combined JavaScript source
    """
    pieces = (
        "window = global;",
        process_js_code(js_code),
        ";function sign(n,t){return window.loader(n,t)};",
    )
    return "".join(pieces)

def get_sign(js_code, zeed, ts):
    """Run the patched script and return the value of ``sign(zeed, ts)``.

    The full script and the resulting token are both printed before the
    token is returned.

    :param js_code: source text of the security script
    :param zeed: first argument passed to the script's ``sign`` function
    :param ts: second argument passed to the script's ``sign`` function
    :return: whatever the ``sign`` call evaluates to
    """
    script = js_all_code(js_code)
    runtime = execjs.compile(script)
    token = runtime.call("sign", zeed, ts)
    print(script)
    print(token)
    return token

# Compute the initial token from the downloaded script; ts is read from the
# query string as text and is converted to an integer for the call.
zp_stoken = get_sign(js_code, zeed=seed, ts=int(ts))

def get_job_list():
    """Query the job-list API, re-signing once if the reply carries a new seed.

    The first request is sent with the module-level ``zp_stoken``. If the
    JSON reply contains ``zpData.seed`` and ``zpData.ts``, a new token is
    computed from them with ``get_sign`` and the request is repeated with
    that token. If the reply is not JSON, or has no seed/ts, the first
    response is returned as-is.

    :return: the ``requests`` response of the last request made
    """
    api_url = 'https://www.zhipin.com/wapi/zpgeek/search/joblist.json'

    cookies = {
        'wd_guid': 'b0e02dad-9890-46a8-9fb9-914f9793383d',
        'historyState': 'state',
        '__g': '-',
        '__l': 'l=%2Fwww.zhipin.com%2Fweb%2Fgeek%2Fjob%3Fquery%3DJava%26city%3D101010100&r=&g=&s=3&friend_source=0',
        '_bl_uid': 'tjldetCOunj7XqwmaoFadsmbUgyh',
        'Hm_lvt_194df3105ad7148dcf2b98a91b5e727a': '1710601402',
        'lastCity': '101280600',
        'Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a': '1710604226',
        '__c': '1710601401',
        '__a': '15400391.1710601401..1710601401.14.1.14.14',
        # Same cookie name as the one refreshed before the retry below, so
        # the retry replaces this token instead of adding a second cookie.
        '__zp_stoken__': zp_stoken,
    }

    headers = {
        'authority': 'www.zhipin.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'referer': 'https://www.zhipin.com/web/geek/job?query=Java&city=101280600&page=1',
        'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }

    params = {
        'scene': '1',
        'query': 'Java',
        'city': '101280600',
        'experience': '',
        'payType': '',
        'partTime': '',
        'degree': '',
        'industry': '',
        'scale': '',
        'stage': '',
        'position': '',
        'jobType': '',
        'salary': '',
        'multiBusinessDistrict': '',
        'multiSubway': '',
        'page': '3',
        'pageSize': '30',
    }

    response = requests.get(api_url, params=params, cookies=cookies, headers=headers)

    # Parse the body once; a non-JSON body means there is nothing to re-sign.
    try:
        payload = response.json()
    except ValueError:
        return response

    zp_data = payload.get("zpData") if isinstance(payload, dict) else None
    if not isinstance(zp_data, dict):
        return response

    seed = zp_data.get("seed")
    ts = zp_data.get("ts")
    if seed is None or ts is None:
        # No new seed/ts in the reply, so no second token can be computed.
        return response

    zp_stoken2 = get_sign(js_code, seed, ts)
    print("尝试第二次请求:zp_stoken2",zp_stoken2)
    cookies['__zp_stoken__'] = zp_stoken2
    return requests.get(api_url, params=params, cookies=cookies, headers=headers)
# posted @ 2024-03-17 20:02  牧羊人の冬天  阅读(107)  评论(0)  编辑  收藏  举报