用 lxml 的 etree 和 XPath 解析 HTML,获取拉勾网的动态 token
# coding: utf-8
import re

import requests
import urllib3
from lxml import etree

# Certificate verification is switched off for the request below, so silence
# the InsecureRequestWarning that urllib3 would otherwise emit on every call.
urllib3.disable_warnings()

# Module-level session so cookies set by the login page are kept for any
# later requests made through the same object.
s = requests.session()


def get_it_execution(timeout=10):
    """Fetch the Lagou login page and extract its anti-forgery values.

    The page is downloaded through the shared session ``s``, parsed with
    lxml, and the text of the ``//script[2]`` nodes is searched for the
    ``Token = '...'`` and ``Code = '...'`` assignments.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict that may contain ``X_Anti_Forge_Token`` and
        ``X_Anti_Forge_Code``. Keys are filled in that order; if extraction
        fails, a message is printed and whatever was collected so far
        (possibly an empty dict) is returned.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    loginurl = "https://passport.lagou.com/login/login.html"
    h1 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
    }
    # NOTE(review): verify=False disables TLS certificate checking; kept
    # because the original script relies on it, but it is unsafe on
    # untrusted networks.
    r = s.get(loginurl, headers=h1, verify=False, timeout=timeout)
    dom = etree.HTML(r.content)

    tokencode = {}
    try:
        script_text = "".join(dom.xpath("//script[2]/text()"))
        tokencode["X_Anti_Forge_Token"] = re.findall(r"Token = '(.+?)'", script_text)[0]
        tokencode["X_Anti_Forge_Code"] = re.findall(r"Code = '(.+?)'", script_text)[0]
    except (AttributeError, IndexError):
        # AttributeError: the parser produced no document (dom is None).
        # IndexError: the expected assignment was not found in the script.
        # NOTE(review): the message wording ("lt、execution") does not match
        # the values extracted here; left unchanged to keep output stable.
        print("lt、execution参数获取失败!")
    return tokencode


if __name__ == "__main__":
    print(get_it_execution())