# requestium
import datetime
import json
import os
import pickle
import sys
import time
import traceback

import pandas as pd
import requests
from requestium import Session, Keys

# Maps the `selected` argument of getdata() to the numeric code that is
# substituted into the form data.
_LISTING_TYPES = {'中介': 1, '业主': 2}

# Keys copied out of every returned record, in output column order.
_COLUMNS = (
    'cqmc', 'fczsh', 'cjsj', 'fwtybh', 'gpfyid', 'gpid', 'gplxrxm', 'jzmj',
    'mdmc', 'scgpshsj', 'tygpbh', 'wtcsjg', 'xqmc', 'xzqhname', 'xzqh',
)


def _login(driver):
    """Open the login page in the browser and wait for the operator.

    Fills the ``zjhm`` and ``sjhm`` inputs, then blocks on ``input()`` until
    Enter is pressed in the console.
    """
    # Make navigator.webdriver report undefined on every new document.
    driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    })
    driver.get('https://1')
    # find_element('id', ...) works on Selenium 3 and 4; the
    # find_element_by_id shortcut was removed in Selenium 4.3.
    driver.find_element('id', 'zjhm').send_keys('123')
    driver.find_element('id', 'sjhm').send_keys('123')
    # Presumably gives the operator time to finish the login by hand
    # (captcha / SMS code) -- confirm.
    input()


def _parse_page(response):
    """Return the ``'list'`` entry of a JSON response, or ``[]`` on failure.

    Undecodable bytes, invalid JSON, a missing ``'list'`` key, or a payload
    that is not a JSON object are reported on stdout and treated as an empty
    page.
    """
    raw = response.content
    try:
        return json.loads(raw.decode('utf8'))['list']
    except (ValueError, KeyError, TypeError):
        # Decode leniently here so that reporting a bad body cannot itself raise.
        print(traceback.format_exc(), raw.decode('utf8', errors='replace'))
        return []


def getdata(selected, startday):
    """Download all listing records and save them as an Excel file.

    Logs in through a Chrome window, copies the browser cookies into the
    HTTP session, then requests pages 1, 2, ... until a page comes back
    empty. The collected rows are written to
    ``info_<selected>_from<startday>_runAt<timestamp>.xlsx`` next to this
    script.

    Args:
        selected: Listing category; must be a key of ``_LISTING_TYPES``.
        startday: Start date string substituted into the form data.

    Raises:
        KeyError: If ``selected`` is not a known category.
    """
    # Validate before launching the browser so a bad argument fails fast.
    type_code = _LISTING_TYPES[selected]

    runtime = datetime.datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    basefolder = os.path.dirname(__file__)

    session = Session(
        webdriver_path=r'C:\py\tools\chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        # Headless mode is left disabled:
        # webdriver_options={'arguments': ['headless']}
    )
    driver = session.driver
    records = []
    try:
        _login(driver)
        session.transfer_driver_cookies_to_session()

        # Replaces the session's default headers entirely.
        session.headers = {
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'zn',
            'Origin': 'hn',
            'Referer': 'hte=2',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        }
        requesturl = 'htx'
        # NOTE(review): this template contains no % placeholders, so the
        # formatting below raises TypeError as written. It needs three
        # placeholders (type code, start day, page number) -- restore the
        # real template.
        formdata_tmp = 'g05003'

        page = 1
        while True:
            formdata = formdata_tmp % (type_code, startday, page)
            response = session.post(url=requesturl, data=formdata)
            rows = _parse_page(response)
            if not rows:
                # An empty (or unreadable) page marks the end of the data.
                break
            records.extend({col: row[col] for col in _COLUMNS} for row in rows)
            print('%s done' % page, ', 记录: %s' % len(rows))
            time.sleep(1)  # pause between page requests
            page += 1
    finally:
        # Always shut the browser down, even when a request fails.
        driver.quit()

    # Build the frame once instead of concatenating row by row.
    result = pd.DataFrame(records)
    result.to_excel(
        os.path.join(basefolder, 'info_%s_from%s_runAt%s.xlsx' % (selected, startday, runtime)),
        index=False,
    )


if __name__ == '__main__':
    selected = '中介'
    startday = '2022-06-01'
    getdata(selected=selected, startday=startday)