# requestium

import sys

import requests
from requestium import Session, Keys
import json
import pandas as pd
import time
import pickle
import os
import datetime
import traceback


def getdata(selected, startday):
    """Log in through a Chrome window, page through the listing endpoint and export the rows to Excel.

    A requestium ``Session`` opens Chrome, pre-fills two form fields and then
    blocks on ``input()``. After Enter is pressed the browser cookies are copied
    into the HTTP session, which is then used to POST one request per page until
    a page comes back without rows. All collected rows are written to a single
    ``.xlsx`` file placed next to this script.

    Args:
        selected: Category key, either ``'中介'`` or ``'业主'``; it is mapped to
            the numeric code substituted into the form data and is also used
            in the output file name.
        startday: Start-date value substituted into the form data and used in
            the output file name (the script entry point passes
            ``'2022-06-01'``).

    Raises:
        KeyError: If ``selected`` is not a known category, or a returned row
            lacks one of the expected fields.

    Notes:
        A response that cannot be decoded/parsed, or that has no usable
        ``list`` entry, is printed together with the traceback and treated as
        the end of the data. If no rows were collected at all, the workbook
        contains only the header row.
    """
    runtime = datetime.datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    basefolder = os.path.dirname(__file__)

    # Fields copied from every record of the JSON ``list`` payload, in output column order.
    fields = (
        'cqmc', 'fczsh', 'cjsj', 'fwtybh', 'gpfyid', 'gpid', 'gplxrxm', 'jzmj',
        'mdmc', 'scgpshsj', 'tygpbh', 'wtcsjg', 'xqmc', 'xzqhname', 'xzqh',
    )

    # Resolve the category up front so an unknown value fails before a browser is launched.
    type_codes = {'中介': 1, '业主': 2}
    type_d = type_codes[selected]

    session = Session(webdriver_path=r'C:\py\tools\chromedriver.exe',
                      browser='chrome',
                      default_timeout=15,
                      # webdriver_options={'arguments': ['headless']}
                      )
    driver = session.driver
    try:
        # Redefine navigator.webdriver to undefined on every new document.
        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
        })

        driver.get('https://1')
        driver.find_element_by_id('zjhm').send_keys('123')
        driver.find_element_by_id('sjhm').send_keys('123')

        # Block until Enter is pressed in the console.
        # NOTE(review): presumably the operator finishes the login in the browser first - confirm.
        input()
        session.transfer_driver_cookies_to_session()

        header = {
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'zn',
            'Origin': 'hn',
            'Referer': 'hte=2',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        }
        session.headers = header

        requesturl = 'htx'

        # NOTE(review): the template as shown has no % placeholders, so the
        # formatting below raises TypeError as written; the real template
        # presumably takes (type code, start day, page number) - confirm.
        formdata_tmp = 'g05003'

        records = []
        page = 1
        while True:
            formdata = formdata_tmp % (type_d, startday, page)
            response = session.post(url=requesturl, data=formdata)
            try:
                rows = json.loads(response.content.decode('utf8'))['list']
            except (ValueError, KeyError, TypeError):
                # Covers undecodable bytes, invalid JSON and a payload without
                # 'list'. KeyboardInterrupt/SystemExit are deliberately not
                # caught so the run can still be aborted.
                print(traceback.format_exc(), response.content.decode('utf8', errors='replace'))
                rows = []
            if not rows:
                # Empty, null or unreadable page: nothing more to fetch.
                break
            records.extend({field: row[field] for field in fields} for row in rows)
            print('%s done' % page, ', 记录: %s' % len(rows))
            time.sleep(1)  # pause between page requests
            page += 1

        # Build the frame once instead of concatenating a one-row frame per record.
        result = pd.DataFrame(records, columns=list(fields))
        result.to_excel(os.path.join(basefolder, 'info_%s_from%s_runAt%s.xlsx' % (selected, startday, runtime)), index=False)
    finally:
        # Always shut the browser down, even when a request or the export fails.
        driver.quit()


if __name__ == '__main__':
    # Script entry point: export the '中介' category starting from 2022-06-01.
    getdata(selected='中介', startday='2022-06-01')

  

# posted @ 2022-04-10 17:37  CrossPython  阅读(68)  评论(0)  编辑  收藏  举报