统计论坛拍m次数

import random
import re
import time
from collections import Counter
from pathlib import Path

import pandas as pd
import requests


# HTTP request headers passed to requests.get() for each page fetch.
# The values imitate an XMLHttpRequest issued by Microsoft Edge 124 on Windows
# (see 'user-agent', 'sec-ch-ua*' and 'x-requested-with').
# NOTE(review): the 'referer' value looks like a redacted placeholder --
# presumably it must be replaced with the real page URL before use; confirm.
headers = {
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'xxxxxxx',
    'sec-ch-ua': '"Chromium";v="124", "Microsoft Edge";v="124", "Not-A.Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
    'x-requested-with': 'XMLHttpRequest',
}


def get_page(pages, tid, timeout=10):
    """Count UID occurrences across paginated responses and save them to Excel.

    Requests pages ``1..pages`` with ``operation=view`` for the given ``tid``,
    extracts every ``uid=<digits>`` occurrence from each response body, and
    writes the per-UID totals to ``./LOG/{tid}_uid_counts.xlsx`` (columns
    ``uid`` and ``count``). The ``./LOG`` directory is created if missing.

    Args:
        pages: Number of pages to fetch; pages are numbered starting at 1.
        tid: Value sent as the ``tid`` query parameter (presumably the forum
            thread id) and used in the output file name.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` can block forever on a stalled connection.

    Raises:
        requests.RequestException: If a request fails at the network level
            or exceeds ``timeout``.
    """
    # Compile once; the pattern is loop-invariant.
    uid_pattern = re.compile(r'uid=(\d+)')
    uid_counts = Counter()

    for page in range(1, pages + 1):
        print(f'Getting page {page}...')
        params = {
            'operation': 'view',
            'tid': str(tid),
            'page': str(page),
            'inajax': '1',
            'ajaxtarget': 'ixmaclog',
        }

        response = requests.get(
            'xxxxx', params=params, headers=headers, timeout=timeout
        )
        if response.ok:
            uid_counts.update(uid_pattern.findall(response.text))
        else:
            # Keep the run best-effort, but make the gap visible instead of
            # silently parsing an error page as if it were data.
            print(f'Page {page} returned HTTP {response.status_code}, skipping.')

        # Pause 0.3-0.5 s between requests to avoid hammering the server;
        # there is nothing to wait for after the final page.
        if page < pages:
            time.sleep(random.uniform(0.3, 0.5))

    # Counter preserves first-seen order, so rows appear in discovery order.
    df = pd.DataFrame(list(uid_counts.items()), columns=['uid', 'count'])

    # to_excel does not create missing parent directories.
    Path('./LOG').mkdir(parents=True, exist_ok=True)
    df.to_excel(f'./LOG/{tid}_uid_counts.xlsx', index=False)


# Script entry point: fetch `pages` pages for `tid` and write the UID counts.
if __name__ == '__main__':
    # NOTE(review): `xxxx` is not defined anywhere in the visible file and looks
    # like a redacted placeholder; replace both occurrences with real values
    # (the tid to query and the number of pages to fetch) before running.
    tid = xxxx
    pages = xxxx
    get_page(pages=pages, tid=tid)


posted @ 2024-05-09 09:37  __username  阅读(2)  评论(0)  编辑  收藏  举报

本文作者:DIVMonster

本文链接:https://www.cnblogs.com/guangzan/p/12886111.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。