统计论坛拍m次数
import time
import requests
import re
import pandas as pd
import random
# HTTP request headers sent with every page fetch. They imitate an
# XMLHttpRequest issued by Microsoft Edge 124 on Windows.
headers = {
    # Content negotiation and caching
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "priority": "u=1, i",
    # Placeholder: replace with the URL of the page that issues the request
    "referer": "xxxxxxx",
    # Client hints
    "sec-ch-ua": '"Chromium";v="124", "Microsoft Edge";v="124", "Not-A.Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    # Fetch metadata
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    # Browser identification
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
    ),
    "x-requested-with": "XMLHttpRequest",
}
def get_page(pages, tid):
    """Count how often each UID appears in a thread's log pages and export it.

    Requests pages ``1..pages`` of the log view for thread ``tid``, extracts
    every ``uid=<digits>`` occurrence from each response body, and writes the
    per-UID totals to ``./LOG/{tid}_uid_counts.xlsx``.

    Args:
        pages: Number of pages to fetch; pages are numbered starting at 1.
        tid: Thread id. Sent as the ``tid`` query parameter and used in the
            output file name.

    Returns:
        The pandas DataFrame that was written to disk, with columns ``uid``
        (the digit string captured from the page) and ``count``. Rows are in
        first-seen order.

    Raises:
        requests.RequestException: If a request times out, cannot connect,
            or the server answers with an HTTP error status.
    """
    # Function-scope imports keep this block self-contained.
    from collections import Counter
    from pathlib import Path

    uid_pattern = re.compile(r'uid=(\d+)')  # compiled once, reused per page
    uid_counts = Counter()  # uid string -> number of occurrences

    for page in range(1, pages + 1):
        print(f'Getting page {page}...')
        params = {
            'operation': 'view',
            'tid': str(tid),
            'page': str(page),
            'inajax': '1',
            'ajaxtarget': 'ixmaclog',
        }
        # A timeout prevents a stalled connection from hanging the whole run.
        response = requests.get(
            'xxxxx', params=params, headers=headers, timeout=10
        )
        # Fail loudly on HTTP errors instead of counting UIDs in an error page.
        response.raise_for_status()
        uid_counts.update(uid_pattern.findall(response.text))

        # Short random pause (0.3-0.5 s) between requests to go easy on the
        # server; nothing follows the last page, so no pause is needed there.
        if page < pages:
            time.sleep(random.uniform(0.3, 0.5))

    # Convert the tally to a two-column table.
    df = pd.DataFrame(list(uid_counts.items()), columns=['uid', 'count'])

    # Make sure the output directory exists before writing the workbook.
    out_dir = Path('./LOG')
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_excel(out_dir / f'{tid}_uid_counts.xlsx', index=False)
    return df
if __name__ == '__main__':
    # Script entry point. `xxxx` is a placeholder left in the source: replace
    # the first with the thread id to scan and the second with the number of
    # log pages to fetch.
    tid, pages = xxxx, xxxx
    get_page(pages=pages, tid=tid)
本文来自博客园,作者:__username,转载请注明原文链接:https://www.cnblogs.com/code3/p/18181416