获取XXXX个股前十大股东数据-01
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
# File       : XXXX.py
# Time       : 2023/8/18 11:38
# Author     : lrtao2010
# version    : python 3.10.1
# Description: Fetch the top-ten shareholder rows for individual XXXX stocks.
"""
# Import modules
import random
import re
import time

import requests  # downloads the web page

# Update time. spider() stops reading a page as soon as it meets a row whose
# date equals this value, so it must be adjusted before each run.
# NOTE(review): presumably this is the reporting period that has already been
# collected -- confirm with the author.
change_time = "2023/6/30"

# Input file with one stock code per line, and the pipe-separated output file.
CODE_LIST_PATH = "./shuju/XXXX.txt"
OUTPUT_PATH = "./shuju/shareholder.txt"

# Maximum number of rows stored per stock.
MAX_ROWS_PER_CODE = 10

# Custom request headers sent with every page download.
_HEADERS = {
    'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'XXXX.XXXX.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
}

# One match per table row: the named groups are the date ("time"), the holder
# name ("name") and the change column ("change"). Compiled once at import
# instead of on every spider() call.
_HOLDER_ROW = re.compile(
    r'<td>十大流通股东</td>.*?<td>'
    r'(?P<time>.*?)</td>'
    # The name cell may or may not wrap the name in an <a> tag, so the opening
    # and closing tags are each matched zero or one time as a whole.
    r'.*?class=.*?>(<a.*?>){0,1}(?P<name>.*?)(</a>){0,1}</td>'
    r'.*?class=.*?>(?P<change>.*?)</td>',
    re.S,  # re.S lets "." match newlines too
)


def _load_codes(path: str) -> list[str]:
    """Return the stripped, non-empty lines of *path* as a list of stock codes."""
    with open(path, encoding='utf-8') as f:
        # Blank lines would otherwise become empty codes and trigger a
        # pointless request for each of them.
        return [code for line in f if (code := line.strip())]


# Build the stock code list (kept at module level so the name stays available
# to anything that already relies on it).
my_code_list = _load_codes(CODE_LIST_PATH)


def spider(url: str, code: str | None = None) -> None:
    """Download *url* and append its shareholder rows to the output file.

    Each stored line has the form ``code|time|name|change``. Rows are taken in
    page order; processing stops at the first row whose date equals
    ``change_time`` or once ``MAX_ROWS_PER_CODE`` rows have been collected.
    Nothing is written (and the output file is not touched) when no row
    qualifies.

    Args:
        url: Page address to download.
        code: Stock code used as the first field of every stored line. When
            omitted, the text after the last ``=`` in *url* is used, which is
            the code for URLs built by the main loop of this script.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    if code is None:
        # Previously this function silently depended on a global loop variable
        # that only exists when the file runs as a script.
        code = url.rsplit("=", 1)[-1]

    resp = requests.get(url, headers=_HEADERS, timeout=(30, 30))

    rows = []
    for match in _HOLDER_ROW.finditer(resp.text):
        fields = match.groupdict()
        if fields["time"] == change_time:
            break
        rows.append(f"{code}" + "|" + "|".join(fields.values()) + "\n")
        if len(rows) >= MAX_ROWS_PER_CODE:
            break

    if rows:
        # Single append per page instead of reopening the file for every row.
        with open(OUTPUT_PATH, 'a+', encoding='utf-8') as s_f:
            s_f.writelines(rows)


def main() -> None:
    """Scrape every code in ``my_code_list``, pausing 1-2 seconds between pages."""
    for code in my_code_list:
        print(code)
        url = f"http://XXXX.XXXX.com//?XXXX?XXXX={code}"
        try:
            spider(url, code)
        except requests.RequestException as exc:
            # One unreachable page should not abort the whole batch.
            print(f"{code}: request failed: {exc}")
        time.sleep(random.randint(1, 2))


# Main program; remember to update the change_time variable before running.
if __name__ == '__main__':
    main()