国内股票爬取(简易版)+CSV 存储(一次性)(尝试版)

 现在使用  雪球网  对股票的各项数值进行爬取

 

雪球网页面中的数据存储方式比较特殊,需要从中提取出字典。这里以提取沪市的部分股票为例:由于雪球网的数据基本都可以通过"先搜索关键词,再取其索引+1 处的值"的方式获取,所以这里只使用简单的 XPath。

注意之前选择正则取数据有坑,因为其数据的text分布不同,所以最好用查找关键词的方法筛选自己想要的数据

 

这是主要提取元素的方法,其中的URL 也可以使用科技股和其他股的,因为检索的索引关键词是一样的:

  1 def get_website():
  2 
  3 
  4     for stock_number in stock_list:
  5         stock_URL = "https://xueqiu.com/S/SH{}".format(str(stock_number))
  6         response = requests.get(stock_URL, headers=headers)
  7         # print(response.text)
  8         stock_html = etree.HTML(response.text)
  9         # print(stock_html)
 10 
 11         itemlist = {}
 12         # 股票名称
 13         stockitem_name = stock_html.xpath('//div[@class="stock-name"]/text()')[0]
 14         # print(stockitem_name)
 15         itemlist['stockitem_name'] = stockitem_name
 16         # 现在价格(不在td可循环内)
 17         stockitem_result = stock_html.xpath('//body//strong/text()')[8]
 18         # print(stockitem_result)
 19         itemlist['stockitem_result'] = stockitem_result
 20         # 现在状态
 21         stock_status = stock_html.xpath('//div[@class="stock-time"]/span/text()')[0]
 22         # print(stock_status)
 23         itemlist['stock_status'] = stock_status
 24         # 当前时间(记录时间)
 25         stock_time = stock_html.xpath('//div[@class="stock-time"]/span/text()')[1]
 26         # print(stock_time)
 27         itemlist['stock_time'] = stock_time
 28         # 开始td标签循环!
 29 
 30         stock_items = stock_html.xpath('//td//text()')
 31         # print(stock_items)
 32         for i in range(27):
 33             if "最高:" in stock_items[i]:
 34                 # print(stock_items[i+1])
 35                 itemlist['highest'] = stock_items[i+1]
 36             if "最低:" in stock_items[i]:
 37                 # print(stock_items[i + 1])
 38                 itemlist['lowerest'] = stock_items[i+1]
 39 
 40         for i in range(54):
 41             if "今开:" in stock_items[i]:
 42                 # print(stock_items[i+1])
 43                 itemlist['today_start'] = stock_items[i+1]
 44             if "涨停:" in stock_items[i]:
 45                 # print(stock_items[i+1])
 46                 itemlist['high_stop'] = stock_items[i+1]
 47             if "成交量:" in stock_items[i]:
 48                 # print(stock_items[i+1])
 49                 itemlist['ok_total'] = stock_items[i+1]
 50             if "昨收:" in stock_items[i]:
 51                 # print(stock_items[i+1])
 52                 itemlist['lastday_over'] = stock_items[i+1]
 53             if "跌停:" in stock_items[i]:
 54                 # print(stock_items[i+1])
 55                 itemlist['low_stop'] = stock_items[i+1]
 56             if "成交额:" in stock_items[i]:
 57                 # print(stock_items[i+1])
 58                 itemlist['ok_test'] = stock_items[i+1]
 59             if "量比:" in stock_items[i]:
 60                 # print(stock_items[i+1])
 61                 itemlist['liang_percent'] = stock_items[i+1]
 62             if "换手:" in stock_items[i]:
 63                 # print(stock_items[i+1])
 64                 itemlist['change_hand'] = stock_items[i+1]
 65             if "市盈率(动):" in stock_items[i]:
 66                 # print(stock_items[i+1])
 67                 itemlist['shiying_dong'] = stock_items[i+1]
 68             if "市盈率(TTM):" in stock_items[i]:
 69                 # print(stock_items[i+1])
 70                 itemlist['shiying_TTM'] = stock_items[i+1]
 71             if "委比:" in stock_items[i]:
 72                 # print(stock_items[i+1])
 73                 itemlist['wei_percent'] = stock_items[i+1]
 74             if "振幅:" in stock_items[i]:
 75                 # print(stock_items[i+1])
 76                 itemlist['zheng_way'] = stock_items[i+1]
 77             if "市盈率(静):" in stock_items[i]:
 78                 # print(stock_items[i+1])
 79                 itemlist['shiying_jin'] = stock_items[i+1]
 80             if "市净率:" in stock_items[i]:
 81                 # print(stock_items[i+1])
 82                 itemlist['shijin_percent'] = stock_items[i+1]
 83             if "每股收益:" in stock_items[i]:
 84                 # print(stock_items[i+1])
 85                 itemlist['per_stock_fit'] = stock_items[i+1]
 86             if "股息(TTM):" in stock_items[i]:
 87                 # print(stock_items[i+1])
 88                 itemlist['stock_xi_TTM'] = stock_items[i+1]
 89             if "总股本:" in stock_items[i]:
 90                 # print(stock_items[i+1])
 91                 itemlist['total_stock_ben'] = stock_items[i+1]
 92             if "总市值:" in stock_items[i]:
 93                 # print(stock_items[i+1])
 94                 itemlist['total_stock_value'] = stock_items[i+1]
 95             if "52周最高:" in stock_items[i]:
 96                 # print(stock_items[i+1])
 97                 itemlist['highest_52'] = stock_items[i+1]
 98             if "52周最低:" in stock_items[i]:
 99                 # print(stock_items[i+1])
100                 itemlist['lowerest_52'] = stock_items[i+1]
101             if "货币单位:" in stock_items[i]:
102                 # print(stock_items[i+1])
103                 itemlist['cop'] = stock_items[i+1]
104         # print(itemlist)
105         print(itemlist)
106         writer_to_csv_item(itemlist)

调用主函数,其中的股票列表可替换或用input自己添加:

这里每30秒记录一次,但是这里记录时间会不一样

if __name__ == '__main__':
    # Stock codes to poll; get_website() reads this list and builds an
    # "SH"-prefixed URL for each entry. Replace the codes, or append one
    # obtained from input(), to track other stocks.
    stock_list = [
        '688157', '600018', '600072', '600635', '600497',
        '603658', '688208', '600550', '600498', '600703',
        '603815', '600127', '600305', '603777', '688004',
    ]
    # Request headers sent by get_website() with every page request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"}

    # Write the column-title row. The return value is not kept: the file
    # it was bound to is already closed when the function returns.
    write_to_csv_header()

    # Run get_website on a 30-second interval.
    scheduler = BlockingScheduler()
    scheduler.add_job(func=get_website, trigger='interval', seconds=30)
    scheduler.start()

 

写入csv函数:(好丑)还是有键名

 1 def write_to_csv_header():
 2     with open('stock.csv', 'a', encoding='utf-8-sig', newline='') as f:
 3         csv_header = ['股票名称', '现在价格', '现在状态', '当前记录时间', '最高', '最低', '今日开盘价', '涨停价格', '成交量', '昨日收盘价', '跌停价格', '成交额', '量比', '换手', '市盈率(动)', '市盈率(TTM)', '委比', '振幅', '市盈率(静)', '市净率', '每股收益', '股息(TTM)', '总股本', '总市值', '52周最高', '52周最低', '货币单位']
 4         writer = csv.writer(f)
 5         writer.writerow(csv_header)
 6 
 7     # with open('stock.csv', 'a', encoding='utf-8-sig', newline='') as w:
 8     #     writer_1 = csv.DictWriter(w, fieldnames=['stockitem_name', 'stockitem_result', 'stock_status', 'stock_time', 'highest', 'lowerest', 'today_start', 'high_stop', 'ok_total', 'lastday_over', 'low_stop', 'ok_test', 'liang_percent', 'change_hand', 'shiying_dong', 'shiying_TTM', 'wei_percent', 'zheng_way', 'shiying_jin', 'shijin_percent', 'per_stock_fit', 'stock_xi_TTM', 'total_stock_ben', 'total_stock_value', 'highest_52', 'lowerest_52', 'cop'])
 9     #     writer_1.writeheader()
10     return writer
# Column order of a data row; it lines up with the title row written by
# write_to_csv_header().
_CSV_FIELDNAMES = (
    'stockitem_name', 'stockitem_result', 'stock_status', 'stock_time',
    'highest', 'lowerest', 'today_start', 'high_stop', 'ok_total',
    'lastday_over', 'low_stop', 'ok_test', 'liang_percent', 'change_hand',
    'shiying_dong', 'shiying_TTM', 'wei_percent', 'zheng_way',
    'shiying_jin', 'shijin_percent', 'per_stock_fit', 'stock_xi_TTM',
    'total_stock_ben', 'total_stock_value', 'highest_52', 'lowerest_52',
    'cop',
)


def writer_to_csv_item(itemlist):
    """Append one stock record to ``stock.csv``.

    Args:
        itemlist: dict keyed by the names in ``_CSV_FIELDNAMES``. Keys
            that are absent are written as empty cells; a key outside
            that set makes ``csv.DictWriter`` raise ``ValueError``.

    Only the data row is written. The previous version called
    ``writeheader()`` on every call, which put a row of internal key names
    in front of each record.
    """
    with open('stock.csv', 'a', encoding='utf-8-sig', newline='') as w:
        writer_1 = csv.DictWriter(w, fieldnames=list(_CSV_FIELDNAMES))
        writer_1.writerow(itemlist)

 

posted @ 2020-07-08 13:19  酸辣土豆皮  阅读(324)  评论(1)  编辑  收藏  举报