python爬取连续一字板股票及当时日期数据【原创分享】
本篇为个人测试记录,记录爬取连续一字板的股票及当时日期。
import tushare as ts
import pandas as pd
import time
# Strategy: find runs of consecutive "one-word" limit-up boards for one stock.
def gp_rules(code, start='2018-01-01', end='2019-07-26', min_streak=5):
    """Print ``code`` and a date when a run of one-word limit-up days is found.

    A row counts as a one-word board when its ``p_change`` is above 9 and its
    ``open`` equals its ``close``. Two adjacent board rows are treated as
    consecutive when the integer part of ``open * (1 + p_change / 100)`` of
    one row equals the integer part of ``open`` of the row before it.

    Only the first qualifying run per stock is reported; nothing is returned.

    Args:
        code: Stock code passed to ``ts.get_hist_data``.
        start: First date of the history window (``YYYY-MM-DD``).
        end: Last date of the history window (``YYYY-MM-DD``).
        min_streak: Number of adjacent board rows required before printing.
            Must be at least 2. The default of 5 matches the original
            threshold (the original comment said "4 or more", but the code
            required 5 rows).

    Raises:
        ValueError: If ``min_streak`` is smaller than 2.
    """
    if min_streak < 2:
        raise ValueError("min_streak must be at least 2")

    # Fetch the daily history of this stock.
    data = ts.get_hist_data(code, start=start, end=end)
    # Skip stocks that have no data in the window.
    if data is None or data.empty:
        return

    # Select the needed columns by name instead of by position.
    boards = data[['open', 'close', 'p_change']]
    # Keep rows with a gain above 9% whose open equals the close, i.e.
    # one-word limit-up days (heuristic from the original; could be refined).
    boards = boards[(boards['p_change'] > 9) & (boards['open'] == boards['close'])]
    # Nothing to scan when the stock never had a one-word board.
    if boards.empty:
        return

    # Plain arrays give explicit positional access; integer keys on a
    # date-labelled Series depend on a deprecated pandas fallback.
    opens = boards['open'].to_numpy()
    changes = boards['p_change'].to_numpy()

    # NOTE(review): the check assumes rows are ordered newest-first, so row
    # n - 1 is the trading day after row n -- confirm against tushare output.
    # NOTE(review): int() truncation makes the price match very coarse.
    streak = 1
    # Start at 1: row 0 has no predecessor, and n - 1 == -1 would silently
    # compare it against the last row.
    for n in range(1, len(boards)):
        if int(opens[n] * (changes[n] / 100 + 1)) == int(opens[n - 1]):
            streak += 1
            if streak >= min_streak:
                # Report the row where the run began in iteration order.
                print(code, boards.index[n - (min_streak - 1)])
                # Only the first run per stock is reported.
                return
        else:
            streak = 1
# Build the list of stock codes to scan.
def get_code(token='xxxx', listed_by='2018-01-01'):
    """Return the symbols of listed stocks whose listing date is on or before a cutoff.

    Args:
        token: tushare pro API token. The default ``'xxxx'`` is the original
            placeholder; supply your own token.
        listed_by: Inclusive cutoff for ``list_date`` (anything
            ``pandas.Timestamp`` accepts).

    Returns:
        A list of ``symbol`` values, in the order the API returned them.
    """
    pro = ts.pro_api(token=token)
    # Request the symbol and listing date of every currently listed stock.
    listing = pro.stock_basic(exchange='', list_status='L', fields='symbol, list_date')
    # Parse list_date so it can be compared as a date.
    listing['list_date'] = pd.to_datetime(listing['list_date'])
    # Filter with a boolean mask rather than a partial-string slice on a
    # DatetimeIndex: the slice depends on the index being sorted and fails
    # on recent pandas when the key is absent from an unsorted index.
    listed_in_time = listing['list_date'] <= pd.Timestamp(listed_by)
    return listing.loc[listed_in_time, 'symbol'].tolist()
# Measure the elapsed run time. time.clock() was removed in Python 3.8;
# time.perf_counter() is the replacement for interval timing.
start = time.perf_counter()
code_list = get_code()
print("正在爬取符合策略的股票,请耐心等待......")
for code in code_list:
    # Pause between requests before querying the next stock.
    time.sleep(1.4)
    gp_rules(code)
print("爬取筛选完成!")
end = time.perf_counter()
print('运行时间:%s Seconds' % (end-start))
如有错误,请及时指正,谢谢!