A股数据day级前复权数据下载与存储
存储为3个文件夹:大盘数据、股票数据与概括文件
数据获取来源:股票数据来源于R中的WindR;大盘数据来源于python中的tushare(wind中指数似乎只有中证板块)
设置数据文件夹,代码中只需修改更新截止日期与工作路径,后面创建文件都无需手动操作
文件夹中文件不能随意变动,标红代码部分需要自己修改
股票数据中变量分别为股票代码、股票简称、日期、时间、开高低收(乘10000)、交易量、交易额、转手率(基于自由流通股本)、自由流通股本
初次获取数据
1、大盘指数数据(399001, 399005, 399006, 399300,999999)
## 初始设置
os.chdir("C:/Users/Administrator.USER-20161208UW/Desktop/数据") #修改当前工作目录
start_date = "2009-01-01"
end_date = "2017-12-20"
new_date = end_date.replace("-", "")
## 创建新文件夹
os.mkdir("ts大盘数据")
os.mkdir("ts大盘数据/大盘数据"+new_date)
## 概括信息
stock_info = pd.DataFrame({"stock.code":["399001", "399005", "399006", "399300", "999999"] ,
"stock.name":["深证成指", "中小板指", "创业板指", "沪深300", "上证指数"],
"type":["SZ", "SZ", "SZ", "SZ", "SH"]})
stock_info.to_csv("ts大盘数据/大盘数据"+new_date+"/stock_info.csv", index=False)
# 获取大盘信息
dapanzhishu1 = ["399001", "399005", "399006", "399300", "000001"]
dapanzhishu2 = ["399001", "399005", "399006", "399300", "999999"]
wind_code = ["399001.SZ", "399005SZ", "399006SZ", "399300SZ", "999999SH"]
name = ["深证成指", "中小板指", "创业板指", "沪深300", "上证指数"]
for i in list(range(5)):
df = ts.get_h_data(dapanzhishu1[i], start=start_date, end=end_date, index=True, pause=4)
df.sort_index(inplace=True)
df[["open", "high", "close", "low"]] = df[["open", "high", "close", "low"]]*10000
df["date"] = df.index
df["date"] = df["date"].astype(str).apply(lambda x:x.replace('-', ''))
df.columns = ['open','high', 'close', 'low', 'volumw', 'turover', 'date']
df["name"] = name[i]
df["wind_code"] = wind_code[i]
df["time"] = 151500000
df["turn"] = 0
df["free_turn"] = 0
df = df[["wind_code", "name", "date", "time", "open", "high", 'low', 'close', 'volumw', 'turover', "turn", "free_turn"]]
df.to_csv("ts大盘数据/大盘数据"+new_date+"/"+dapanzhishu2[i]+".csv", index=False)
2、A股股票数据与概括文件
## R
library(WindR) library(xlsx) library(data.table) library(magrittr) library(tcltk2) setwd("C:/Users/~~~/Desktop/数据") new.date <- "20171220" start.date <- "20090101" w.start() end.date <- paste(substr(new.date, 1, 4), substr(new.date, 5, 6), substr(new.date, 7, 8), sep = "-") start.date <- paste(substr(start.date, 1, 4), substr(start.date, 5, 6), substr(start.date, 7, 8), sep = "-") ## 摘取当日在市股票代码 stock.code.df <- w.wset('sectorconstituent', paste0("date=", end.date, ";sectorid=a001010100000000")) if(stock.code.df$ErrorCode == 0){ stock.code.sh.sz <- stock.code.df$Data$wind_code }else{ print(paste0("获取数据出错,错误代码", stock.code.df$ErrorCode)) } new.stock.code <- substr(stock.code.sh.sz, 1, 6) # str(stock.code.sh.sz) ## 创建新的文件夹 dir.create("概括文件") dir.create("股票数据") dir.create(paste0("股票数据/股票数据", new.date)) ## 补全stock.code的开始及截至日期及其他信息CSV文件 general.information <- data.frame(array(dim=c(length(stock.code.sh.sz), 5))) colnames(general.information) <- c("stock.code", "stock.name", "type", "starttime", "endtime") ## 设置进度条 pb <- tkProgressBar("进度", "已完成 %", 0, 100) ## 从WindR获取数据 ## 未检查数据是否存在异常日期(大盘中没有的交易日) w.start() # i <- 1 for(i in 1:length(stock.code.sh.sz)){ wind.data <- w.wsd(stock.code.sh.sz[i], "trade_code, sec_name, open, high, low, close, volume, amt, free_turn,free_float_shares", start.date, end.date, "unit=1;PriceAdj=F") if(wind.data$ErrorCode == 0){ wind.df <- data.frame(array(dim=c(nrow(wind.data$Data), 12))) colnames(wind.df) <- c("wind_code", "name", "date", "time", "open", "high", "low", "close", "volumw", "turover", "free_turn", "free_float_shares") wind.df[, 1] <- wind.data$Code wind.df[, 2] <- wind.data$Data$SEC_NAME wind.df[, 3] <- gsub("-", "", wind.data$Data$DATETIME) wind.df[, 4] <- 151500000 wind.df[, 5:8] <- wind.data$Data[4:7] * 10000 wind.df[, 9:12] <- wind.data$Data[8:11] wind.df <- wind.df[!(is.na(wind.df$open)), ] if(nrow(wind.df) == 0){ print(c(i, stock.code[i])) }else{ if(any(is.na(wind.df))){ print(paste(stock.code.sh.sz[i], "数据出错(数据中仍有NA)")) }else{ if(any(wind.df[, 5:8] == 0)){ print(paste(stock.code.sh.sz[i], "数据出错(数据中开高低收存在0)")) }else{ if(any(table(wind.df$date) > 1)){ print(paste(stock.code.sh.sz[i], "数据出错(数据中存在日期相同)")) }else{ if(any(wind.df$date != sort(wind.df$date))){ print(paste(stock.code.sh.sz[i], "数据出错(数据中日期顺序不对)")) }else{ general.information[i, 1] <- substr(stock.code.sh.sz[i], 1, 6) general.information[i, 2] <- wind.df$name[1] general.information[i, 3] <- substr(stock.code.sh.sz[i], 8, 9) general.information[i, 4] <- wind.df[1, 3] general.information[i, 5] <- wind.df[nrow(wind.df), 3] write.csv(wind.df, paste("股票数据/股票数据", new.date, "/", new.stock.code[i], ".csv", sep=""), row.names = FALSE) } } } } } }else{ print(paste0(stock.code.sh.sz[i], "股票获取数据出错,错误代码:", wind.data$ErrorCode)) break } info <- sprintf("已完成 %d%%", round(i*100/length(stock.code.sh.sz))) setTkProgressBar(pb, i*100/length(stock.code.sh.sz), sprintf("进度 (%s)", info), info) } ## 关闭进度条 close(pb) ## 总概括文件中无NA时输出 if(all(!(is.na(general.information)))){ write.xlsx(general.information, paste0("概括文件/概括文件", new.date, ".xlsx"), row.names = FALSE) }else{ print("总概况文件中存在NA,需查验") }
3、文件格式
(1)一级
(2)二级
(3)三级