线性回归分析(csv文件的存储、修改、读取,构造新数据)
# hanbb
# come on!!!
#
# Loads two locally stored STOXX text files (the index file and the VSTOXX
# file), joins the SX5E and V2TX columns for dates after 1999-01-01 into one
# DataFrame, forward-fills the gaps, prints a few rows and plots both series.
import datetime as dt
import urllib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

es_url = 'http://www.stoxx.com/download/historical_values/hbrbcpe.txt'
vs_url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'

# Raw strings keep the backslashes literal; the previous spelling relied on
# the invalid escapes '\e' and '\h', which newer Python versions warn about.
# The resulting path values are unchanged.
ES_RAW_PATH = r'E:\download\es.txt'
ES_CLEAN_PATH = r'E:\download\es50.txt'
VS_PATH = r'E:\download\hs.txt'

# One-off download step (disabled). Re-enabling it needs the submodule import:
# import urllib.request
# urllib.request.urlretrieve(es_url, ES_RAW_PATH)
# urllib.request.urlretrieve(vs_url, VS_PATH)

# Read the raw index file and strip every blank from every line. Only the
# disabled preprocessing step below consumes `lines`; the read stays active so
# the script still fails early when the raw file is missing.
with open(ES_RAW_PATH, 'r') as raw_file:
    lines = raw_file.readlines()
lines = [line.replace(' ', '') for line in lines]
# print(lines[:6])

# for line in lines[3883:3890]:
#     print(line[41:])

# One-off preprocessing step (disabled). It writes ES_CLEAN_PATH with a header
# made from lines[3], prefixed with 'data' and extended by an extra ';DEL'
# column name, followed by lines[4:-1].
# NOTE(review): the extra 'DEL' column presumably absorbs a trailing ';' on the
# data rows - confirm against the raw file.
#
# new_file = open(ES_CLEAN_PATH, 'w')
# new_file.writelines('data' + lines[3][:-1] + ';DEL' + lines[3][-1])
# new_file.writelines(lines[4:-1])
# new_file.close()
#
# new_lines = open(ES_CLEAN_PATH, 'r').readlines()
# print(new_lines[:5])

# Load the cleaned index file: first column is the (day-first) date index.
es = pd.read_csv(
    ES_CLEAN_PATH,
    index_col=0,
    parse_dates=True,
    sep=';',
    dayfirst=True,
)
del es['DEL']  # drop the helper column named in the rewritten header
print(es.tail())  # raw values
# print(np.round(es.tail()))  # np.round rounds the values

# Alternative (disabled): read the index data straight from the URL.
#
# cols = ['SX5P', 'SX5E', 'SXXP', 'SXXE', 'SXXF', 'SXXA', 'DK5F', 'DKXF']
# es = pd.read_csv(es_url, index_col=0, parse_dates=True, sep=';',
#                  dayfirst=True, header=None, skiprows=4, names=cols)
# print(es.tail())

# header=2: the column names are taken from the third line of the file.
vs = pd.read_csv(
    VS_PATH,
    index_col=0,
    header=2,
    parse_dates=True,
    dayfirst=True,
    sep=',',
)
print(vs.tail())

# Build the combined data set: both series restricted to dates after
# 1999-01-01, left-joined on the index of the EUROSTOXX column.
start = dt.datetime(1999, 1, 1)
data = pd.DataFrame({'EUROSTOXX': es['SX5E'][es.index > start]})
data = data.join(pd.DataFrame({'VSTOXX': vs['V2TX'][vs.index > start]}))
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
data = data.ffill()
print(data.head())
print(data.tail())

data.plot(subplots=True, grid=True, style="b", figsize=(8, 6))
plt.show()