pandas入门:汇总和计算描述统计-相关系数与协方差
# Summary statistics with pandas: correlation and covariance of stock returns.
#
# Downloads daily Yahoo! Finance data for a few tickers, builds DataFrames of
# adjusted close prices and volumes, then prints several correlation and
# covariance results computed from the daily percent changes in price.
# The sample outputs shown in comments are from the original run; the values
# depend on the data the remote service returns.
from pandas import DataFrame
from pandas_datareader import data as web

# Download settings, named so they can be changed in one place.
TICKERS = ['AAPL', 'IBM', 'MSFT', 'GOOG']
START_DATE = '1/1/2000'
END_DATE = '1/1/2010'

# NOTE(review): this performs one network request per ticker; confirm that
# get_data_yahoo still works with the installed pandas-datareader version.
all_data = {}
for ticker in TICKERS:
    all_data[ticker] = web.get_data_yahoo(ticker, START_DATE, END_DATE)

# One column per ticker.
price = DataFrame({tic: frame['Adj Close'] for tic, frame in all_data.items()})
volume = DataFrame({tic: frame['Volume'] for tic, frame in all_data.items()})

# Percent change of the prices.
returns = price.pct_change()
print(returns.tail())
#                 AAPL       IBM      MSFT      GOOG
# Date
# 2009-12-24  0.034339  0.004385  0.002587  0.011117
# 2009-12-28  0.012295  0.013326  0.005484  0.007098
# 2009-12-29 -0.011862 -0.003477  0.007058 -0.005571
# 2009-12-30  0.012147  0.005460 -0.013699  0.005376
# 2009-12-31 -0.004300 -0.012597 -0.015504 -0.004416

# Series.corr computes the correlation of the overlapping, non-NA,
# index-aligned values of two Series.
print(returns.MSFT.corr(returns.IBM))
# 0.4943581494208538

# Series.cov computes the covariance of the same pair.
print(returns.MSFT.cov(returns.IBM))
# 0.0002158212121866683

# DataFrame.corr returns the full correlation matrix as a DataFrame.
print(returns.corr())
#           AAPL       IBM      MSFT      GOOG
# AAPL  1.000000  0.412391  0.423598  0.470676
# IBM   0.412391  1.000000  0.494358  0.390688
# MSFT  0.423598  0.494358  1.000000  0.443586
# GOOG  0.470676  0.390688  0.443586  1.000000

# DataFrame.cov returns the full covariance matrix as a DataFrame.
print(returns.cov())
#           AAPL       IBM      MSFT      GOOG
# AAPL  0.001030  0.000254  0.000309  0.000303
# IBM   0.000254  0.000369  0.000216  0.000142
# MSFT  0.000309  0.000216  0.000516  0.000205
# GOOG  0.000303  0.000142  0.000205  0.000580

# DataFrame.corrwith computes the correlation between the columns (or rows)
# of a DataFrame and another Series or DataFrame. Passing a Series returns a
# Series of correlation values, computed for each column.
print(returns.corrwith(returns.IBM))
# AAPL    0.412391
# IBM     1.000000
# MSFT    0.494358
# GOOG    0.390688
# dtype: float64

# Passing a DataFrame: here, the percent changes against the volumes.
print(returns.corrwith(volume))
# AAPL   -0.057665
# IBM    -0.006592
# MSFT   -0.014228
# GOOG    0.062648
# dtype: float64

# Passing axis=1 computes the correlations row by row instead.
本文来自博客园,作者:OTAKU_nicole,转载请注明原文链接:https://www.cnblogs.com/nicole-zhang/p/14400018.html