pandas入门:汇总和计算描述统计-相关系数与协方差

from pandas import DataFrame
from pandas_datareader import data as web

# Fetch quotes for each symbol through pandas_datareader's Yahoo reader,
# passing the date strings '1/1/2000' and '1/1/2010' as the range arguments.
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')


def _collect_column(column):
    """Return a DataFrame with `column` from every table in all_data, one column per ticker."""
    return DataFrame({symbol: frame[column] for symbol, frame in all_data.items()})


# One column per ticker: adjusted closing prices and traded volumes.
price = _collect_column('Adj Close')
volume = _collect_column('Volume')

# Percentage change of the prices from one row to the next.
returns = price.pct_change()
print(returns.tail())
'''
                AAPL       IBM      MSFT      GOOG
Date                                              
2009-12-24  0.034339  0.004385  0.002587  0.011117
2009-12-28  0.012295  0.013326  0.005484  0.007098
2009-12-29 -0.011862 -0.003477  0.007058 -0.005571
2009-12-30  0.012147  0.005460 -0.013699  0.005376
2009-12-31 -0.004300 -0.012597 -0.015504 -0.004416
'''
# The triple-quoted strings in this script hold the output shown in the
# original write-up for the print call that precedes them.

# Pull out the two return Series that the pairwise examples compare.
msft_returns = returns['MSFT']
ibm_returns = returns['IBM']

# Series.corr: correlation of the overlapping, non-NA, index-aligned values
# of the two Series.
print(msft_returns.corr(ibm_returns))
'''
0.4943581494208538
'''
# Series.cov: covariance of the same pair.
print(msft_returns.cov(ibm_returns))
'''
0.0002158212121866683
'''
# DataFrame.corr: the full correlation matrix, returned as a DataFrame.
print(returns.corr())
'''
          AAPL       IBM      MSFT      GOOG
AAPL  1.000000  0.412391  0.423598  0.470676
IBM   0.412391  1.000000  0.494358  0.390688
MSFT  0.423598  0.494358  1.000000  0.443586
GOOG  0.470676  0.390688  0.443586  1.000000
'''
# DataFrame.cov: the full covariance matrix, returned as a DataFrame.
print(returns.cov())
'''
          AAPL       IBM      MSFT      GOOG
AAPL  0.001030  0.000254  0.000309  0.000303
IBM   0.000254  0.000369  0.000216  0.000142
MSFT  0.000309  0.000216  0.000516  0.000205
GOOG  0.000303  0.000142  0.000205  0.000580
'''
# DataFrame.corrwith computes the correlation between the frame's columns
# (or rows) and another Series or DataFrame. Passing a Series returns a
# Series of correlation values (one computed for each column).
print(returns.corrwith(ibm_returns))
'''
AAPL    0.412391
IBM     1.000000
MSFT    0.494358
GOOG    0.390688
dtype: float64
'''
# Passing a DataFrame instead (here: the traded volumes).
print(returns.corrwith(volume))
'''
AAPL   -0.057665
IBM    -0.006592
MSFT   -0.014228
GOOG    0.062648
dtype: float64
'''
# Passing axis=1 performs the computation row by row instead.

 

posted @ 2021-02-13 13:06  OTAKU_nicole  阅读(344)  评论(0)  编辑  收藏  举报