六、股票市场分析实战项目一

数据获取

# Download the price history for ticker 'BABA' (Alibaba) through pandas-datareader.
import pandas_datareader as pdr
alibaba = pdr.get_data_yahoo('BABA')
# Preview the first rows of the downloaded frame.
alibaba.head()

  结果:

 Open High Low Close Adj Close Volume
Date      
2014-09-19 92.699997 99.699997 89.949997 93.889999 93.889999 271879400
2014-09-22 92.699997 92.949997 89.500000 89.889999 89.889999 66657800
2014-09-23 88.940002 90.480003 86.620003 87.169998 87.169998 39009800
2014-09-24 88.470001 90.570000 87.220001 90.570000 90.570000 32088000
2014-09-25 91.089996 91.500000 88.500000 88.919998 88.919998 28598000
alibaba.shape  # (789, 6): 789 rows, 6 columns
alibaba.tail()  # preview the last rows of the frame

  结果:

 Open High Low Close Adj Close Volume
Date      
2017-10-30 178.429993 181.899994 177.589996 181.580002 181.580002 20219700
2017-10-31 183.570007 185.119995 181.811005 184.889999 184.889999 21256700
2017-11-01 187.880005 188.880005 183.580002 186.080002 186.080002 28594700
2017-11-02 190.990005 191.220001 183.309998 184.809998 184.809998 41239900
2017-11-03 186.509995 186.929993 182.059998 183.210007 183.210007 19621400
alibaba.describe()  # summary statistics (count, mean, std, min, quartiles, max) for each column

  结果:

 Open High Low Close Adj Close Volume
count 789.000000 789.000000 789.000000 789.000000 789.000000 7.890000e+02
mean 98.879004 99.977879 97.622681 98.800431 98.800431 1.681925e+07
std 29.076254 29.240037 28.743547 29.007961 29.007961 1.427472e+07
min 57.299999 58.650002 57.200001 57.389999 57.389999 3.775300e+06
25% 79.849998 80.989998 79.150002 79.889999 79.889999 1.003060e+07
50% 89.099998 90.459999 88.059998 88.900002 88.900002 1.340540e+07
75% 106.500000 107.550003 105.129997 105.980003 105.980003 1.915080e+07
max 190.990005 191.220001 183.580002 186.080002 186.080002 2.718794e+08
alibaba.info()  # more basic information: index range, per-column non-null counts and dtypes, memory usage

  结果:

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 789 entries, 2014-09-19 to 2017-11-03
Data columns (total 6 columns):
Open         789 non-null float64
High         789 non-null float64
Low          789 non-null float64
Close        789 non-null float64
Adj Close    789 non-null float64
Volume       789 non-null int64
dtypes: float64(5), int64(1)
memory usage: 43.1 KB
View Code

 历史趋势分析

# 基本信息
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# 股票数据的读取
import pandas_datareader as pdr

# 可视化
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# time
from datetime import datetime

  

# Use one shared first day so the two price series cover the same period.
start = datetime(year=2015, month=9, day=20)
# Alibaba (BABA) and Amazon (AMZN), both fetched from the same start date.
alibaba = pdr.get_data_yahoo('BABA', start=start)
amazon = pdr.get_data_yahoo('AMZN', start=start)

  

# Save both downloaded frames as CSV files under ../homework.
alibaba.to_csv('../homework/BABA.csv')
amazon.to_csv('../homework/AMZN.csv')

  

alibaba.head()  # preview the first rows of the re-downloaded frame

  结果:

 Open High Low Close Adj Close Volume
Date      
2015-09-21 65.379997 66.400002 62.959999 63.900002 63.900002 22355100
2015-09-22 62.939999 63.270000 61.580002 61.900002 61.900002 14897900
2015-09-23 61.959999 62.299999 59.680000 60.000000 60.000000 22684600
2015-09-24 59.419998 60.340000 58.209999 59.919998 59.919998 20645700
2015-09-25 60.630001 60.840000 58.919998 59.240002 59.240002 17009100
# Line chart of Alibaba's adjusted closing price, with a legend.
alibaba['Adj Close'].plot(legend=True)

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x11d910588>

 
# Line chart of Alibaba's trading volume, with a legend.
alibaba['Volume'].plot(legend=True)

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x120ebcda0>

# Overlay both adjusted-close series on the same axes. Each line gets an
# explicit label and the legend is switched on, because both Series are named
# 'Adj Close' and would otherwise be indistinguishable in the chart.
alibaba['Adj Close'].plot(label='BABA', legend=True)
amazon['Adj Close'].plot(label='AMZN', legend=True)

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x120ee9a90>

alibaba.head()  # frame before the derived columns are added

  结果:

 Open High Low Close Adj Close Volume
Date      
2015-09-21 65.379997 66.400002 62.959999 63.900002 63.900002 22355100
2015-09-22 62.939999 63.270000 61.580002 61.900002 61.900002 14897900
2015-09-23 61.959999 62.299999 59.680000 60.000000 60.000000 22684600
2015-09-24 59.419998 60.340000 58.209999 59.919998 59.919998 20645700
2015-09-25 60.630001 60.840000 58.919998 59.240002 59.240002 17009100
# Daily price range: the day's high minus the day's low, stored as a new column.
alibaba['high-low'] = alibaba['High'] - alibaba['Low']
alibaba.head()  # the new 'high-low' column appears as the last column

  结果:

 Open High Low Close Adj Close Volume high-low
Date       
2015-09-21 65.379997 66.400002 62.959999 63.900002 63.900002 22355100 3.440003
2015-09-22 62.939999 63.270000 61.580002 61.900002 61.900002 14897900 1.689998
2015-09-23 61.959999 62.299999 59.680000 60.000000 60.000000 22684600 2.619999
2015-09-24 59.419998 60.340000 58.209999 59.919998 59.919998 20645700 2.130001
2015-09-25 60.630001 60.840000 58.919998 59.240002 59.240002 17009100 1.920002
# Line chart of the daily high-low range.
alibaba['high-low'].plot()

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x120fdcf98>

# Daily return: the day-over-day change of the adjusted close.
alibaba['daily-return'] = alibaba['Adj Close'].pct_change() # pct_change() gives each row's fractional change relative to the previous row
alibaba['daily-return'].plot(figsize=(10,4),linestyle='--',marker='o') # figsize sets the figure size; marker sets the symbol drawn at each data point

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x121504d68>

alibaba['daily-return'].plot(kind='hist')  # histogram: most daily returns fall roughly between -0.025 and 0.025, so the price is fairly stable

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x12169c630>

# Distribution of the daily returns with 100 bins; NaN values are dropped first.
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot/displot
# replace it) — confirm the installed seaborn version before upgrading.
sns.distplot(alibaba['daily-return'].dropna(),bins=100,color='purple')

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x121b15f98>

 风险分析

# 基本信息
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# 股票数据的读取
import pandas_datareader as pdr

# 可视化
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# time
from datetime import datetime

  

# Download data for five tech tickers from 2015-01-01 onward and keep only
# the adjusted close, giving one column per ticker.
start = datetime(2015,1,1)
company = ['AAPL','GOOG','MSFT','AMZN','FB']  # five companies
top_tech_df = pdr.get_data_yahoo(company, start=start)['Adj Close']

  

# Save the adjusted-close table as a CSV file under ../homework.
top_tech_df.to_csv('../homework/top5.csv')

  

top_tech_df.head()  # note: in the listing below the rows are ordered newest-first

  结果:

 AAPL AMZN FB GOOG MSFT
Date     
2017-11-03 172.500000 1111.599976 178.919998 1032.479980 84.139999
2017-11-02 168.110001 1094.219971 178.919998 1025.579956 84.050003
2017-11-01 166.889999 1103.680054 182.660004 1025.500000 83.180000
2017-10-31 169.039993 1105.280029 180.059998 1016.640015 83.180000
2017-10-30 166.720001 1110.849976 179.869995 1017.109985 83.889999
# Daily percentage change of each stock's adjusted close.
# The downloaded frame is ordered newest-first (see the head() listing above),
# so sort the index into chronological order before calling pct_change();
# otherwise every day is compared with the *following* trading day and the
# returns come out reversed (e.g. AAPL on 2017-11-02: 168.11 / 172.50 - 1).
# NOTE: the result listings recorded below were captured before this fix.
top_tech_dr = top_tech_df.sort_index().pct_change()
top_tech_dr.head()

  结果:

 AAPL AMZN FB GOOG MSFT
Date     
2017-11-03 NaN NaN NaN NaN NaN
2017-11-02 -0.025449 -0.015635 0.000000 -0.006683 -0.001070
2017-11-01 -0.007257 0.008646 0.020903 -0.000078 -0.010351
2017-10-31 0.012883 0.001450 -0.014234 -0.008640 0.000000
2017-10-30 -0.013725 0.005039 -0.001055 0.000462 0.008536
top_tech_df.plot()  # the GOOG and AMZN curves look fairly similar

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x122293fd0>

top_tech_df[['AAPL','FB','MSFT']].plot()  # plot only three of the columns

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x1225960b8>

# kind='scatter' draws a scatter plot. How closely the points cluster along a
# 45-degree line shows how correlated the two stocks' daily returns are; the
# coordinate values themselves represent a rise or a fall.
# x, y and data are passed by keyword because recent seaborn releases no longer
# accept them positionally; the keyword form also works on older releases.
sns.jointplot(x='AMZN', y='GOOG', data=top_tech_dr, kind='scatter')

  结果:<seaborn.axisgrid.JointGrid at 0x124dd0f98>

# Scatter plot of MSFT against FB daily returns.
# x, y and data are passed by keyword because recent seaborn releases no longer
# accept them positionally; the keyword form also works on older releases.
sns.jointplot(x='MSFT', y='FB', data=top_tech_dr, kind='scatter')

  结果:<seaborn.axisgrid.JointGrid at 0x125928940>

sns.pairplot(top_tech_dr.dropna())  # .dropna() removes the rows containing NaN before plotting every pair of columns

  结果:<seaborn.axisgrid.PairGrid at 0x125fa27f0>

# 52% quantile of AAPL's daily changes.
top_tech_dr['AAPL'].quantile(0.52)
# Result: -0.0001447090809730694

  

top_tech_dr['MSFT'].quantile(0.05)  # 5% quantile: with 95% (1 - 5%) confidence the daily change is no worse than -0.021328913487084877, i.e. a maximum daily loss of about 2%

  结果:-0.021328913487084877

# Download ticker 'VIPS' from the same start date, keep only the adjusted close, and plot it.
vips = pdr.get_data_yahoo('VIPS', start=start)['Adj Close']
vips.plot()

  结果:<matplotlib.axes._subplots.AxesSubplot at 0x127b0f7b8>

vips.pct_change().quantile(0.2)  # 20% quantile of the VIPS daily changes; result: -0.02181125767068397

  

 

 

 

posted @ 2018-04-17 15:44  耐烦不急  阅读(564)  评论(0)  编辑  收藏  举报