统计模型应用--基本预测手法
分类器:
"""Forecast the daily direction of a stock with several scikit-learn classifiers.

The script builds a table of lagged percentage returns for one symbol, splits
it into a training and a test period by date, fits each classifier on the
training period and prints its hit rate and confusion matrix on the test
period.
"""
import datetime

import numpy as np
import pandas as pd
import pandas_datareader.data as web
import sklearn
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC, SVC


def create_lagged_series(symbol, start_date, end_date, lags=5):
    """Build a DataFrame of percentage returns and their lagged values.

    Args:
        symbol: Ticker passed to ``web.DataReader`` (the "quandl" source).
        start_date: First ``datetime.datetime`` kept in the returned frame.
        end_date: Last ``datetime.datetime`` requested from the data source.
        lags: Number of lagged return columns (``Lag1`` .. ``Lag<lags>``).

    Returns:
        A DataFrame indexed like the downloaded price series, restricted to
        rows at or after ``start_date``, with the columns ``Volume``,
        ``Today`` (percentage return of the adjusted close), ``Lag1`` ..
        ``Lag<lags>`` (percentage returns of the shifted adjusted close) and
        ``Direction`` (sign of ``Today``).
    """
    # Request one extra year before start_date; presumably so the shifted
    # columns already hold values once rows before start_date are dropped.
    ts = web.DataReader(
        symbol, "quandl",
        start_date - datetime.timedelta(days=365),
        end_date,
    ).sort_index()

    # Adjusted close, volume and the close shifted by 1..lags rows.
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["AdjClose"]
    tslag["Volume"] = ts["Volume"]
    for i in range(lags):
        tslag["Lag%s" % str(i + 1)] = ts["AdjClose"].shift(i + 1)

    # Percentage returns of today's price.
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    # Replace near-zero returns with a small positive value so that np.sign
    # below never yields 0 for them. A single .loc assignment is used instead
    # of element-wise chained indexing, which may write to a temporary copy.
    tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

    # Percentage returns of each lagged price column.
    for i in range(lags):
        column = "Lag%s" % str(i + 1)
        tsret[column] = tslag[column].pct_change() * 100.0

    # +1 for an up day, -1 for a down day.
    tsret["Direction"] = np.sign(tsret["Today"])
    return tsret[tsret.index >= start_date]


def main():
    """Fit each classifier on pre-2005 data and report its 2005 test results."""
    snpret = create_lagged_series(
        "AAPL.US", datetime.datetime(2001, 1, 10),
        datetime.datetime(2005, 12, 31), lags=5,
    )

    # The two most recent lagged returns are the predictors.
    X = snpret[["Lag1", "Lag2"]]
    Y = snpret["Direction"]

    # Everything before this date is training data, the rest is test data.
    start_test = datetime.datetime(2005, 1, 1)
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    Y_train = Y[Y.index < start_test]
    Y_test = Y[Y.index >= start_test]

    print("Hit Rates/Confusion Matrices:\n")
    models = [
        ("LR", LogisticRegression()),
        ("LDA", LDA()),
        ("QDA", QDA()),
        ("LSVC", LinearSVC()),
        ("RSVM", SVC(
            C=1000000.0, cache_size=200, class_weight=None,
            coef0=0.0, degree=3, gamma=0.0001, kernel="rbf",
            max_iter=-1, probability=False, random_state=None,
            shrinking=True, tol=0.001, verbose=False,
        )),
        ("RF", RandomForestClassifier(
            n_estimators=1000, criterion="gini",
            max_depth=None, min_samples_split=2,
            # "auto" is no longer accepted by scikit-learn; for classifiers
            # it was equivalent to "sqrt".
            min_samples_leaf=1, max_features="sqrt",
            bootstrap=True, oob_score=False, n_jobs=1,
            random_state=None, verbose=0,
        )),
    ]

    for name, model in models:
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        print("%s:\n%0.3f" % (name, model.score(X_test, Y_test)))
        # Predictions are passed first (scikit-learn's signature is
        # (y_true, y_pred)), so rows index predicted labels and columns
        # index actual labels in the printed matrix.
        print("%s\n" % confusion_matrix(pred, Y_test))


if __name__ == "__main__":
    main()
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步