统计模型应用--基本预测手法

分类器:

1
# Predict the sign of a stock's daily return from its lagged returns and
# compare the hit rate of several scikit-learn classifiers.
import datetime

import numpy as np
import pandas as pd


def create_lagged_series(symbol, start_date, end_date, lags=5, ts=None):
    """Build a DataFrame of daily percentage returns and their lagged values.

    Args:
        symbol: Ticker handed to the pandas-datareader "quandl" source
            (e.g. "AAPL.US"). Not used when ``ts`` is supplied.
        start_date: ``datetime.datetime``; only rows with an index on or
            after this date are returned.
        end_date: Last date requested from the data source. Not used when
            ``ts`` is supplied.
        lags: Number of lagged return columns to create
            ("Lag1" .. "Lag<lags>").
        ts: Optional pre-loaded price DataFrame with a date index and
            "AdjClose" and "Volume" columns. When given, nothing is
            downloaded, which allows offline or cached use.

    Returns:
        A DataFrame indexed by date with the columns "Volume", "Today"
        (percentage return of the adjusted close), "Lag1" .. "Lag<lags>"
        (the same return shifted back 1..lags rows) and "Direction"
        (sign of "Today"). Rows without enough price history contain NaN
        in the affected columns.
    """
    if ts is None:
        # Imported lazily so the function can be used with caller-supplied
        # data when pandas-datareader is not installed.
        import pandas_datareader.data as web

        # Request one extra year before start_date so that the lagged
        # columns already have history at start_date.
        ts = web.DataReader(
            symbol,
            "quandl",
            start_date - datetime.timedelta(days=365),
            end_date,
        )

    # shift()/pct_change() work row-wise, so rows must be in date order.
    ts = ts.sort_index()
    adj_close = ts["AdjClose"]

    # Price levels: today's adjusted close plus the previous `lags` closes.
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = adj_close
    tslag["Volume"] = ts["Volume"]
    for lag in range(1, lags + 1):
        tslag["Lag%d" % lag] = adj_close.shift(lag)

    # Percentage returns derived from the price levels above.
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    # Replace (near-)zero returns with a small positive number so that
    # "Direction" is never 0. NaN compares False and is left untouched.
    tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

    for lag in range(1, lags + 1):
        column = "Lag%d" % lag
        tsret[column] = tslag[column].pct_change() * 100.0

    # +1.0 for an up day, -1.0 for a down day.
    tsret["Direction"] = np.sign(tsret["Today"])
    return tsret[tsret.index >= start_date]


def main():
    """Fit each classifier on pre-2005 data and print its 2005 hit rate."""
    # scikit-learn is only needed by this driver, so it is imported here and
    # the data-preparation function above stays importable without it.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix
    from sklearn.svm import LinearSVC, SVC

    snpret = create_lagged_series(
        "AAPL.US",
        datetime.datetime(2001, 1, 10),
        datetime.datetime(2005, 12, 31),
        lags=5,
    )

    # Features: the two most recent lagged returns. Target: today's direction.
    X = snpret[["Lag1", "Lag2"]]
    Y = snpret["Direction"]

    # Everything before 2005 is training data; 2005 is the test set.
    start_test = datetime.datetime(2005, 1, 1)
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    Y_train = Y[Y.index < start_test]
    Y_test = Y[Y.index >= start_test]

    print("Hit Rates/Confusion Matrices:\n")
    models = [
        ("LR", LogisticRegression()),
        ("LDA", LDA()),
        ("QDA", QDA()),
        ("LSVC", LinearSVC()),
        ("RSVM", SVC(
            C=1000000.0, cache_size=200, class_weight=None,
            coef0=0.0, degree=3, gamma=0.0001, kernel="rbf",
            max_iter=-1, probability=False, random_state=None,
            shrinking=True, tol=0.001, verbose=False,
        )),
        ("RF", RandomForestClassifier(
            n_estimators=1000, criterion="gini",
            max_depth=None, min_samples_split=2,
            # "sqrt" is what the removed "auto" setting meant for classifiers.
            min_samples_leaf=1, max_features="sqrt",
            bootstrap=True, oob_score=False, n_jobs=1,
            random_state=None, verbose=0,
        )),
    ]

    for name, model in models:
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        print("%s:\n%0.3f" % (name, model.score(X_test, Y_test)))
        # confusion_matrix expects (y_true, y_pred): rows are the true
        # classes, columns the predicted ones.
        print("%s\n" % confusion_matrix(Y_test, pred))


if __name__ == "__main__":
    main()

   

 

posted @   划得戳  阅读(452)  评论(0)  编辑  收藏  举报
努力加载评论中...
点击右上角即可分享
微信分享提示