记录RandomForestRegressor&MLPRegressor处理时间序列数据

数据概况:八个变量一个目标值;时间序列数据;

数据处理:标准化(StandardScaler)后的数据更适用于神经网络模型;

方法:随机森林回归、神经网络模型(效果不佳)。

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import max_error
# ---------------------------------------------------------------------------------
# Data preparation
# Load the raw data set from CSV.
data = pd.read_csv("yiyao.csv", encoding="utf8", low_memory=False)
data.head()

data.info()

# Select the feature columns (positions 2..9) and the target column (position 1).
feature_columns = list(range(2, 10))
features = data.iloc[:, feature_columns].values
targets = data.iloc[:, 1].values

# Split into training and test sets without shuffling, so the training set
# is the leading 70% of the rows and the test set is the trailing 30%.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.3,
    shuffle=False,
)
print(f'X_train shape:{X_train.shape}')
print(f'X_test shape:{X_test.shape}')

随机森林模型

# Random forest model
from sklearn.ensemble import RandomForestRegressor

# All hyperparameters are left at their defaults. No random_state is set,
# so the fitted forest (and every number below) can differ between runs.
regr = RandomForestRegressor()

regr = regr.fit(X_train, y_train)
# Predict on the test set.
y_pred = regr.predict(X_test)
# Write the predictions to a file. y_pred is rebound to a single-column
# DataFrame here and is used in that form by the metric calls below.
y_pred = pd.DataFrame(y_pred)
y_pred.to_csv('y_pred.csv', header=True, index=False)
# Model score on the test set.
score_r = regr.score(X_test, y_test)
print("Random Forest:{}".format(score_r))
# Error metrics are printed explicitly: as bare expressions their values are
# discarded when the code runs as a script or inside a single notebook cell.
# Mean absolute percentage error
print("MAPE:{}".format(metrics.mean_absolute_percentage_error(y_test, y_pred)))
# Maximum error
print("ME:{}".format(metrics.max_error(y_test, y_pred)))
# Mean absolute error
print("MAE:{}".format(metrics.mean_absolute_error(y_test, y_pred)))
# Mean squared error
print("MSE:{}".format(metrics.mean_squared_error(y_test, y_pred)))
# Feature importances, paired with display names for the eight features.
feature_name = ["x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"]
imp = [*zip(feature_name, regr.feature_importances_)]
print(imp)
# Split the (name, importance) pairs into the two axes of the bar chart.
x = [name for name, _ in imp]
y = [importance for _, importance in imp]

# No `%matplotlib inline` magic here: it is IPython-only syntax and is not
# valid Python; plt.show() is what displays the figure.
plt.figure(figsize=(15, 10))

plt.barh(x, y, color='green')
plt.xlabel("Degree of importance")
plt.ylabel("Features")
plt.show()

神经网络模型

# Standardize the features before training the neural network.
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

stdScaler = StandardScaler()

# Fit the scaler on the training data only, then apply that same fitted
# transformation to the test data. Calling fit_transform on the test set would
# re-estimate the mean/variance from test rows, so train and test would be
# scaled differently and test-set statistics would leak into preprocessing.
X_train = stdScaler.fit_transform(X_train)
X_test = stdScaler.transform(X_test)

regr = MLPRegressor(
    random_state=42,
    hidden_layer_sizes=(4, 2),
    max_iter=50000,
)
regr = regr.fit(X_train, y_train)
# Predict on the test set.
y_pred = regr.predict(X_test)
# Model score on the test set.
score_r = regr.score(X_test, y_test)
print(":{}".format(score_r))
# Show the estimator's parameters (printed so the output is not discarded
# when this runs as a script).
print(regr.get_params())
posted on 2022-03-11 11:17  cookie的笔记簿  阅读(486)  评论(0)  编辑  收藏  举报