记录：使用 RandomForestRegressor 与 MLPRegressor 处理时间序列数据

数据概况：八个变量一个目标值；时间序列数据；

数据处理：标准化（StandardScaler）后的数据更适用于神经网络模型；

方法：随机森林回归、神经网络模型（效果不佳）。

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import max_error

# ---------------------------------------------------------------------------------
# Data preparation
# Load the raw table from CSV.
# Alternative input file kept for reference: "yiyaobzh.csv" (same read options).
data = pd.read_csv("yiyao.csv", encoding="utf8", low_memory=False)

# Notebook inspection: first rows, then column dtypes / non-null counts.
data.head()

data.info()

# Column 1 holds the regression target; columns 2..9 hold the eight predictors.
features = data.iloc[:, list(range(2, 10))].values
targets = data.iloc[:, 1].values

# Chronological hold-out split: shuffle=False keeps the row order, so the
# first 70% of rows become the training set and the last 30% the test set.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.3,
    shuffle=False,
)

print(f'X_train shape:{X_train.shape}')
print(f'X_test shape:{X_test.shape}')

随机森林模型

# Random forest model
from sklearn.ensemble import RandomForestRegressor

# The forest is built with library defaults. Settings that are present but
# disabled in the original cell: n_estimators=50, max_features=32,
# random_state=1.
# NOTE: with no random_state fixed, the fitted forest (and every score derived
# from it) can differ from run to run.
regr = RandomForestRegressor().fit(X_train, y_train)

# Predict the target for the held-out test rows and display the array.
y_pred = regr.predict(X_test)
y_pred

# Save the predictions to disk as a single-column CSV (header row, no index).
# y_pred is rebound to the DataFrame wrapper and used in that form below.
y_pred = pd.DataFrame(data=y_pred)
y_pred.to_csv('y_pred.csv', index=False, header=True)

# Test-set score of the fitted model (score() is R^2 for sklearn regressors).
score_r = regr.score(X_test, y_test)
print(f"Random Forest:{score_r}")

# Mean absolute percentage error (MAPE)
metrics.mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

# Maximum residual error (ME)
metrics.max_error(y_true=y_test, y_pred=y_pred)

# Mean absolute error (MAE)
metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Mean squared error (MSE)
metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

#返回特征的重要性
#regr.feature_importances_
feature_name=["x1","x2","x3","x4","x5","x6","x7","x8"]
imp=[*zip(feature_name,regr.feature_importances_)]
imp

x=[]
y=[]
for i in range(0,8):
    x.append(imp[i][0])
for i in range(0,8):
    y.append(imp[i][1])
    
%matplotlib inline
plt.figure(figsize=(15, 10))

plt.barh(x,y,color='green')
plt.xlabel("Degree of importance")
plt.ylabel("Features")
plt.show()

神经网络模型

# Standardization: rescale every feature to zero mean and unit variance
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()

# Learn the per-feature mean / standard deviation from the training data
# only, and scale the training data with them.
X_train = stdScaler.fit_transform(X_train)

# Scale the test data with the statistics learned from the training data.
# Calling fit_transform here would re-fit the scaler on the test set, so the
# two sets would be scaled differently and test-set information would leak
# into the evaluation.
X_test = stdScaler.transform(X_test)

from sklearn.neural_network import MLPRegressor

# Small multilayer perceptron: two hidden layers with 4 and 2 units, a fixed
# seed for reproducible weight initialisation, and a generous iteration cap.
# The activation is left at the library default (an explicit
# activation='relu' is present but disabled in the original cell).
regr = MLPRegressor(
    hidden_layer_sizes=(4, 2),
    max_iter=50000,
    random_state=42,
).fit(X_train, y_train)

# Predict the target for the test rows.
y_pred = regr.predict(X_test)
# y_pred

# Test-set score of the fitted network (score() is R^2 for sklearn regressors).
score_r = regr.score(X_test, y_test)
print(f":{score_r}")

# Display the full set of estimator parameters.
regr.get_params()

posted on 2022-03-11 11:17 cookie的笔记簿阅读(486) 评论(0) 编辑收藏举报