机器学习预测共享单车
共享单车
共享单车的数据集需要从 https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 下载
import sklearn
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib
%matplotlib inline
# Make SimHei the preferred sans-serif font for matplotlib output.
# NOTE(review): presumably so that Chinese text in plot labels renders
# correctly -- confirm the font is installed on the target machine.
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Load the data set and split it into training and test partitions.
from sklearn.model_selection import train_test_split

# day.csv is read from the current working directory.
data = pd.read_csv('./day.csv')
print(data.shape)  # (rows, columns) of the raw table
# print(data.head())  # uncomment to peek at the first rows

# Drop the unwanted columns by name (axis=1 selects columns).
# NOTE(review): presumably 'casual' and 'registered' are removed because they
# add up to the target count and would leak it -- confirm against the data set.
df = data.drop(['dteday', 'casual', 'registered'], axis=1)
# NOTE(review): the original note here said (731, 12), which disagrees with the
# 13 columns recorded for the splits below -- confirm against the actual file.
print(df.shape)
# print(df.head())  # uncomment to check the remaining columns
# print(df.info())  # uncomment to check for missing values

# Convert the DataFrame to a NumPy ndarray. DataFrame.as_matrix() was removed
# in pandas 1.0; .values is available in both old and new pandas versions.
dataset = df.values

# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable.
train_set, test_set = train_test_split(dataset, test_size=0.1, random_state=37)

# Check that the split is roughly 9:1.
print(train_set.shape)  # originally recorded as (657, 13)
print(test_set.shape)  # originally recorded as (74, 13)
print(dataset[:3])
# Build the random-forest regression model.
from sklearn.ensemble import RandomForestRegressor

# n_estimators: number of decision trees; more is generally better, but the
# benefit levels off beyond a certain point.
# (The original also built a default RandomForestRegressor() first, which was
# overwritten immediately and never used; that dead instantiation is removed.)
rf_regressor = RandomForestRegressor(
    n_estimators=1000,
    max_depth=10,
    min_samples_split=10,
)

# Train the model: every column except the last is used as a feature and the
# last column is the regression target.
rf_regressor.fit(train_set[:, :-1], train_set[:, -1])
# Evaluate the regression model on the held-out test set.
import sklearn.metrics as metrics

# Same layout as in training: all columns but the last are features, the last
# column is the true target.
test_y = test_set[:, -1]
predict_test_y = rf_regressor.predict(test_set[:, :-1])

print('随机森林回归模型的评测结果----->>>')
# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# but explained variance and R^2 are not, so the original (y_pred, y_true)
# order reported wrong scores. The MSE line was also a syntax error
# ("round(.../)"); it now rounds to 2 decimals like the other two metrics.
print('均方误差MSE :{}'.format(round(metrics.mean_squared_error(test_y, predict_test_y), 2)))
print('解释方差分 :{}'.format(round(metrics.explained_variance_score(test_y, predict_test_y), 2)))
print('R平方得分 :{}'.format(round(metrics.r2_score(test_y, predict_test_y), 2)))
下载后导入文件: data=pd.read_csv('./day.csv') # read_csv 在读取csv文件时使用,csv文件与模型代码必须在同一目录下
df=data.drop(['dteday','casual','registered'],axis=1) #删除某些不要的数据,axis=1是列,axis=0是行
dataset = df.as_matrix() # 将pandas的DataFrame转为np.ndarray(将表格数据转为矩阵);注意:as_matrix() 在 pandas 1.0 中已被移除,新版本请改用 df.values 或 df.to_numpy()