# Regression (回归): scikit-learn linear-model examples

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
#数据集
from sklearn import datasets
from sklearn.datasets import load_iris
#数据集划分
from sklearn.model_selection import train_test_split
#api
from sklearn import linear_model
#最小二乘法
#from sklearn.linear_model import LinearRegression
#岭回归
from sklearn.linear_model import Ridge

#均方误差 mean_squared_error
#R^2值   r2_score
#R2与score() 相同
from sklearn.metrics import mean_squared_error, r2_score

# Ordinary least squares with sklearn.linear_model.LinearRegression.
# In least squares, x holds the samples and y (the labels) holds the true values.
# return_X_y controls the structure of the loaded data:
# when True, the features and the target are returned as two separate objects.

# Diabetes example
x, y = datasets.load_diabetes(return_X_y=True)

# Use a single feature only (column index 2), kept as a 2-D column
x = x[:, np.newaxis, 2]

# Hold out the last 20 samples as the test set; everything before them
# is the training set (same split for features and targets)
x_train, x_test = x[:-20], x[-20:]
y_train, y_test = y[:-20], y[-20:]

# Create the linear regression model and train it on the training set
regr = linear_model.LinearRegression()
regr.fit(x_train, y_train)

# Predict the targets of the test set; these points lie on the regression line
y_pred = regr.predict(x_test)

# coef_ stores the fitted coefficient(s) of the input feature(s)
print('Coefficients: \n', regr.coef_)

# Mean squared error between predicted and true targets
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error: %.2f' % mse)

# R^2 between predicted and true targets; 1 means a perfect prediction
r2 = r2_score(y_test, y_pred)
print('Coefficient of determination: %.2f' % r2)

# Show the result: test samples as black dots, fitted line in blue
plt.scatter(x_test, y_test, color='black')
plt.plot(x_test, y_pred, color='blue', linewidth=3)

# Empty tuples remove the tick marks from both axes
plt.xticks(())
plt.yticks(())

plt.show()


# Ridge regression (sklearn.linear_model.Ridge): least squares plus an L2
# penalty term.
# fit_intercept -- whether to fit an intercept for the model; with
# fit_intercept=False no intercept is used (the data should be centered).
#
# The original example passed normalize=True (subtract the column mean and
# divide by the column L2 norm before fitting). That argument was deprecated
# in scikit-learn 1.0 and removed in 1.2, so it is reproduced explicitly here:
#   * standardize the features with StandardScaler,
#   * multiply the penalty by n_samples,
#   * divide the fitted coefficients by the scaler's scale_ so they are
#     expressed in the original feature units again.

from sklearn.linear_model import Ridge,RidgeCV
from sklearn.preprocessing import StandardScaler

# Data
iris = load_iris()
x = iris['data']
y = iris['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2)  # 8:2 split
################################################
# Candidate lambda values (log-spaced)
Lambdas = np.logspace(-5, 2, 200)

# Standardize once; the scaling does not depend on lambda
scaler = StandardScaler().fit(x_train)
x_train_std = scaler.transform(x_train)
n_train = x_train_std.shape[0]

# Fitted coefficients for every lambda (one row per lambda)
ridge_cofficients = []
for Lambda in Lambdas:
    # Ridge calls the penalty strength `alpha`; it is rescaled by n_samples
    # to match the former normalize=True parameterization
    ridge = Ridge(alpha=Lambda * n_train)
    ridge.fit(x_train_std, y_train)
    # Back to original feature units
    ridge_cofficients.append(ridge.coef_ / scaler.scale_)

# Plot the ridge trace: coefficients as a function of lambda
#plt.style.use('bmh')
plt.plot(Lambdas, ridge_cofficients)
# Logarithmic x axis
plt.xscale('log')
plt.xlabel('Log(Lambda)')
plt.ylabel('Cofficients')
plt.show()

#https://blog.csdn.net/weixin_43374551/article/details/83688913


# Cross-validated ridge regression with the RidgeCV API.
#
# The original example passed normalize=True to RidgeCV and Ridge. That
# argument was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# features are standardized explicitly and the penalty grid is multiplied by
# n_samples, which is the equivalent parameterization for ridge.
# NOTE(review): the scaling statistics come from the whole training split
# rather than from each CV fold, so the selected lambda may differ slightly
# from what the removed option produced.

from sklearn import model_selection
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
# Data
iris = load_iris()
x = iris['data']
y = iris['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2)  # 8:2 split
################################################
# Candidate lambda values (log-spaced)
Lambdas = np.logspace(-5, 2, 200)

# Fit the scaler on the training split only and reuse it for the test split
scaler = StandardScaler().fit(x_train)
x_train_std = scaler.transform(x_train)
x_test_std = scaler.transform(x_test)
n_train = x_train_std.shape[0]

# 10-fold cross-validation scored by (negative) mean squared error
ridge_cv = RidgeCV(
    alphas=Lambdas * n_train,
    scoring='neg_mean_squared_error', cv=10)
ridge_cv.fit(x_train_std, y_train)

# Report the best penalty in the units of the original Lambdas grid
print(ridge_cv.alpha_ / n_train)

# Refit a plain Ridge model with the selected penalty (already rescaled)
ridge = Ridge(alpha=ridge_cv.alpha_)
ridge.fit(x_train_std, y_train)

# Evaluate on the held-out split: mean squared error
ridge_pred = ridge.predict(x_test_std)
MSE = mean_squared_error(y_test, ridge_pred)
print(MSE)
#https://ask.hellobi.com/blog/lsxxx2011/10581



# Lasso regression: sklearn.linear_model.Lasso
# Besides fit(), the estimator offers path(), which computes the elastic net
# path with coordinate descent.
#
# The original example passed normalize=True to Lasso and LassoCV. That
# argument was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# features are standardized explicitly and alpha is multiplied by
# sqrt(n_samples), which is the equivalent parameterization for the lasso.
# NOTE(review): for LassoCV the scaling statistics come from the whole
# training split rather than from each CV fold, so the selected alpha may
# differ slightly from what the removed option produced.


from sklearn.linear_model import Lasso,LassoCV
from sklearn.preprocessing import StandardScaler
# Data
iris = load_iris()
x = iris['data']
y = iris['target']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2)  # 8:2 split
################################################
alphas = np.logspace(-2, 2, 20)

# Standardize once; the scaling does not depend on alpha
scaler = StandardScaler().fit(x_train)
x_train_std = scaler.transform(x_train)
alpha_scale = np.sqrt(x_train_std.shape[0])

# Fitted coefficients for every alpha (one row per alpha)
lasso_cofficients = []
for alpha in alphas:
    lasso = Lasso(alpha=alpha * alpha_scale, max_iter=10000)
    lasso.fit(x_train_std, y_train)
    # Back to original feature units
    lasso_cofficients.append(lasso.coef_ / scaler.scale_)
# Plot the lasso coefficient paths
plt.plot(alphas, lasso_cofficients)
# Logarithmic x axis
plt.xscale('log')
plt.xlabel('Log(alpha)')
plt.ylabel('Cofficients')
plt.show()

# Cross-validation of the lasso model (10 folds)
lasso_cv = LassoCV(alphas=alphas * alpha_scale, cv=10, max_iter=10000)
lasso_cv.fit(x_train_std, y_train)
# Best alpha, converted back to the units of the original alphas grid.
# A bare expression shows nothing when run as a script, so print it.
lasso_best_alpha = lasso_cv.alpha_ / alpha_scale
print(lasso_best_alpha)

#https://ask.hellobi.com/blog/lsxxx2011/10581



# Bayesian ridge regression: sklearn.linear_model.BayesianRidge
from sklearn import linear_model
clf = linear_model.BayesianRidge()
# Three training samples lying on the line y = x1 (= x2)
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# A bare expression shows nothing when run as a script, so print the prediction
print(clf.predict([[1, 1]]))




# Logistic regression, used for classification problems:
# sklearn.linear_model.LogisticRegression

from sklearn import model_selection
from sklearn import metrics
import seaborn as sns

# Data: iris features and class labels
iris = load_iris()
x, y = iris['data'], iris['target']
# 8:2 train/test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
################################################
# Build and fit the model
logistic_model = linear_model.LogisticRegression()
logistic_model.fit(x_train, y_train)

# Print the fitted parameters
print('Intercept:', logistic_model.intercept_)
print('Coef:', logistic_model.coef_)

# Predict the test split and print how many samples fall into each class
pred = logistic_model.predict(x_test)
pred_counts = pd.Series(pred).value_counts()
print(pred_counts)

 

# posted @ 2020-09-27 17:03  愿君多采撷  (blog footer: 103 views, 0 comments)