线性回归-python实现的5种方法

import numpy as np
from scipy.stats import linregress
import statsmodels.api as sm
import matplotlib.pyplot as plt
from gekko import GEKKO

# Data
x = np.array([4,5,2,3,-1,1,6,7])
y = np.array([0.3,0.8,-0.05,0.1,-0.8,-0.5,0.5,0.65])

# calculate R^2
def rsq(y1,y2):
    yresid= y1 - y2
    SSresid = np.sum(yresid**2)
    SStotal = len(y1) * np.var(y1)
    r2 = 1 - SSresid/SStotal
    return r2

# Method 1: scipy linregress
slope,intercept,r,p_value,std_err = linregress(x,y)
a = [slope,intercept]
print('R^2 linregress = '+str(r**2))

# Method 2: numpy polyfit (1=linear)
a = np.polyfit(x,y,1); print(a)
yfit = np.polyval(a,x)
print('R^2 polyfit    = '+str(rsq(y,yfit)))

# Method 3: numpy linalg solution
#       y =     X a
#   X^T y = X^T X a
X = np.vstack((x,np.ones(len(x)))).T
# matrix operations
XX = np.dot(X.T,X)
XTy = np.dot(X.T,y)
a = np.linalg.solve(XX,XTy)
# same solution with lstsq
a = np.linalg.lstsq(X,y,rcond=None)[0]
yfit = a[0]*x+a[1]; print(a)
print('R^2 matrix     = '+str(rsq(y,yfit)))

# Method 4: statsmodels ordinary least squares
X = sm.add_constant(x,prepend=False)
model = sm.OLS(y,X).fit()
yfit = model.predict(X)
a = model.params
print(model.summary())

# Method 5: Gekko for constrained regression
m = GEKKO(remote=False); m.options.IMODE=2
c  = m.Array(m.FV,2); c[0].STATUS=1; c[1].STATUS=1
c[1].lower=-0.5
xd = m.Param(x); yd = m.Param(y); yp = m.Var()
m.Equation(yp==c[0]*xd+c[1])
m.Minimize((yd-yp)**2)
m.solve(disp=False)
c = [c[0].value[0],c[1].value[1]]
print(c)

# plot data and regressed line
plt.plot(x,y,'ko',label='data')
xp = np.linspace(-2,8,100)
slope     = str(np.round(a[0],2))
intercept = str(np.round(a[1],2))
eqn = 'LstSQ: y='+slope+'x'+intercept
plt.plot(xp,a[0]*xp+a[1],'r-',label=eqn)
slope     = str(np.round(c[0],2))
intercept = str(np.round(c[1],2))
eqn = 'Constraint: y='+slope+'x'+intercept
plt.plot(xp,c[0]*xp+c[1],'b--',label=eqn)
plt.grid()
plt.legend()
plt.show()

  

posted @ 2022-03-17 14:15  华小电  阅读(707)  评论(0编辑  收藏  举报