随笔- 59 文章- 0 评论- 1 阅读- 16万

Logistic Regression（对率回归/逻辑回归)【python实现】

名为回归，其实为一种分类算法

数据集：

$D = \lbrace (x_i, y_i) \rbrace, \quad i = 1, 2, ..., n$

其中

$x_i = (x_{i1}; x_{i2}; ...; x_{im})$

即每个样本有m个属性

$y_i = \begin{cases} 1 , & \text{属于1类}\\ 0 , & \text{属于0类} \end{cases}, \quad i = 1, 2, ..., n$

$\hat x_i = (x_{i1}; x_{i2}; ...; x_{im};1)$

使用sigmoid函数

$y = \frac{1}{1+\exp(-z)}$

$z = \omega ^Tx + b$

令：

$X = [\hat x_1^T;\hat x_2^T; ...; \hat x_n^T]$

$Y = [y_1; y_2; ...; y_n]$

将分类函数化简，利用极大似然估计求 $\omega ^*$

$\omega ^*= (\omega _1; \omega _2; ...; \omega _m;b)$

利用牛顿法求极大似然函数极值，解出 $\omega ^*$

python程序

 import numpy as np 
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D
M = 3  # number of parameters: two attributes plus the bias term b
N = 50  # binary classification, N samples per class


def _make_design_matrix(feature_a, feature_b):
    """Return an (n, 3) np.matrix whose rows are (feature_a, feature_b, 1).

    The trailing column of ones lets the bias be learned as an ordinary
    weight. np.asmatrix / np.vstack are used instead of np.mat /
    np.row_stack, which NumPy 2.0 removed / deprecated.
    """
    ones = np.ones(len(feature_a))
    return np.asmatrix(np.vstack((feature_a, feature_b, ones))).T


# First class (label 0): feature1 drawn from [0, 10), feature2 from [0, 5).
feature11 = np.random.randint(0, 10, size=N)
feature12 = np.random.randint(0, 5, size=N)
X1 = _make_design_matrix(feature11, feature12)
Y1 = np.asmatrix(np.zeros((N, 1)))

# Second class (label 1): feature1 drawn from [0, 10), feature2 from [6, 10).
feature21 = np.random.randint(0, 10, size=N)
feature22 = np.random.randint(6, 10, size=N)
X2 = _make_design_matrix(feature21, feature22)
Y2 = np.asmatrix(np.ones((N, 1)))
# Scatter plot of both classes in the (feature1, feature2) plane.
fig = plt.figure(1)
for _xs, _ys, _marker, _colour in (
    (feature11, feature12, 'o', 'b'),  # first class: blue circles
    (feature21, feature22, '*', 'y'),  # second class: yellow stars
):
    plt.scatter(_xs, _ys, marker=_marker, color=_colour)
plt.xlabel('feature1')
plt.ylabel('feature2')
plt.title('samples')
# Newton's method: minimise the negative log-likelihood to estimate Omega.
X = np.asmatrix(np.vstack((X1, X2)))  # stacked samples, one row each
Y = np.asmatrix(np.vstack((Y1, Y2)))  # stacked 0/1 labels, one row each
Omega = np.asmatrix(np.zeros((M, 1)))
Epsilon = 0.001  # stop once the gradient norm is at or below this value
# Hard cap on iterations. If the two classes are linearly separable the
# likelihood has no finite maximiser: the weights keep growing, the Hessian
# becomes numerically singular and the gradient test alone may never fire.
MaxIter = 100
Delta = 1
counts = 0
while Delta > Epsilon and counts < MaxIter:
    counts += 1
    # p1 = sigmoid(X * Omega), written as exp(-log(1 + exp(-z))) so that
    # large |z| cannot overflow.
    z = np.asarray(X * Omega)
    p1 = np.exp(-np.logaddexp(0.0, -z))
    # Gradient of the negative log-likelihood: -X^T (Y - p1), shape (M, 1).
    df = -(X.T * (Y - p1))
    # Hessian: X^T diag(p1 * (1 - p1)) X, shape (M, M).
    d2f = X.T * np.asmatrix(np.asarray(X) * (p1 * (1.0 - p1)))
    # The pseudo-inverse keeps the step defined when the Hessian is singular.
    Omega = Omega - np.asmatrix(np.linalg.pinv(d2f)) * df
    Delta = np.linalg.norm(df)
# Classification function
def Classficate(sample):
    """Return the model's probability that *sample* belongs to class 1.

    sample must be an (M, 1) column (attributes followed by a trailing 1 for
    the bias) so that Omega.T * sample is a 1x1 product. The result is a
    Python float in [0, 1].

    The sigmoid is evaluated as exp(-log(1 + exp(-z))), which stays finite
    for arbitrarily large |z|; math.exp(-z) would raise OverflowError for
    very negative z.
    """
    z = (Omega.T * sample).item()  # explicit 1x1 -> scalar extraction
    return float(np.exp(-np.logaddexp(0.0, -z)))
# Plot the fitted model as a surface over the (feature1, feature2) plane.
K = 50  # grid points per axis
xx = np.linspace(0, 10, num=K)
yy = np.linspace(0, 10, num=K)
xx_1, yy_1 = np.meshgrid(xx, yy)
Omega_h = np.array(Omega.T)  # plain ndarray so `*` below is element-wise
# Sigmoid of the linear score, written as exp(-log(1 + exp(-z))) so that
# large weights cannot overflow np.exp.
zz_arg = Omega_h[0, 0] * xx_1 + Omega_h[0, 1] * yy_1 + Omega_h[0, 2]
zz_1 = np.exp(-np.logaddexp(0.0, -zz_arg))
fig = plt.figure(2)
# Create the 3D axes through the figure: a bare Axes3D(fig) is no longer
# attached to the figure automatically in current Matplotlib releases.
ax1 = fig.add_subplot(111, projection='3d')
ax1.plot_surface(xx_1, yy_1, zz_1, alpha=0.6, color='r')
ax1.set_xlabel('feature1')
ax1.set_ylabel('feature2')
ax1.set_zlabel('class')
ax1.set_title('LogisiticRegression model')
plt.show()

结果

参考资料

1. 周志华《机器学习》“线性模型”一章
2.梯度下降法、牛顿法和拟牛顿法

posted @ 2020-05-02 23:09 ldfm 阅读(766) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

公告

昵称： ldfm
园龄： 5年6个月
粉丝： 3
关注： 3

2025年2月

日

一

二

三

四

五

六

随笔分类 (48)

随笔档案 (60)

阅读排行榜

评论排行榜

1. python matplotlib画多张图(1)

有点锋芒

Logistic Regression（对率回归/逻辑回归)【python实现】

名为回归，其实为一种分类算法

python程序

结果

参考资料

公告

常用链接

我的标签

积分与排名

随笔分类 (48)

随笔档案 (60)

阅读排行榜

评论排行榜

推荐排行榜

最新评论

	import numpy as np
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	M = 3  # number of parameters: two attributes plus the bias term b
	N = 50  # binary classification, N samples per class


	def _make_design_matrix(feature_a, feature_b):
	    """Return an (n, 3) np.matrix whose rows are (feature_a, feature_b, 1).

	    The trailing column of ones lets the bias be learned as an ordinary
	    weight. np.asmatrix / np.vstack are used instead of np.mat /
	    np.row_stack, which NumPy 2.0 removed / deprecated.
	    """
	    ones = np.ones(len(feature_a))
	    return np.asmatrix(np.vstack((feature_a, feature_b, ones))).T


	# First class (label 0): feature1 drawn from [0, 10), feature2 from [0, 5).
	feature11 = np.random.randint(0, 10, size=N)
	feature12 = np.random.randint(0, 5, size=N)
	X1 = _make_design_matrix(feature11, feature12)
	Y1 = np.asmatrix(np.zeros((N, 1)))

	# Second class (label 1): feature1 drawn from [0, 10), feature2 from [6, 10).
	feature21 = np.random.randint(0, 10, size=N)
	feature22 = np.random.randint(6, 10, size=N)
	X2 = _make_design_matrix(feature21, feature22)
	Y2 = np.asmatrix(np.ones((N, 1)))
	# Scatter plot of both classes in the (feature1, feature2) plane.
	fig = plt.figure(1)
	for _xs, _ys, _marker, _colour in (
	    (feature11, feature12, 'o', 'b'),  # first class: blue circles
	    (feature21, feature22, '*', 'y'),  # second class: yellow stars
	):
	    plt.scatter(_xs, _ys, marker=_marker, color=_colour)
	plt.xlabel('feature1')
	plt.ylabel('feature2')
	plt.title('samples')
	# Newton's method: minimise the negative log-likelihood to estimate Omega.
	X = np.asmatrix(np.vstack((X1, X2)))  # stacked samples, one row each
	Y = np.asmatrix(np.vstack((Y1, Y2)))  # stacked 0/1 labels, one row each
	Omega = np.asmatrix(np.zeros((M, 1)))
	Epsilon = 0.001  # stop once the gradient norm is at or below this value
	# Hard cap on iterations. If the two classes are linearly separable the
	# likelihood has no finite maximiser: the weights keep growing, the Hessian
	# becomes numerically singular and the gradient test alone may never fire.
	MaxIter = 100
	Delta = 1
	counts = 0
	while Delta > Epsilon and counts < MaxIter:
	    counts += 1
	    # p1 = sigmoid(X * Omega), written as exp(-log(1 + exp(-z))) so that
	    # large |z| cannot overflow.
	    z = np.asarray(X * Omega)
	    p1 = np.exp(-np.logaddexp(0.0, -z))
	    # Gradient of the negative log-likelihood: -X^T (Y - p1), shape (M, 1).
	    df = -(X.T * (Y - p1))
	    # Hessian: X^T diag(p1 * (1 - p1)) X, shape (M, M).
	    d2f = X.T * np.asmatrix(np.asarray(X) * (p1 * (1.0 - p1)))
	    # The pseudo-inverse keeps the step defined when the Hessian is singular.
	    Omega = Omega - np.asmatrix(np.linalg.pinv(d2f)) * df
	    Delta = np.linalg.norm(df)
	# Classification function
	def Classficate(sample):
	    """Return the model's probability that *sample* belongs to class 1.

	    sample must be an (M, 1) column (attributes followed by a trailing 1
	    for the bias) so that Omega.T * sample is a 1x1 product. The result
	    is a Python float in [0, 1].

	    The sigmoid is evaluated as exp(-log(1 + exp(-z))), which stays finite
	    for arbitrarily large |z|; math.exp(-z) would raise OverflowError for
	    very negative z.
	    """
	    z = (Omega.T * sample).item()  # explicit 1x1 -> scalar extraction
	    return float(np.exp(-np.logaddexp(0.0, -z)))
	# Plot the fitted model as a surface over the (feature1, feature2) plane.
	K = 50  # grid points per axis
	xx = np.linspace(0, 10, num=K)
	yy = np.linspace(0, 10, num=K)
	xx_1, yy_1 = np.meshgrid(xx, yy)
	Omega_h = np.array(Omega.T)  # plain ndarray so `*` below is element-wise
	# Sigmoid of the linear score, written as exp(-log(1 + exp(-z))) so that
	# large weights cannot overflow np.exp.
	zz_arg = Omega_h[0, 0] * xx_1 + Omega_h[0, 1] * yy_1 + Omega_h[0, 2]
	zz_1 = np.exp(-np.logaddexp(0.0, -zz_arg))
	fig = plt.figure(2)
	# Create the 3D axes through the figure: a bare Axes3D(fig) is no longer
	# attached to the figure automatically in current Matplotlib releases.
	ax1 = fig.add_subplot(111, projection='3d')
	ax1.plot_surface(xx_1, yy_1, zz_1, alpha=0.6, color='r')
	ax1.set_xlabel('feature1')
	ax1.set_ylabel('feature2')
	ax1.set_zlabel('class')
	ax1.set_title('LogisiticRegression model')
	plt.show()