Implementing Softmax Regression with a Hidden Layer in numpy
Base model:
import numpy as np
from numpy import random, exp, log, sign
import matplotlib.pyplot as plt

class Model:
    def __init__(self):
        self.pre = None
    def __call__(self, pre):
        # Attach `pre` to the front of the chain: walk back to the earliest
        # layer and link it there, so layers compose as f2(f1(...)).
        u = self
        while u.pre is not None:
            u = u.pre
        u.pre = pre
        return self
    def forward(self, x):
        pass
    def backward(self, dz):
        pass
    def _forward(self, x):
        # Run the forward pass of all preceding layers first, then this layer.
        if self.pre is not None:
            x = self.pre._forward(x)
        self.x = x
        self.z = self.forward(x)
        return self.z
    def _backward(self, dz):
        # Propagate the gradient back through the whole chain.
        self.dz = dz
        self.dx = self.backward(dz)
        if self.pre is not None:
            self.pre._backward(self.dx)
        return self.dx
    def _set_dropout(self, act):
        # Enable dropout during training, disable it at prediction time.
        if isinstance(self, Dropout):
            self.act = act
        if self.pre is not None:
            self.pre._set_dropout(act)
class Linear(Model):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = 0.9
        self.k = 1e-3    # regularization strength
        self.w = (random.randn(self.input_size, self.output_size) * 0.01).T
        # self.w=(np.ones((self.input_size,self.output_size))*0.01).T
        self.b = np.zeros((1, self.output_size)).astype(float).T
    def forward(self, x):
        return self.w @ x + self.b
    def backward(self, dz):
        # Average the gradients over the mini-batch, then take one SGD step.
        self.dw = dz @ self.x.T / self.x.shape[1]
        self.db = np.mean(dz, axis=1, keepdims=True)
        self.w -= self.learning_rate * self.dw + self.k * sign(self.w)  # L1 regularization
        # self.w -= self.learning_rate*self.dw + self.k*self.w          # L2 regularization
        self.b -= self.learning_rate * self.db
        self.dx = self.w.T @ dz
        return self.dx
class Dropout(Model):
    def __init__(self, p):
        super().__init__()
        self.p = 1 - p      # keep probability
        self.act = False    # only active during training
    def forward(self, x):
        z = x.copy()
        if self.act:
            # Inverted dropout: zero out units and rescale by the keep probability.
            self.mask = np.random.binomial(n=1, p=self.p, size=x.shape)
            z = z * self.mask / self.p
        return z
    def backward(self, dz):
        # The gradient passes only through the units that were kept.
        if self.act:
            return dz * self.mask / self.p
        return dz
class Sigmoid(Model):
    def forward(self, x):
        return 1 / (1 + exp(-x))
    def backward(self, dz):
        return dz * self.z * (1 - self.z)
class Tanh(Model):
    def forward(self, x):
        return np.tanh(x)
    def backward(self, dz):
        return dz * (1 - self.z * self.z)
class Relu(Model):
    def forward(self, x):
        return np.where(x >= 0, x, 0.0)
    def backward(self, dz):
        return dz * np.where(self.x >= 0, 1.0, 0.0)
class Loss(Model):
    def compute_loss(self):
        pass
    def _forward(self, x, y):
        # Same as Model._forward, but also stores the targets for the loss.
        if self.pre is not None:
            x = self.pre._forward(x)
        self.x = x
        self.y = y
        self.z = self.forward(x)
        return self.z
class CrossEntropyLoss(Loss):
    # Sigmoid activation + binary cross-entropy.
    def forward(self, x):
        return 1 / (1 + exp(-x))
    def compute_loss(self):
        return np.mean(-(self.y * log(self.z) + (1 - self.y) * log(1 - self.z)))
    def backward(self, dz):
        return dz * (self.z - self.y)
class CrossEntropyLoss2(Loss):
    # Softmax activation + multi-class cross-entropy.
    def forward(self, x):
        z = exp(x)
        z /= z.sum(axis=0)
        return z
    def compute_loss(self):
        # Sum over classes, average over the mini-batch.
        return np.mean((-self.y * log(self.z)).sum(axis=0))
    def backward(self, dz):
        # Combined softmax + cross-entropy gradient.
        return dz * (self.z - self.y)
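As a quick illustration (not part of the original listing), the classes above can be chained by calling one layer on another; _forward then runs the whole chain and _backward takes one SGD step on every Linear layer. A minimal sketch, assuming the imports and classes above:
net = CrossEntropyLoss2()(Linear(4, 3)(Tanh()(Linear(2, 4))))
x = np.random.randn(2, 8)                      # 2 features, batch of 8, stored column-wise
y = np.eye(3)[np.random.randint(0, 3, 8)].T    # one-hot targets, shape (3, 8)
net._forward(x, y)
print(net.compute_loss())
net._backward(1)                               # updates both Linear layers in place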
Regression model:
class MLP:
    def __init__(self, model, scaler=True, degree=1):
        self.scaler = scaler
        self.degree = degree
        self.model = model
    # StandardScaler
    def scaler_fit(self, X):
        self.mean = X.mean(axis=0)
        self.scale = X.std(axis=0)
        self.scale[self.scale < np.finfo(self.scale.dtype).eps] = 1.0
    def scaler_transform(self, X):
        return (X - self.mean) / self.scale
    # PolynomialFeatures
    def poly_transform(self, X):
        # Expand the two input features into all monomials up to `degree`.
        XX = X.T
        ret = [np.repeat(1.0, XX.shape[1]), XX[0], XX[1]]
        for i in range(2, self.degree + 1):
            for j in range(0, i + 1):
                ret.append(XX[0] ** (i - j) * XX[1] ** j)
        return np.array(ret).T
    def onehot_transform(self, y):
        return np.eye(y.max() + 1)[y]
    def fit(self, X, Y):
        self.model._set_dropout(True)
        batch_size = 32
        epoch_num = 200
        XX = X.copy()
        YY = Y.copy()
        YY = self.onehot_transform(YY)
        if self.degree > 1:
            XX = self.poly_transform(XX)
        if self.scaler:
            self.scaler_fit(XX)
            XX = self.scaler_transform(XX)
        I = list(range(len(XX)))
        LOSS = []
        for epoch in range(epoch_num):
            loss = 0
            random.shuffle(I)
            XX = XX[I]
            YY = YY[I]
            for i in range(0, len(XX), batch_size):
                # Samples are stored column-wise: each column is one example.
                x = XX[i:i + batch_size].T
                y = YY[i:i + batch_size].T
                self.model._forward(x, y)
                loss += self.model.compute_loss()
                self.model._backward(1)
            LOSS.append(loss)
        plt.plot(list(range(len(LOSS))), LOSS, color='r')
        plt.show()
    def predict(self, X):
        self.model._set_dropout(False)
        XX = X.copy()
        if self.degree > 1:
            XX = self.poly_transform(XX)
        if self.scaler:
            XX = self.scaler_transform(XX)
        # Skip the loss layer (its _forward needs labels); the argmax of the
        # logits equals the argmax of the softmax output.
        Z = self.model.pre._forward(XX.T).argmax(axis=0)
        return Z
    def plot_decision_boundary(self, X, Y):
        x0_min, x0_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        x1_min, x1_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        m = 500
        x0, x1 = np.meshgrid(
            np.linspace(x0_min, x0_max, m),
            np.linspace(x1_min, x1_max, m)
        )
        XX = np.c_[x0.ravel(), x1.ravel()]
        Y_pred = self.predict(XX)
        Z = Y_pred.reshape(x0.shape)
        plt.contourf(x0, x1, Z, cmap=plt.cm.Spectral)
        plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Spectral)
        plt.show()
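A small, hypothetical sanity check of poly_transform (not in the original program, and assuming the MLP class above): for degree=2 the two input features are expanded into all monomials up to degree 2, i.e. [1, x0, x1, x0^2, x0*x1, x1^2].
mlp = MLP(None, scaler=False, degree=2)
print(mlp.poly_transform(np.array([[2.0, 3.0]])))   # -> [[1. 2. 3. 4. 6. 9.]]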
Generating data:
def generate_data(F, l, r, n, y):
    # Sample n points of the curve x -> F(x) on [l, r] and label them all y.
    x = np.linspace(l, r, n)
    X = np.column_stack((x, F(x)))
    Y = np.repeat(y, n)
    return X, Y
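For illustration only (a noiseless call with hypothetical names Xd, Yd): X stacks x and F(x) as two columns, Y is the constant label.
Xd, Yd = generate_data(lambda x: x**2, -1, 1, 5, 0)
print(Xd.shape, Yd)   # -> (5, 2) [0 0 0 0 0]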
Main program:
def generate_model():
    scaler = True
    degree = 1
    # Hidden layer: Linear(2,5) -> Dropout -> Tanh; output layer: Linear(5,3) -> Dropout.
    fc1 = Tanh()(Dropout(0.01)(Linear(2, 5)))
    fc2 = Dropout(0.01)(Linear(5, 3))
    criterion = CrossEntropyLoss2()
    model = criterion(fc2(fc1))
    return MLP(model, scaler, degree)
random.seed(114514)
data_size=200
# X1,Y1=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,0)
# X2,Y2=generate_data(lambda x:-x**2+2*x+2+random.randn(data_size)*0.8,-1,3,data_size,1)
# X=np.vstack((X1,X2))
# Y=np.hstack((Y1,Y2))
# X1,Y1=generate_data(lambda x:1/x/5,0.1,1,data_size,0)
# X2,Y2=generate_data(lambda x:-1/x/5,-1,-0.1,data_size,1)
# X3,Y3=generate_data(lambda x:-x**2*5-1,-0.5,0.5,data_size,2)
# X=np.vstack((X1,X2,X3))
# Y=np.hstack((Y1,Y2,Y3))
X1,Y1=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,0)
X2,Y2=generate_data(lambda x:-x**2+2*x+2+random.randn(data_size)*0.8,-1,3,data_size,1)
X3,Y3=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,2)
X3[:,0]+=4
X=np.vstack((X1,X2,X3))
Y=np.hstack((Y1,Y2,Y3))
# plt.scatter(X[:,0],X[:,1],c=Y,cmap=plt.cm.Spectral)
# plt.show()
model=generate_model()
model.fit(X,Y)
model.plot_decision_boundary(X,Y)
Loss curve and decision boundary: