Implementing Softmax Regression with a Hidden Layer in NumPy

The base model:

import numpy as np
from numpy import random, exp, log, sign
import matplotlib.pyplot as plt

class Model:
    def __init__(self):
        self.pre=None
    def __call__(self,pre):
        # attach `pre` at the front of this chain, so layers
        # compose inside-out, e.g. criterion(fc2(fc1))
        u=self
        while u.pre is not None:
            u=u.pre
        u.pre=pre
        return self
    def forward(self,x):
        pass
    def backward(self,dz):
        pass
    def _forward(self,x):
        # recurse to the front of the chain, then cache input and output
        if self.pre is not None:
            x=self.pre._forward(x)
        self.x=x
        self.z=self.forward(x)
        return self.z
    def _backward(self,dz):
        # propagate the gradient back along the chain
        self.dz=dz
        self.dx=self.backward(dz)
        if self.pre is not None:
            self.pre._backward(self.dx)
        return self.dx
    def _set_dropout(self,act):
        # toggle dropout for every layer in the chain (True: train, False: eval)
        if isinstance(self,Dropout):
            self.act=act
        if self.pre is not None:
            self.pre._set_dropout(act)
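
The `__call__` operator splices chains together: calling a model with another model walks to the front of the caller's chain and attaches the argument there, so a network is written inside-out, e.g. criterion(fc2(fc1)). A minimal sketch of the resulting flow (made-up sizes, assuming the Linear and Relu classes defined below):

net=Relu()(Linear(2,4))                  # Linear becomes net.pre; data flows Linear -> Relu
out=net._forward(random.randn(2,8))      # shape (4,8): 4 hidden units, batch of 8 columns
net._backward(np.ones_like(out))         # gradients flow Relu -> Linear, updating its weights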

class Linear(Model):
    def __init__(self,input_size,output_size):
        super().__init__()
        self.input_size=input_size
        self.output_size=output_size
        self.learning_rate=0.9
        self.k=1e-3  # regularization strength
        # small random init; data is column-major, so w has shape (output, input)
        self.w=(random.randn(self.input_size,self.output_size)*0.01).T
#         self.w=(np.ones((self.input_size,self.output_size))*0.01).T
        self.b=np.zeros((1,self.output_size)).astype(float).T
    def forward(self,x):
        return self.w@x+self.b
    def backward(self,dz):
        # average the gradient over the batch (samples are columns of x)
        self.dw=dz@self.x.T/self.x.shape[1]
        self.db=np.mean(dz,axis=1,keepdims=True)
        self.w-=self.learning_rate*self.dw+self.k*sign(self.w)  # L1 regularization
#         self.w-=self.learning_rate*self.dw+self.k*self.w  # L2 regularization
        self.b-=self.learning_rate*self.db
        self.dx=self.w.T@dz
        return self.dx
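
Note the column-oriented convention: a batch is a matrix whose columns are samples, so Linear computes w@x+b rather than the row-major x@w+b. A quick shape check (made-up sizes):

fc=Linear(3,2)
x=random.randn(3,5)     # 3 features, 5 samples stored as columns
z=fc._forward(x)
print(z.shape)          # (2, 5)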

class Dropout(Model):
    def __init__(self,p):
        super().__init__()
        self.p=1-p  # keep probability
        self.act=False
    def forward(self,x):
        z=x.copy()
        if self.act:
            # inverted dropout: zero out units, rescale survivors to keep the expectation
            self.mask=np.random.binomial(n=1,p=self.p,size=x.shape)
            z=z*self.mask/self.p
        return z
    def backward(self,dz):
        if self.act:
            # the gradient flows only through the units kept in forward
            return dz*self.mask/self.p
        return dz
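
The /self.p rescaling makes this "inverted" dropout: the expected activation is unchanged, so nothing needs to be rescaled at inference time. A quick check of that property (illustrative):

drop=Dropout(0.5)
drop.act=True                      # what _set_dropout(True) does during training
x=np.ones((4,100000))
print(drop.forward(x).mean())      # ~1.0: matches the input mean on average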

class Sigmoid(Model):
    def forward(self,x):
        return 1/(1+exp(-x))
    def backward(self,dz):
        # sigmoid'(x) = z*(1-z), computed from the cached output
        return dz*self.z*(1-self.z)

class Tanh(Model):
    def forward(self,x):
        return np.tanh(x)
    def backward(self,dz):
        # tanh'(x) = 1 - z^2
        return dz*(1-self.z*self.z)
    
class Relu(Model):
    def forward(self,x):
        return np.where(x>=0,x,0.0)
    def backward(self,dz):
        # pass the gradient through only where the input was non-negative
        return dz*np.where(self.x>=0,1.0,0.0)
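
Each backward should agree with a finite-difference estimate of the derivative; a small check for Tanh (illustrative, not part of the original post):

act=Tanh()
x=random.randn(3,4)
act._forward(x)
analytic=act.backward(np.ones_like(x))
eps=1e-6
numeric=(np.tanh(x+eps)-np.tanh(x-eps))/(2*eps)
print(np.abs(analytic-numeric).max())   # on the order of 1e-10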

class Loss(Model):
    def compute_loss(self):
        pass
    def _forward(self,x,y):
        # same as Model._forward, but also caches the targets y
        if self.pre is not None:
            x=self.pre._forward(x)
        self.x=x
        self.y=y
        self.z=self.forward(x)
        return self.z

class CrossEntropyLoss(Loss):
    # sigmoid output + binary cross-entropy
    def forward(self,x):
        return 1/(1+exp(-x))
    def compute_loss(self):
        return np.mean(-(self.y*log(self.z)+(1-self.y)*log(1-self.z)))
    def backward(self,dz):
        # fused sigmoid + BCE gradient: dL/dx = z - y
        return dz*(self.z-self.y)

class CrossEntropyLoss2(Loss):
    # softmax output + cross-entropy, with classes along axis 0
    def forward(self,x):
        z=exp(x-x.max(axis=0))  # subtract the per-column max for numerical stability
        z/=z.sum(axis=0)
        return z
    def compute_loss(self):
        return np.mean(-self.y*log(self.z))
    def backward(self,dz):
        # fused softmax + cross-entropy gradient: dL/dx = z - y
        return dz*(self.z-self.y)
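
That the fused softmax-plus-cross-entropy gradient collapses to z-y is easy to verify numerically; a small check with three classes and one sample (illustrative):

crit=CrossEntropyLoss2()
x=random.randn(3,1)
y=np.array([[0.0],[1.0],[0.0]])

def ce(x):
    crit._forward(x,y)
    return float((-y*log(crit.z)).sum())   # unscaled cross-entropy for one column

eps=1e-6
numeric=np.zeros_like(x)
for i in range(3):
    d=np.zeros_like(x)
    d[i,0]=eps
    numeric[i,0]=(ce(x+d)-ce(x-d))/(2*eps)
crit._forward(x,y)
print(np.abs((crit.z-y)-numeric).max())   # ~1e-9: dL/dx equals z - y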

The regression model:

class MLP:
    def __init__(self,model,scaler=True,degree=1):
        self.scaler=scaler
        self.degree=degree
        self.model=model
        
    # StandardScaler: standardize features to zero mean, unit variance
    def scaler_fit(self,X):
        self.mean=X.mean(axis=0)
        self.scale=X.std(axis=0)
        self.scale[self.scale<np.finfo(self.scale.dtype).eps]=1.0  # guard against division by zero
    def scaler_transform(self,X):
        return (X-self.mean)/self.scale
    
    # PolynomialFeatures: expand the two input features up to the given degree
    def poly_transform(self,X):
        XX=X.T
        ret=[np.repeat(1.0,XX.shape[1]),XX[0],XX[1]]
        for i in range(2,self.degree+1):
            for j in range(0,i+1):
                ret.append(XX[0]**(i-j)*XX[1]**j)
        return np.array(ret).T
    
    def onehot_transform(self,y):
        # integer labels -> one-hot rows
        return np.eye(y.max()+1)[y]
    
    def fit(self,X,Y):
        self.model._set_dropout(True)   # enable dropout during training
        batch_size=32
        epoch_num=200
        XX=X.copy()
        YY=self.onehot_transform(Y.copy())
        if self.degree>1:
            XX=self.poly_transform(XX)
        if self.scaler:
            self.scaler_fit(XX)
            XX=self.scaler_transform(XX)
        I=list(range(len(XX)))
        LOSS=[]
        for epoch in range(epoch_num):
            loss=0
            random.shuffle(I)   # reshuffle the sample order each epoch
            XX=XX[I]
            YY=YY[I]
            for i in range(0,len(XX),batch_size):
                # transpose so that samples are columns, as the layers expect
                x=XX[i:i+batch_size].T
                y=YY[i:i+batch_size].T
                self.model._forward(x,y)
                loss+=self.model.compute_loss()
                self.model._backward(1)   # seed backprop with dL/dL = 1
            LOSS.append(loss)
        plt.plot(LOSS,color='r')
        plt.show()
    
    def predict(self,X):
        self.model._set_dropout(False)   # disable dropout for inference
        XX=X.copy()
        if self.degree>1:
            XX=self.poly_transform(XX)
        if self.scaler:
            XX=self.scaler_transform(XX)
        # skip the loss layer's softmax: argmax over the logits gives the same class
        Z=self.model.pre._forward(XX.T).argmax(axis=0)
        return Z
    
    def plot_decision_boundary(self,X,Y):
        # evaluate the classifier on a dense grid and shade by predicted class
        x0_min,x0_max=X[:,0].min()-1,X[:,0].max()+1
        x1_min,x1_max=X[:,1].min()-1,X[:,1].max()+1
        m=500
        x0,x1=np.meshgrid(
            np.linspace(x0_min,x0_max,m),
            np.linspace(x1_min,x1_max,m)
        )
        XX=np.c_[x0.ravel(),x1.ravel()]
        Y_pred=self.predict(XX)
        Z=Y_pred.reshape(x0.shape)
        plt.contourf(x0,x1,Z,cmap=plt.cm.Spectral)
        plt.scatter(X[:,0],X[:,1],c=Y,cmap=plt.cm.Spectral)
        plt.show()
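
For degree=2, poly_transform maps each sample (x0,x1) to the feature row [1, x0, x1, x0^2, x0*x1, x1^2], mirroring sklearn's PolynomialFeatures ordering for two features. A quick illustration with made-up values:

mlp=MLP(model=None,scaler=False,degree=2)
print(mlp.poly_transform(np.array([[2.0,3.0]])))
# [[1. 2. 3. 4. 6. 9.]]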

Generating the data:

def generate_data(F,l,r,n,y):
    # sample n points of the curve x -> F(x) on [l,r], all labeled y
    x=np.linspace(l,r,n)
    X=np.column_stack((x,F(x)))
    Y=np.repeat(y,n)
    return X,Y
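
Each call produces one labeled curve: X holds the (x, F(x)) pairs as rows and Y holds the shared label. For example (illustrative):

X0,Y0=generate_data(np.sin,0,np.pi,100,0)
print(X0.shape,Y0.shape)    # (100, 2) (100,)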

Main program:

def generate_model():
    scaler=True
    degree=1
    # 2 inputs -> 5 tanh hidden units (with dropout) -> 3 output classes
    fc1=Tanh()(Dropout(0.01)(Linear(2,5)))
    fc2=Dropout(0.01)(Linear(5,3))
    criterion=CrossEntropyLoss2()
    model=criterion(fc2(fc1))
    return MLP(model,scaler,degree)
    
random.seed(114514)   # fix the RNG for reproducibility
data_size=200

# alternative two-class dataset:
# X1,Y1=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,0)
# X2,Y2=generate_data(lambda x:-x**2+2*x+2+random.randn(data_size)*0.8,-1,3,data_size,1)
# X=np.vstack((X1,X2))
# Y=np.hstack((Y1,Y2))

# alternative three-class dataset:
# X1,Y1=generate_data(lambda x:1/x/5,0.1,1,data_size,0)
# X2,Y2=generate_data(lambda x:-1/x/5,-1,-0.1,data_size,1)
# X3,Y3=generate_data(lambda x:-x**2*5-1,-0.5,0.5,data_size,2)
# X=np.vstack((X1,X2,X3))
# Y=np.hstack((Y1,Y2,Y3))

# three overlapping parabolic arcs as three classes (the third is class 0 shifted right by 4)
X1,Y1=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,0)
X2,Y2=generate_data(lambda x:-x**2+2*x+2+random.randn(data_size)*0.8,-1,3,data_size,1)
X3,Y3=generate_data(lambda x:x**2+2*x-2+random.randn(data_size)*0.8,-3,1,data_size,2)
X3[:,0]+=4
X=np.vstack((X1,X2,X3))
Y=np.hstack((Y1,Y2,Y3))

# plt.scatter(X[:,0],X[:,1],c=Y,cmap=plt.cm.Spectral)
# plt.show()
model=generate_model()
model.fit(X,Y)
model.plot_decision_boundary(X,Y)

The loss curve and the decision boundary:
