Python SVM: I designed three different loss functions and compared how they perform

# solving a classification problem
import math

from LinearRegression import *  # the wildcard import supplies getData2, np and plt used below

def calDistance(x, y, w, b):
    r"""
    Compute the perpendicular distance from the point (x, y) to the line y = w*x + b.
    :param x: point x
    :param y: point y
    :param w: slope of the line
    :param b: intercept of the line
    :return: the distance
    """
    # Direct point-to-line distance for w*x - y + b = 0. This is equivalent to
    # the original right-triangle derivation (product of the two legs divided
    # by the hypotenuse), but is also well-defined when w == 0.
    return abs(w * x - y + b) / math.sqrt(w ** 2 + 1)
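
# Quick sanity check (my addition, mirroring the commented-out test at the
# bottom of this file): the distance from (1, 0) to the line y = x should be
# sqrt(2)/2 ≈ 0.7071.
assert abs(calDistance(1, 0, 1, 0) - math.sqrt(2) / 2) < 1e-9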

def getSVMLoss(G1, G2, w, b):
    r"""
    计算在(w,b)的前提下,整个数据集的loss;
    loss function 是 hinge loss
    :param G1:第一类样本pandas,第一列是X,第二列是Y
    :param G2:第二类样本pandas,第一列是X,第二列是Y
    :param w:斜率
    :param b:截距
    :return:返回当前斜率和截距下的loss
    """
    total_loss = 0

    # loss from G1 (class 1 should lie above the line)
    class1Num = G1.shape[0]
    d1min = float('inf')
    x_f_1, y_f_1 = 0, 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if (w * x + b) > y:  # misclassified: penalize by its distance to the line
            total_loss += d

        # track the class-1 point closest to the line
        if d < d1min:
            x_f_1, y_f_1 = x, y
            d1min = d

    # loss from G2 (class 2 should lie below the line)
    class2Num = G2.shape[0]
    d2min = float('inf')
    x_f_2, y_f_2 = 0, 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if w * x + b < y:  # misclassified: penalize
            total_loss += d

        # track the class-2 point closest to the line
        if d < d2min:
            x_f_2, y_f_2 = x, y
            d2min = d

    # penalize when the two closest points sit at very different distances,
    # i.e. the line is not centered between the classes
    total_loss = total_loss + abs(d2min - d1min)

    return total_loss
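
# A vectorized sketch of getSVMLoss (my addition, not in the original post):
# the same penalties, computed with NumPy using the distance formula from
# calDistance. Assumes G1/G2 are DataFrames with columns 'X' and 'Y'.
def getSVMLossVec(G1, G2, w, b):
    X1, Y1 = G1['X'].to_numpy(), G1['Y'].to_numpy()
    X2, Y2 = G2['X'].to_numpy(), G2['Y'].to_numpy()
    d1 = np.abs(w * X1 - Y1 + b) / math.sqrt(w ** 2 + 1)  # class-1 distances
    d2 = np.abs(w * X2 - Y2 + b) / math.sqrt(w ** 2 + 1)  # class-2 distances
    loss = d1[w * X1 + b > Y1].sum() + d2[w * X2 + b < Y2].sum()  # misclassified points
    return loss + abs(d2.min() - d1.min())  # keep the line centered between classes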

def getSVMLoss2(G1, G2, w, b):
    r"""
    理解好wiki上的概念后,重新写的loss function.
    现在的直线是y= w*x+b
    换算成线性代数中的直线(w^T*x-b = 0)就是w*x-y-(-b) = 0
    即:(w^T) = [w,-1], (b) = -b  【←这个理解是错误的】
    当我设置为 (w^T) = [w,-1], (b) = -b,模型没有收敛到正确结果;
    当我设置为 (w^T) = [w,1], (b) = b,模型收敛到了正确结果;
    我们的目标是最小化 ||w||,同时满足约束条件y_i(w^T*x_i-b)>=1
    那么如何做呢?不满足条件的给一个很大的惩罚;||w||本身当做loss的一个部分;
    :param G1:
    :param G2:
    :param w:
    :param b:
    :return:
    """

    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w||, the 2-norm of the w vector
    total_loss = total_loss + w_v_nor

    # then also penalize the samples that violate the constraint
    # loss from G1
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        x_v = np.array([x, y])
        class1Loss += max(0, 1 - w_v @ x_v - b)  # no penalty above 1, penalize below 1

        # earlier, equivalent formulation:
        # co = w_v @ x_v
        # if co < 1:  # the constraint is (w^T*x_i - b) > 1; penalize when it is below 1
        #     class1Loss += abs(co - 1)

    class1Loss = class1Loss / class1Num
    total_loss += class1Loss

    # loss from G2
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        x_v = np.array([x, y])
        class2Loss += max(0, 1 + w_v @ x_v - b)  # no penalty below -1, penalize above -1
        # earlier, equivalent formulation:
        # co = w_v @ x_v
        # if co > -1:  # the constraint is (w^T*x_i - b) < -1; penalize when it is above -1
        #     total_loss += abs(co - 1)
    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss
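
# A vectorized sketch of the same soft-margin objective (my addition, not in
# the original post). It assumes, as the code above does, that G1 and G2 are
# DataFrames with columns 'X' and 'Y'.
def getSVMLoss2Vec(G1, G2, w, b):
    w_v = np.array([w, 1.0])
    X1 = G1[['X', 'Y']].to_numpy()  # class-1 points as an (n, 2) array
    X2 = G2[['X', 'Y']].to_numpy()  # class-2 points
    hinge1 = np.maximum(0, 1 - X1 @ w_v - b)  # same sign convention as getSVMLoss2
    hinge2 = np.maximum(0, 1 + X2 @ w_v - b)
    return np.linalg.norm(w_v) + hinge1.mean() + hinge2.mean()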

def getSVMLoss3(G1, G2, w, b):
    r"""
    """

    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w||, the 2-norm of the w vector
    total_loss = total_loss + w_v_nor

    # then also penalize the samples that violate the constraint
    # loss from G1
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        if y < w * x + b + 1:  # below the upper margin line: penalize
            class1Loss += w * x + b + 1 - y
    class1Loss = class1Loss / class1Num
    total_loss += class1Loss

    # loss from G2
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        if y > w * x + b - 1:  # above the lower margin line: penalize
            # note: this should really be the perpendicular distance rather than
            # the vertical (intercept) offset; otherwise you get the result shown
            class2Loss += y - (w * x + b - 1)

    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss
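
# Following the comment above (my addition): the same objective, but measuring
# violations as perpendicular distances to the margin lines via calDistance
# instead of vertical offsets.
def getSVMLoss3Perp(G1, G2, w, b):
    total_loss = np.linalg.norm(np.array([w, 1.0]))
    class1Loss = sum(calDistance(G1.iloc[i, 0], G1.iloc[i, 1], w, b + 1)
                     for i in range(G1.shape[0])
                     if G1.iloc[i, 1] < w * G1.iloc[i, 0] + b + 1)
    class2Loss = sum(calDistance(G2.iloc[i, 0], G2.iloc[i, 1], w, b - 1)
                     for i in range(G2.shape[0])
                     if G2.iloc[i, 1] > w * G2.iloc[i, 0] + b - 1)
    return total_loss + class1Loss / G1.shape[0] + class2Loss / G2.shape[0]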


def SVMFit(G1, G2):
    w_last, b_last = -5, 100
    w, b = -6, 99
    loss_last = 1
    loss = 0
    stop = 100000
    i = 0
    eta = 1e-3
    count = 0
    while i < stop:
        # loss = getSVMLoss(G1, G2, w, b)   # option 1: loss designed from my own intuition
        # loss = getSVMLoss2(G1, G2, w, b)  # option 2: the linear-algebra formulation
        loss = getSVMLoss3(G1, G2, w, b)    # option 3: option 2 restated in plain geometric terms
        print("{:05d}: w is {:.2f}, b is {:.2f}, loss is {:.2f}".format(i, w, b, loss))
        if loss == 0:
            break
        if loss - loss_last < 0.1:  # count steps that barely improve the loss
            count += 1
        if count > 1000:  # stop after too many non-improving steps
            break
        # secant-style update: approximate dL/dw and dL/db with the finite
        # difference of the loss between this iterate and the previous one
        # (both parameters share the same loss difference, so this is a crude
        # joint estimate; it also assumes w != w_last and b != b_last)
        wn = w - eta * (loss - loss_last) / (w - w_last)
        bn = b - eta * (loss - loss_last) / (b - b_last)
        w_last = w
        w = wn
        b_last = b
        b = bn
        loss_last = loss
        i += 1

    return w, b, loss
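
# An alternative optimizer sketch (my addition, not in the original post):
# estimate the partial derivatives of the loss for w and b separately with
# central differences, instead of the shared secant estimate used in SVMFit.
def SVMFitNumGrad(G1, G2, w=-6.0, b=99.0, eta=1e-3, steps=100000, h=1e-4):
    loss = getSVMLoss3(G1, G2, w, b)
    for _ in range(steps):
        dw = (getSVMLoss3(G1, G2, w + h, b) - getSVMLoss3(G1, G2, w - h, b)) / (2 * h)
        db = (getSVMLoss3(G1, G2, w, b + h) - getSVMLoss3(G1, G2, w, b - h)) / (2 * h)
        w, b = w - eta * dw, b - eta * db
        new_loss = getSVMLoss3(G1, G2, w, b)
        if abs(new_loss - loss) < 1e-9:  # stop once the loss plateaus
            break
        loss = new_loss
    return w, b, loss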



if __name__ == "__main__":
    print("to solve classification problem")
    np.random.seed(5)
    G1, G2 = getData2()
    fig, ax = plt.subplots()

    ax.scatter(G1['X'], G1['Y'], color="C0")
    ax.scatter(G2['X'], G2['Y'], color="C1")

    ax.plot(np.array([50, 50]), np.array([0, 100]))  # vertical reference line
    ax.plot(np.array([0, 100]), np.array([50, 50]))  # horizontal reference line

    w, b = -6, 99  # initial line, same starting point as SVMFit
    x = np.arange(0, 100, 1)
    y = w * x + b
    ax.plot(x, y, color="C2",label="original")

    w_f, b_f, loss_f = SVMFit(G1, G2)
    y_f = w_f * x + b_f
    ax.plot(x, y_f, color="C3",label="final")
    ax.legend()

    ax.set_xlim(xmin = 0, xmax = 100)
    ax.set_ylim(ymin = 0, ymax = 100)
    ax.set_title("LOSS-{}, eta = {}, loss is {:.2f}".format(3, 1e-3, loss_f))
    fig.show()
    # argument order: x, y, w, b
    # print("distance: {:.2f}".format(calDistance(1, 0, 1, 0)))

Conclusions:

1. The design of the loss function matters a great deal for the result.

2. Different losses call for different learning rates; if the learning rate is left unchanged, the model may well fail to converge.

3. Comparing the three results: loss function 1 and loss function 2 perform equally well, while loss function 3 performs worst.

 
