python 实现简单的SVM分类器

def getData2():
    r"""
    使用numpy生成随机数;
    使用pandas构造满足条件的随机数;
    :return:
    """
    df = pd.DataFrame()
    df['X'] = np.random.randint(1,100,size=(100))
    df['Y'] = np.random.randint(1,100,size=(100))

    G1 = df[(df['X']>50) & (df['Y']>50)]
    G2 = df[(df['X']<50) & (df['Y']<50)]

    G1 = G1.reset_index(drop=True)
    G2 = G2.reset_index(drop=True)

    return (G1, G2)

# 解决分类问题
import math

from LinearRegression import *

def calDistance(x,y,w,b):
    r"""
    计算一个点(x,y)到直线(w,b)的距离
    :param x: point x
    :param y: point y
    :param w: 直线的斜率
    :param b: 直线的截距
    :return: 返回距离
    """
    x0 = x
    y0 = y
    x1 = (y0-b)/w
    y1 = w*x0+b
    d0 = math.sqrt((x0-x1)**2+(y0-y1)**2)
    if d0==0:
        return 0
    else:
        dis = abs(x0-x1)*abs(y0-y1)/d0
        return dis

def getSVMLoss(G1, G2, w, b):
    r"""
    计算在(w,b)的前提下,整个数据集的loss;
    loss function 是 hinge loss
    :param G1:第一类样本pandas,第一列是X,第二列是Y
    :param G2:第二类样本pandas,第一列是X,第二列是Y
    :param w:斜率
    :param b:截距
    :return:返回当前斜率和截距下的loss
    """
    total_loss = 0

    #G1的loss
    class1Num = G1.shape[0]
    d1min = 99999
    x_f_1, y_f_1 = 0, 0
    for i in range(class1Num):
        x = G1.iloc[i,0]
        y = G1.iloc[i,1]
        d = calDistance(x,y,w,b)
        if (w*x+b) > y:
            total_loss += d

        ####
        if d < d1min:
            x_f_1, y_f_1 = x, y
            d1min = d

    #G2的loss
    class2Num = G2.shape[0]
    d2min = 99999
    x_f_2, y_f_2 = 0, 0
    for i in range(class2Num):
        x = G2.iloc[i,0]
        y = G2.iloc[i,1]
        d = calDistance(x,y,w,b)
        if w*x+b < y: #分类错误进行惩罚
            total_loss += d

        if d < d2min:#分类错误进行惩罚
            x_f_2, y_f_2 = x, y
            d2min = d

    total_loss = total_loss + abs(d2min - d1min)#如果两者相距太远,进行惩罚

    return total_loss


def SVMFit(G1, G2):
    w_last, b_last = -5, 100
    w, b = -6, 99
    loss_last = 1
    loss = 0
    stop = 10000
    i = 0
    eta = 1e-4
    count = 0
    while(i < stop):
        print("{:05d}: w is {:.2f}, b is {:.2f}, loss is {:.2f}".format(i,w,b,loss))
        loss = getSVMLoss(G1, G2, w, b)
        if loss == 0:
            break
        if loss - loss_last < 0.1:
            count += 1
        if count>1000:
            break
        wn = w - eta * (loss-loss_last)/(w-w_last)
        bn = b - eta * (loss-loss_last)/(b-b_last)
        w_last = w
        w = wn
        b_last = b
        b = bn
        loss_last = loss
        i += 1

    return w, b



if __name__ == "__main__":
    print("to solve classification problem")
    np.random.seed(5)
    G1, G2 = getData2()
    fig, ax = plt.subplots()
    ax.scatter(G1['X'], G1['Y'], color="C0")
    ax.scatter(G2['X'], G2['Y'], color="C1")

    ax.plot(np.array([50,50]), np.array([0,100]))
    ax.plot(np.array([0,100]), np.array([50,50]))

    w, b = -6, 99
    x = np.arange(0, 100, 1)
    y = w * x + b
    ax.plot(x, y, color="C2",label="original")
    w_f, b_f = SVMFit(G1, G2)
    y_f = w_f * x + b_f
    ax.plot(x, y_f, color="C3",label="final")
    ax.legend()

    ax.set_xlim(xmin = 0, xmax = 100)
    ax.set_ylim(ymin = 0, ymax = 100)
    fig.show()
    # x,y,w,b
    # print("距离是:{:.2f}".format(calDistance(1,0,1,0)))

 

posted @ 2022-03-20 21:37  bH1pJ  阅读(454)  评论(0编辑  收藏  举报