Python SVM: I designed three different loss functions myself and compared how they perform
```python
# Solving a binary classification problem with a hand-rolled linear SVM.
import math

import numpy as np
import matplotlib.pyplot as plt

# Local module from the earlier linear-regression post; provides getData2().
from LinearRegression import *


def calDistance(x, y, w, b):
    r"""
    Compute the perpendicular distance from point (x, y) to the line y = w*x + b.
    :param x: point x
    :param y: point y
    :param w: slope of the line
    :param b: intercept of the line
    :return: the distance
    """
    # Closed-form point-to-line distance for w*x - y + b = 0; unlike the
    # right-triangle construction, this also handles w == 0.
    return abs(w * x - y + b) / math.sqrt(w * w + 1)


def getSVMLoss(G1, G2, w, b):
    r"""
    Loss of the whole data set for a given (w, b); the loss is hinge-like:
    misclassified points are penalised by their distance to the line.
    :param G1: first-class samples, a DataFrame whose first column is X, second is Y
    :param G2: second-class samples, a DataFrame whose first column is X, second is Y
    :param w: slope
    :param b: intercept
    :return: the loss under the current slope and intercept
    """
    total_loss = 0
    # Loss contributed by G1.
    class1Num = G1.shape[0]
    d1min = 99999
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if (w * x + b) > y:  # misclassified: penalise by distance
            total_loss += d
        if d < d1min:  # track the G1 point closest to the line
            d1min = d
    # Loss contributed by G2.
    class2Num = G2.shape[0]
    d2min = 99999
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if w * x + b < y:  # misclassified: penalise by distance
            total_loss += d
        if d < d2min:  # track the G2 point closest to the line
            d2min = d
    # Penalise an unbalanced margin: if the two closest points end up at very
    # different distances from the line, add the gap to the loss.
    total_loss = total_loss + abs(d2min - d1min)
    return total_loss


def getSVMLoss2(G1, G2, w, b):
    r"""
    Loss function rewritten after properly digesting the Wikipedia formulation.
    The current line is y = w*x + b. Rewriting it in the linear-algebra form
    (w^T * x - b = 0) as w*x - y - (-b) = 0 suggests (w^T) = [w, -1], (b) = -b
    [<- this reading turned out to be wrong]:
    with (w^T) = [w, -1], (b) = -b, the model did not converge to the right result;
    with (w^T) = [w, 1], (b) = b, the model converged to the right result.
    The goal is to minimise ||w|| subject to y_i * (w^T * x_i - b) >= 1.
    How? Samples violating the constraint get a large penalty, and ||w||
    itself is one component of the loss.
    :param G1: first-class samples
    :param G2: second-class samples
    :param w: slope
    :param b: intercept
    :return: the loss
    """
    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w^T||, the 2-norm of w
    total_loss = total_loss + w_v_nor
    # Then penalise the samples that violate the constraint.
    # Loss contributed by G1.
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        x_v = np.array([x, y])
        # No penalty above 1; anything below 1 is penalised.
        class1Loss += max(0, 1 - w_v @ x_v - b)
    class1Loss = class1Loss / class1Num
    total_loss += class1Loss
    # Loss contributed by G2.
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        x_v = np.array([x, y])
        # No penalty below -1; anything above -1 is penalised.
        class2Loss += max(0, 1 + w_v @ x_v - b)
    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss


def getSVMLoss3(G1, G2, w, b):
    r"""
    Scheme 2 with the linear-algebra notation replaced by a plain geometric
    reading: G1 points should sit above the margin line y = w*x + b + 1 and
    G2 points below y = w*x + b - 1; violations are penalised by the gap.
    """
    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w^T||, the 2-norm of w
    total_loss = total_loss + w_v_nor
    # Then penalise the samples that violate the constraint.
    # Loss contributed by G1.
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        if y < w * x + b + 1:
            class1Loss += w * x + b + 1 - y
    class1Loss = class1Loss / class1Num
    total_loss += class1Loss
    # Loss contributed by G2.
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        if y > w * x + b - 1:
            # This should really be the perpendicular distance rather than the
            # vertical gap; otherwise you get the kind of result shown in the plot.
            class2Loss += y - (w * x + b - 1)
    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss


def SVMFit(G1, G2):
    w_last, b_last = -5, 100
    w, b = -6, 99
    loss_last = 1
    loss = 0
    stop = 100000
    i = 0
    eta = 1e-3
    count = 0
    while i < stop:
        print("{:05d}: w is {:.2f}, b is {:.2f}, loss is {:.2f}".format(i, w, b, loss))
        # loss = getSVMLoss(G1, G2, w, b)   # scheme 1 (loss designed from my own intuition)
        # loss = getSVMLoss2(G1, G2, w, b)  # scheme 2 (linear-algebra formulation)
        loss = getSVMLoss3(G1, G2, w, b)    # scheme 3 (scheme 2 rewritten as plain geometry)
        if loss == 0:
            break
        if loss - loss_last < 0.1:  # loss has almost stopped improving
            count += 1
            if count > 1000:
                break
        # Secant-style update: approximate the gradients by finite differences,
        # guarding against a zero denominator when an update stalls.
        dw = w - w_last if w != w_last else 1e-12
        db = b - b_last if b != b_last else 1e-12
        wn = w - eta * (loss - loss_last) / dw
        bn = b - eta * (loss - loss_last) / db
        w_last = w
        w = wn
        b_last = b
        b = bn
        loss_last = loss
        i += 1
    return w, b, loss


if __name__ == "__main__":
    print("to solve classification problem")
    np.random.seed(5)
    G1, G2 = getData2()
    fig, ax = plt.subplots()
    ax.scatter(G1['X'], G1['Y'], color="C0")
    ax.scatter(G2['X'], G2['Y'], color="C1")
    # Reference cross-hairs through the centre of the plot.
    ax.plot(np.array([50, 50]), np.array([0, 100]))
    ax.plot(np.array([0, 100]), np.array([50, 50]))
    w, b = -6, 99  # same starting line as SVMFit
    x = np.arange(0, 100, 1)
    y = w * x + b
    ax.plot(x, y, color="C2", label="original")
    w_f, b_f, loss_f = SVMFit(G1, G2)
    y_f = w_f * x + b_f
    ax.plot(x, y_f, color="C3", label="final")
    ax.legend()
    ax.set_xlim(xmin=0, xmax=100)
    ax.set_ylim(ymin=0, ymax=100)
    ax.set_title("LOSS-{}, eta = {}, loss is {:.2f}".format(3, 1e-3, loss_f))
    plt.show()
    # sanity check for calDistance: point (1, 0) to line y = x should give ~0.71
    # print("distance is: {:.2f}".format(calDistance(1, 0, 1, 0)))
```
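As a sanity check on the hand-rolled fit, the separating line it finds can be compared against a standard hinge-loss solver. Below is a minimal sketch, assuming scikit-learn is installed and that getData2() returns the same two DataFrames with X and Y columns used above; sklearn_reference_line is an illustrative helper name, not part of the original code.

```python
import numpy as np
from sklearn.svm import LinearSVC


def sklearn_reference_line(G1, G2):
    """Fit sklearn's hinge-loss linear SVM and convert its decision boundary
    a*x + c*y + d = 0 back to slope/intercept form y = w*x + b."""
    X = np.vstack([G1[['X', 'Y']].to_numpy(), G2[['X', 'Y']].to_numpy()])
    y = np.concatenate([np.ones(len(G1)), -np.ones(len(G2))])
    clf = LinearSVC(C=1.0).fit(X, y)
    a, c = clf.coef_[0]      # coefficients on x and y
    d = clf.intercept_[0]
    return -a / c, -d / c    # w, b of the equivalent line y = w*x + b
```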
Conclusions:
1. The design of the loss function matters a great deal for the result (a vectorized restatement of the hinge objective is sketched after this list).
2. Different losses call for different learning rates; if the learning rate is left unchanged, the model may well fail to converge (see the toy demo after this list).
3. Comparing the three results: loss function 1 and loss function 2 perform equally well, while loss function 3 performs worst.
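On conclusion 1: the objective that getSVMLoss2 and getSVMLoss3 approximate is the standard soft-margin hinge loss. Here is a minimal vectorized restatement, assuming labels +1 for G1 and -1 for G2 and a weight vector w_vec = [w, 1] as in the code above; hinge_loss is an illustrative helper, not the post's code.

```python
import numpy as np


def hinge_loss(points, labels, w_vec, b):
    """points: (n, 2) array; labels: (n,) array of +1 / -1 entries."""
    margins = labels * (points @ w_vec - b)  # y_i * (w^T x_i - b)
    # ||w|| plus the mean hinge penalty for margin violations.
    return np.linalg.norm(w_vec) + np.mean(np.maximum(0.0, 1.0 - margins))
```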
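On conclusion 2, a toy illustration (my own, not from the post) of why a fixed learning rate can block convergence: plain gradient descent on f(x) = x^2 updates x <- x - eta*2x = (1 - 2*eta)*x, which shrinks toward 0 only when 0 < eta < 1 and blows up otherwise.

```python
def descend(eta, x=10.0, steps=20):
    """Run `steps` gradient-descent updates on f(x) = x**2 (gradient 2x)."""
    for _ in range(steps):
        x = x - eta * 2 * x
    return x


print(descend(0.1))  # ~0.12: converges toward the minimum at 0
print(descend(1.1))  # ~383:  same function, larger eta, diverges
```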