支持向量机Support Vector Machine做聚类的一个小程序
对散点进行二分聚类:
初始聚类中心的选择会影响分类次数甚至是否能成功分类, 算法采用离样本中心很近的两点作为初始聚类点.
程序如下:
% 设定分类次数,以自动调整分类精确度 % part1 得到散点数据并人工指定两个初始聚类中心点 clear all;clc;close all; %注意,3c前面不能写东西,会被擦除. x = [ [1,4]; [2,3]; [3,4]; [5,3]; [5,1]; [6,3]; [10,3]; [5,5 ] ]; cluster_count = 4; % 聚类次数 len = size(x, 1); intpx = 0; intpy = 0; for i = 1:len intpx = intpx + x(i, 1); intpy = intpy + x(i, 2); end intpx = intpx/len; intpy = intpy/len; % 注意坐标和矩阵要用方括号而不是圆括号 theta1 = [intpx + 0.01, intpy + 0.01]; theta2 = [intpx - 0.01, intpy - 0.01]; figure; % part2 循环过程,其中,计数矩阵和索引矩阵的每次初始化都在循环中完成 for j = 1:cluster_count % 初始化索引和计数矩阵 c = zeros(2, 1); % 画聚类中心点, 中点, 求斜率画中垂线 subplot(2,2,j); title(['di',num2str(j)]); plot(theta1(1), theta1(2), '*'); hold on; plot(theta2(1), theta2(2), '*'); hold on; mid1 = (theta1(1) + theta2(1))/2; mid2 = (theta1(2) + theta2(2))/2; plot(mid1, mid2, '+'); hold on; axis([-2 10 -2 10]) slope = (-1) * (theta1(1) - theta2(1))/(theta1(2) - theta2(2)); t = 0:0.01:10; line = slope*(t-mid1) + mid2; plot(t, line); hold on; % 判断分类结果, 画出ox区分, 并得到新的双theta % a 分类 thetanew1 = [0, 0]; thetanew2 = [0, 0]; for i = 1:len if (x(i,1)-theta1(1))^2 + (x(i,2)-theta1(2))^2 < (x(i,1)-theta2(1))^2 + (x(i,2)-theta2(2))^2 y(i) = 1; c(1) = c(1) + 1; plot(x(i,1), x(i,2), 'x');hold on; thetanew1(1) = thetanew1(1) + x(i,1); thetanew1(2) = thetanew1(2) + x(i,2); else y(i) = 0; c(2) = c(2) + 1; plot(x(i,1), x(i, 2), 'o');hold on; thetanew2(1) = thetanew2(1) + x(i,1); thetanew2(2) = thetanew2(2) + x(i,2); end end theta1 = thetanew1/c(1); theta2 = thetanew2/c(2); % b 双theta axis([-2 10 -2 10]) end % 若第一次分类为初始值分类,则可见第三次分类已达最佳
输出图像如下:
三分聚类:
% 三分聚类 % 2c clc; close all; % clear all is not really necesssary, beccause every variable with the same % name % 导入要分类的散点数据 x = [ [1,4]; [2,3]; [3,4]; [5,3]; [5,1]; [6,3]; [10,3]; [5,5 ]; [4, 0]; [3, 0] ]; %x = [ [1, 1]; [2, 1]; [2,2]; [8,1]; [8,2]; [8,3]; [4, 8]; [5, 8] ]; cluster_times = 4; len = size(x, 1); xxall = 0; xyall = 0; for i = 1:len xxall = xxall + x(i, 1); xyall = xyall + x(i, 2); end xysum = [xxall, xyall]; intpx = 0; intpy = 0; for i = 1:len intpx = intpx + x(i, 1); intpy = intpy + x(i, 2); end intpx = intpx/len; intpy = intpy/len; % 注意坐标和矩阵要用方括号而不是圆括号 %{ theta1 = [intpx, intpy + 1.01]; theta2 = [intpx - 1.02, intpy + 1.03]; theta3 = [intpx + 1.04, intpy - 1.05]; %} theta1 = x(1,:); theta2 = x(2,:); theta3 = x(10,:); % 判断 for j = 1:cluster_times % 初始化索引和计数矩阵 c = zeros(3, 1); % 画聚类中心点, 中点, 求斜率画中垂线 %subplot(3,3,j); figure; % title(['di',num2str(j)]); % plot(mid1, mid2, '+'); hold on; axis([-2 10 -2 10]) % 判断分类结果, 画出ox区分, 并得到新的双theta % a 分类 thetanew1 = [0, 0]; thetanew2 = [0, 0]; thetanew3 = [0, 0]; for i = 1:len if (x(i,1)-theta1(1))^2 + (x(i,2)-theta1(2))^2 < (x(i,1)-theta2(1))^2 + (x(i,2)-theta2(2))^2 ... && ((x(i,1)-theta1(1))^2 + (x(i,2)-theta1(2))^2 < (x(i,1)-theta3(1))^2 + (x(i,2)-theta3(2))^2) y(i) = 0; c(1) = c(1) + 1; plot(x(i,1), x(i,2), 'x');hold on; thetanew1(1) = thetanew1(1) + x(i,1); thetanew1(2) = thetanew1(2) + x(i,2); elseif (x(i,1)-theta2(1))^2 + (x(i,2)-theta2(2))^2 < (x(i,1)-theta1(1))^2 + (x(i,2)-theta1(2))^2 ... && ((x(i,1)-theta2(1))^2 + (x(i,2)-theta2(2))^2 < (x(i,1)-theta3(1))^2 + (x(i,2)-theta3(2))^2) y(i) = 1; c(2) = c(2) + 1; plot(x(i,1), x(i, 2), 'o'); hold on; thetanew2(1) = thetanew2(1) + x(i,1); thetanew2(2) = thetanew2(2) + x(i,2); else y(i) = 2; c(3) = c(3) + 1; plot(x(i, 1), x(i, 2), '+'); hold on; thetanew3(1) = thetanew3(1) + x(i, 1); thetanew3(2) = thetanew3(2) + x(i, 2); end end theta1 = thetanew1/c(1); theta2 = thetanew2/c(2); theta3 = thetanew3/c(3); mid12 = (theta1 + theta2)/2; mid23 = (theta2 + theta3)/2; mid31 = (theta3 + theta1)/2; slope12 = (-1) * (theta1(1) - theta2(1))/(theta1(2) - theta2(2)); %负倒数通过交换分子分母得到 slope23 = (-1) * (theta2(1) - theta3(1))/(theta2(2) - theta3(2)); slope31 = (-1) * (theta3(1) - theta1(1))/(theta3(2) - theta1(2)); t = 0:0.01:10; %t = 4.2:0.01:10; line12 = slope12*(t-mid12(1)) + mid12(2); plot(t, line12); hold on; %t = 0:0.01:4.2; line23 = slope23*(t-mid23(1)) + mid23(2); plot(t, line23); hold on; %t = 4.2:0.01:10; line31 = slope31*(t-mid31(1)) + mid31(2); plot(t, line31); hold on; %plot(theta1(1), theta1(2), '*'); %plot(theta2(1), theta2(2), '*'); %plot(theta3(1), theta3(2), '*'); % 运行结果不能达到预期时先不要否定算法而是先检查一下细节 % b 双theta axis([-2 10 -2 10]) end
输出如下:
初始点theta1, theta2, theta3 选x1, x2, x10:
初始点选x1, x2, x7 就会陷入局部最优: