基于COIL20数据集实现K-means聚类，并计算评价指标NMI、ACC
- 1.先对图片集进行处理
% Build the COIL-20 sample matrix and the ground-truth label vector from the
% PNG files in the source folder, then save both to MAT-files.
%   COIL  - one row per image; each row is the image flattened to 1 x N
%   Label - column vector of class numbers parsed from the file names
clear
clc
% Named src_path (not "path") so the built-in PATH function is not shadowed.
src_path = 'F:\Matlab\bin\kmeans_coil20\coil-20-proc\'; % source dataset folder
% NOTE(review): save_path is declared as the output folder but is not used by
% the save calls below, which write to the current folder - confirm intent.
save_path = 'F:\Matlab\bin\kmeans_coil20\';
file = dir([src_path,'*.png']); % info (name, date, bytes, ...) of every .png in the folder
num_files = length(file);
disp(num_files);
if num_files == 0
    warning('No .png files found in %s', src_path);
end
% Collect the flattened images in a cell array and concatenate once at the
% end, instead of growing COIL row by row (which recopies it every iteration).
rows = cell(num_files,1);
Label = zeros(num_files,1);
for i = 1:num_files
    %% Image data
    str = file(i).name;                 % file name of the current image
    image0 = imread([src_path,str]);    % read the image
    image1 = im2double(image0);         % convert pixel values to double
    rows{i} = reshape(image1,1,[]);     % flatten the image into a single row
    fprintf('%d th starting...\n',i);   % progress trace
    %% Image label; file names look like 'obj<class>__<angle>.png'
    LabelStart = strfind(str,'j');      % position(s) of 'j' (just before the class number)
    LabelEnd = strfind(str,'_');        % position(s) of '_' (just after the class number)
    Label(i) = str2double(str(LabelStart+1:LabelEnd(1,1)-1)); % class number as double
end
COIL = vertcat(rows{:}); % stack all flattened images, one per row
save COIL COIL
save Label Label
- 2.计算acc及NMI参考了这篇
https://blog.csdn.net/hgh19951014/article/details/103496747
- 3.K-means
%% K-means setup: k = 20, with 20 randomly drawn samples as the initial centroids
n = 1440;    % number of samples (rows of COIL that are clustered)
m = 16384;   % number of feature columns per sample; column m+1 stores the cluster label
k = 20;      % number of clusters
% flag holds the row indices of the k randomly selected samples.
flag = randperm(n,k);
% Tag each selected sample with its own cluster number in column m+1.
COIL(flag, m+1) = (1:k)';
% The selected samples' feature rows become the initial centroids.
mu = zeros(k,m);
mu(:,:) = COIL(flag, 1:m);
%% Initial assignment: put every sample in the cluster of its nearest centroid
% dis(i,j) is the Euclidean distance between sample i and centroid j.
% The running minimum is kept in best_dist (not "min") so the built-in MIN
% function is not shadowed, and it starts at inf instead of a magic constant.
dis = zeros(n,k);
for i = 1:n
    % Distances from sample i to all k centroids in one vectorised step.
    dis(i,:) = sqrt(sum(bsxfun(@minus, mu, COIL(i,1:m)).^2, 2))';
    best_dist = inf;
    category = 1;
    for j = 1:k
        % Strict '<' keeps the lowest-numbered cluster when distances tie.
        if dis(i,j) < best_dist
            best_dist = dis(i,j);
            category = j;
        end
    end
    COIL(i,m+1) = category; % store the cluster label next to the features
end
%% Main K-means loop: recompute centroids, test for convergence, reassign samples
% Reads/writes the workspace variables COIL (labels in column m+1), mu and dis.
% Stops early, printing the number of completed passes, as soon as the
% centroids no longer change; otherwise runs up to max_iter passes in total
% (the initial assignment counts as pass 1).
max_iter = 10;
for iter = 2:max_iter
    % Per-cluster feature sums (sumn) and member counts (num).
    sumn = zeros(k,m);
    num = zeros(1,k);
    labels = COIL(1:n,m+1);
    for o = 1:k
        members = find(labels == o);
        num(1,o) = numel(members);
        sumn(o,:) = sum(COIL(members,1:m), 1); % sum over an empty set is all zeros
    end
    % Candidate centroids are the cluster means. A cluster with no members
    % keeps its previous centroid, avoiding a 0/0 = NaN centroid that could
    % never attract a sample again.
    new_mu = mu;
    for i = 1:k
        if num(1,i) > 0
            new_mu(i,:) = sumn(i,:) / num(1,i);
        end
    end
    % Converged when no centroid coordinate changed.
    isUpdate = ~isequal(new_mu, mu);
    if ~isUpdate
        % No update: report how many passes were performed and stop.
        disp(iter-1);
        break;
    end
    mu = new_mu;
    % Reassign every sample to its nearest updated centroid.
    for i = 1:n
        dis(i,:) = sqrt(sum(bsxfun(@minus, mu, COIL(i,1:m)).^2, 2))';
        % best_dist (not "min") avoids shadowing the built-in MIN; inf is a
        % safe starting value for any data scale.
        best_dist = inf;
        category = 1;
        for j = 1:k
            if dis(i,j) < best_dist
                best_dist = dis(i,j);
                category = j;
            end
        end
        COIL(i,m+1) = category;
    end
end
%% Normalised mutual information (NMI)
% Y holds the cluster labels stored in column m+1 of COIL; Label holds the
% ground-truth labels. Both are turned into row vectors before use.
Y = COIL(:,m+1);
if length(Label) ~= length(Y)
    error('length( Label ) must == length( Y)');
end
if iscolumn(Label)
    Label = Label';
end
if iscolumn(Y)
    Y = Y';
end
t = length(Label);          % number of samples
A_ids = unique(Label);      % distinct ground-truth labels
A_class = length(A_ids);
B_ids = unique(Y);          % distinct cluster labels
B_class = length(B_ids);
% Mutual information
% Indicator matrices: entry (c,s) is 1 when sample s carries the c-th label.
idAOccur = double(bsxfun(@eq, A_ids', Label));
idBOccur = double(bsxfun(@eq, B_ids', Y));
% Co-occurrence counts between ground-truth labels and cluster labels.
idABOccur = idAOccur * idBOccur';
% Marginal and joint label distributions.
Px = sum(idAOccur, 2)' / t;
Py = sum(idBOccur, 2)' / t;
Pxy = idABOccur / t;
% eps keeps log2 finite for zero entries; those terms are multiplied by Pxy = 0.
MImatrix = Pxy .* log2(Pxy ./ (Px' * Py) + eps);
MI = sum(MImatrix(:));
% Entropies
Hx = -sum(Px .* log2(Px + eps), 2);
Hy = -sum(Py .* log2(Py + eps), 2);
% Normalised mutual information
nmi = 2 * MI / (Hx + Hy);
fprintf('聚类的标准互信息Nmi为:%f\n',nmi); % print the NMI score
%% Clustering accuracy (ACC)
% BestMapping is defined outside this file.
% NOTE(review): presumably it relabels the cluster labels Y so they line up
% with Label - confirm its argument order against its definition.
NewLabel = BestMapping(Y,Label);
% Element-wise match between the ground-truth labels and the mapped labels.
T = (Label == NewLabel);
% Fraction of matching samples.
acc = sum(T) / length(NewLabel);
fprintf('聚类的准确度acc为:%f\n',acc); % print the accuracy
- 4.结果
迭代2次
迭代10次
ljm要加油