基于COIL20数据集进行k-means聚类,并计算评价指标NMI、ACC

  • 1.先对图片集进行处理
% Build the data matrix for clustering: every .png found in src_path becomes
% one row of COIL (pixels flattened, converted to double), and the class id
% parsed from the file name becomes the matching entry of Label. Both are
% then saved to COIL.mat and Label.mat in the current folder.
clear
clc
% Named src_path rather than `path` so MATLAB's built-in path() is not shadowed.
src_path = 'F:\Matlab\bin\kmeans_coil20\coil-20-proc\';     % source image directory
save_path = 'F:\Matlab\bin\kmeans_coil20\';     % intended output directory; NOTE(review): the save calls below do not use it
file = dir([src_path,'*.png']);     % struct array (name, date, bytes, ...) for every .png in src_path

numFiles = length(file);
disp(numFiles);

% Preallocate instead of growing the arrays on every iteration; COIL's width
% is only known once the first image has been read.
COIL = [];
Label = zeros(numFiles,1);
for i = 1:numFiles
    %% Pixel data
    str = file(i).name;                     % file name, also used for the label below
    image0 = imread([src_path,str]);        % read one image
    image1 = im2double(image0);             % convert integer pixel data to double
    image2 = reshape(image1,1,[]);          % flatten the image into a single row
    if i == 1
        COIL = zeros(numFiles,numel(image2));
    end
    COIL(i,:) = image2;                     % image i occupies row i
    fprintf('%d th starting...\n',i);       % progress trace
    %% Class label: file names look like 'obj<class>__<angle>.png'
    LabelStart = strfind(str,'j');          % position(s) of 'j', just before the class id
    LabelEnd = strfind(str,'_');            % position(s) of '_', just after the class id
    Label(i) = str2double(str(LabelStart(1)+1:LabelEnd(1,1)-1));
end

save COIL COIL
save Label Label
%% k=20: pick twenty random samples as the initial mean vectors
% n and m are fixed constants here. They are expected to equal the number of
% rows of COIL and the number of pixel columns per row -- TODO confirm they
% match the data that was actually loaded.
n=1440;
m=16384;
k=20;
% flag holds k distinct row indices drawn at random from 1..n
flag=randperm(n,k);
% Tag each chosen row with its cluster id (1..k) in column m+1 of COIL;
% this also creates column m+1 if it does not exist yet.
COIL(flag,m+1)=(1:k)';
% The pixel columns of the chosen rows become the k-by-m centroid matrix mu
mu=COIL(flag,1:m);
%% Compute Euclidean distances and assign each sample to its nearest prototype
% dis(i,j) is the Euclidean distance between sample i (row i of COIL, columns
% 1..m) and centroid j (row j of mu). The index of the closest centroid is
% written to COIL(i,m+1).
dis=zeros(n,k);
for i=1:n
    sample = COIL(i,1:m);
    % Distances to all k centroids in one shot; bsxfun keeps this working on
    % releases that lack implicit expansion.
    dis(i,:) = sqrt(sum(bsxfun(@minus,mu,sample).^2,2)).';
    % Running minimum kept in `nearest` (not a variable called `min`, which
    % would shadow the built-in) and seeded with inf instead of a magic number.
    nearest = inf;
    category = 1;
    for j=1:k
        % Strict '<' means ties go to the lowest centroid index.
        if dis(i,j)<nearest
            nearest = dis(i,j);
            category = j;
        end
    end
    COIL(i,m+1)=category;
end
%% Iterate
% Repeats "recompute centroids -> reassign samples" for iter = 2..10 and stops
% early as soon as the recomputed centroids equal the current ones. On early
% stop the number of completed iterations (iter-1) is displayed.
for iter=2:10
    % Recompute mu from the current assignment stored in COIL(:,m+1).
    sumn=zeros(k,m);     % per-cluster sum of member rows
    num=zeros(1,k);      % per-cluster member count
    assigned = COIL(:,m+1);
    for c=1:k
        members = (assigned==c);
        num(1,c) = nnz(members);
        sumn(c,:) = sum(COIL(members,1:m),1);   % sum over an empty selection is a zero row
    end
    % Candidate centroids. A cluster with no members keeps its previous
    % centroid instead of becoming 0/0 = NaN.
    newMu = mu;
    for c=1:k
        if num(1,c)>0
            newMu(c,:) = sumn(c,:)/num(1,c);
        end
    end
    % Convergence test against the freshly computed means (the accumulated
    % sums in sumn, not the built-in sum function).
    isUpdate = ~isequal(newMu,mu);
    if ~isUpdate
        % Centroids unchanged: report the iteration count and stop.
        disp(iter-1);
        break;
    end
    mu = newMu;
    % Reassign every sample to its nearest updated centroid.
    for i=1:n
        sample = COIL(i,1:m);
        dis(i,:) = sqrt(sum(bsxfun(@minus,mu,sample).^2,2)).';
        nearest = inf;      % running minimum; avoids shadowing the built-in min
        category = 1;
        for j=1:k
            % Strict '<' means ties go to the lowest centroid index.
            if dis(i,j)<nearest
                nearest = dis(i,j);
                category = j;
            end
        end
        COIL(i,m+1)=category;
    end
end
%% Compute NMI
% Y holds the cluster ids produced by the clustering above; Label holds the
% ground-truth class ids. The result is nmi = 2*MI / (Hx + Hy).
Y=COIL(:,m+1);
if length(Y) ~= length(Label)
    error('length( Label ) must == length( Y)');
end
% Bring both label vectors into row orientation.
if iscolumn(Label)
    Label=Label.';
end
if iscolumn(Y)
    Y=Y.';
end
t = length(Label);          % number of samples
A_ids = unique(Label);      % distinct ground-truth ids
A_class = length(A_ids);
B_ids = unique(Y);          % distinct cluster ids
B_class = length(B_ids);
% Mutual information
% Indicator matrices: row r is 1 wherever the sample carries the r-th id.
idAOccur = double(bsxfun(@eq,Label,A_ids.'));
idBOccur = double(bsxfun(@eq,Y,B_ids.'));
idABOccur = idAOccur * idBOccur.';      % co-occurrence counts (A_class-by-B_class)
Px = sum(idAOccur.') / t;               % marginal distribution of Label
Py = sum(idBOccur.') / t;               % marginal distribution of Y
Pxy = idABOccur / t;                    % joint distribution
% eps keeps log2 finite where a joint probability is zero.
MImatrix = Pxy .* log2(Pxy ./ (Px.' * Py) + eps);
MI = sum(MImatrix(:));
% Entropies
Hx = -sum(Px .* log2(Px + eps),2);
Hy = -sum(Py .* log2(Py + eps),2);
% Normalized mutual information
nmi = 2 * MI / (Hx+Hy);
fprintf('聚类的标准互信息Nmi为:%f\n',nmi); % print the NMI of the clustering
%% Compute ACC
% BestMapping is defined outside this script; presumably it relabels one
% label vector so that its ids line up with the other -- verify its argument
% order and return orientation against its source.
[NewLabel]=BestMapping(Y,Label);
% Compare as columns: if NewLabel came back as a column while Label is a row,
% a plain == would expand to a t-by-t matrix instead of an element-wise match.
T = (Label(:)==NewLabel(:));
acc=sum(T)/length(NewLabel);
fprintf('聚类的准确度acc为:%f\n',acc); % print the clustering accuracy
  • 4.结果
    迭代2次

迭代10次

posted @ 2021-11-13 11:36  0x3fffffff  阅读(601)  评论(0编辑  收藏  举报