整理样本标签

%%%整理样本标签:
%%%1. 手动删除坏样本; 2.将训练样本和测试样本分开;
%%%3. 增加训练样本和对应标签(筛选标签,然后得到对应类别的文件名,从文件夹中挑选样本,并重命名);

%% Read the original label table and split it into train / test structs
% xlsread returns the numeric cells in N and the text cells in T.
[N, T] = xlsread('F:\kaggle_data_zip\train_zip\trainLabels.csv\trainLabels.xls');

% Drop the header row and keep only the first text column (the file names).
% Using "end" instead of a hard-coded row count lets the script work with
% label tables of any length.
T = T(2:end, 1);
sampleCount = numel(T);

% Number of samples (taken from the end of the table) held out for testing.
testCount = 1000;

if size(N, 1) < sampleCount
    error('labels:sizeMismatch', ...
        'Found %d file names but only %d numeric labels.', ...
        sampleCount, size(N, 1));
end
if sampleCount <= testCount
    error('labels:tooFewSamples', ...
        'Need more than %d samples to hold out a test set, got %d.', ...
        testCount, sampleCount);
end

% Build the struct array in one call. This replaces the element-by-element
% loop and guarantees that a stale "train" variable left in the workspace
% cannot leak into the result. The array is kept as a row vector.
train = struct( ...
    'image', reshape(T, 1, []), ...
    'level', reshape(num2cell(N(1:sampleCount, 1)), 1, []));

% The last testCount entries become the test set, the rest stay in train.
test  = train(end - testCount + 1:end);
train = train(1:end - testCount);

%% Save the train and test labels separately
% Each struct array is converted to an N-by-2 cell (file name, level) and
% written to its own Excel file. The cell is sized from the struct array
% itself, so no sample count is hard-coded and no trailing empty rows are
% written when the split sizes change.
testcell = [{test.image}; {test.level}].';
xlswrite('F:\kaggle_data_zip\train_zip\test.xls', testcell, 'sheet1');

traincell = [{train.image}; {train.level}].';
xlswrite('F:\kaggle_data_zip\train_zip\train.xls', traincell, 'sheet1');

%% Select the file names of one class (original order is kept, no sorting)
% Level whose samples are exported; change it to export another class.
targetLevel = 0;

% Logical mask over the training set, then keep only the matching names.
% Writing just the matches means the sheet contains no blank rows, so the
% manual "delete empty rows in Excel" step is no longer needed.
isTarget = ([train.level] == targetLevel);
H = reshape({train(isTarget).image}, [], 1);

if isempty(H)
    warning('labels:noMatch', ...
        'No training sample has level %d; A.xls was not written.', targetLevel);
else
    xlswrite('F:\kaggle_data_zip\train_zip\A.xls', H, 'sheet1');
end

clear all
%% Copy the images listed in a spreadsheet into a class folder, renamed
% The sheet holds file names without extension. Each listed image is read
% from the source folder and written to the class folder with a prefix
% prepended to its name.
[unusedNumeric, names] = xlsread('F:\kaggle_data_zip\train_zip\B.xls'); % read the new table
names = names(:, 1);   % only the first text column holds file names

for k = 1:numel(names)
    % Blank rows left in the sheet would give a bogus ".jpeg" file name
    % and make imread fail, so skip them.
    if isempty(names{k})
        continue;
    end

    fileName = strcat(names{k}, '.jpeg');
    % Named srcFile rather than "path" so MATLAB's built-in path function
    % is not shadowed.
    srcFile = strcat('F:\AAAAA\角度\角度180\', fileName);
    dstFile = strcat('F:\DR\train\B+\', '30a', fileName);

    % Only one image is held in memory at a time; the previous version
    % also grew a cell array that was never used.
    img = imread(srcFile);
    imwrite(img, dstFile, 'jpeg');   % each class goes to its own folder
end


%% Shuffle one class folder, pick a share of its samples, copy and label them
clear all
srcDir       = 'F:\DR\train\E+\';    % folder holding all samples of the class
dstDir       = 'F:\DR\train\E++\';   % folder receiving the selected samples
classLevel   = 4;                    % label written for every selected sample
keepFraction = 1;                    % share of samples to keep (E+ is used at 100%)

allFiles = dir([srcDir, '*jpeg']);                     % all samples
shuffled = allFiles(randperm(numel(allFiles)));        % random order
% round() on a double replaces the former int16() cast, which saturates
% at 32767 and would silently truncate larger folders.
keepCount = round(keepFraction * numel(allFiles));
picked    = shuffled(1:keepCount);                     % selected subset

% Preallocate the (file name, level) table instead of growing it per row.
label0 = cell(keepCount, 2);
for k = 1:keepCount
    % Named srcFile rather than "path" so MATLAB's built-in path function
    % is not shadowed.
    srcFile = [srcDir, picked(k).name];
    img = imread(srcFile);
    % Add the selected sample to the matching A/B/C/D/E "++" folder.
    imwrite(img, [dstDir, picked(k).name], 'jpeg');

    label0{k, 1} = picked(k).name;
    label0{k, 2} = classLevel;
end

% With no selected samples there is nothing to write; previously label0
% was undefined in that case and xlswrite raised an error.
if keepCount == 0
    warning('labels:noSamples', 'No samples selected from %s; 4.xls was not written.', srcDir);
else
    xlswrite('F:\DR\train\4.xls', label0, 'sheet1');
end

%% Write the label spreadsheet for every sample in a class folder
% The names come from the directory listing, so they already carry the
% file extension. (Names without an extension can also be fixed with the
% find-and-replace function of a text editor.)
clear all
files = dir('F:\DR\train\A\*jpeg');   % all samples of class A

% Build the N-by-2 (file name, level) table directly from the listing.
% Every sample in this folder gets level 0.
names  = {files.name};
label0 = [names(:), repmat({0}, numel(names), 1)];

% With an empty folder there is nothing to write; previously label0 was
% undefined in that case and xlswrite raised an error.
if isempty(label0)
    warning('labels:noSamples', 'No samples found in F:\\DR\\train\\A; A1.xls was not written.');
else
    xlswrite('F:\DR\train\A1.xls', label0, 'sheet1');
end

 

posted @ 2016-07-28 21:01  静悟生慧  阅读(491)  评论(0编辑  收藏  举报