整理样本标签
%%% Organize sample labels:
%%%   1. Manually delete bad samples;
%%%   2. Split the training samples and the test samples;
%%%   3. Add training samples and their labels (filter the labels, obtain the
%%%      file names of the wanted class, pick those samples from a folder and
%%%      rename them).
%%%
%%% The script is a sequence of independent sections. Every section after the
%%% first begins with `clear`, so no variable is carried from one section to
%%% the next; each section declares its own settings right after the `clear`.

%% ------------------------------------------------------------------------
%% Section 1: read the original label sheet and split it into train / test
%% ------------------------------------------------------------------------
numSamples = 35110;   % number of labelled rows used from the sheet (header row excluded)
numTest    = 1000;    % the last numTest samples become the test set

% N receives the numeric cells (the levels), T the text cells (header + file names).
[N, T] = xlsread('F:\kaggle_data_zip\train_zip\trainLabels.csv\trainLabels.xls');

% Fail early with a readable message instead of an index-out-of-range error
% in the middle of the conversion.
if numel(T) < numSamples + 1 || numel(N) < numSamples
    error('organizeLabels:sheetTooShort', ...
          'Label sheet holds fewer than %d labelled rows.', numSamples);
end

% Keep only the text that corresponds to file names (entry 1 is the header).
imageNames  = reshape(T(2:numSamples + 1), 1, []);
imageLevels = reshape(num2cell(N(1:numSamples)), 1, []);

% Build the whole struct array in one step (fields: image, level).
allSamples = struct('image', imageNames, 'level', imageLevels);

% Store the samples in the train and test structs: the tail goes to test.
test  = allSamples(end - numTest + 1:end);
train = allSamples(1:end - numTest);

%% Save the train and test labels separately: convert each struct array to an
%% n-by-2 cell {image, level} and write that cell to an Excel file.
testcell = [reshape({test.image}, [], 1), reshape({test.level}, [], 1)];
xlswrite('F:\kaggle_data_zip\train_zip\test.xls', testcell, 'sheet1');

traincell = [reshape({train.image}, [], 1), reshape({train.level}, [], 1)];
xlswrite('F:\kaggle_data_zip\train_zip\train.xls', traincell, 'sheet1');

%% Filter the labels of one class (no sorting needed).
% Only the matching names are written, so the sheet has no blank rows that
% would have to be deleted by hand in Excel afterwards.
targetLevel = 0;
isTarget    = [train.level] == targetLevel;
H           = reshape({train(isTarget).image}, [], 1);
if isempty(H)
    warning('organizeLabels:noMatch', ...
            'No training sample has level %d; A.xls was not written.', targetLevel);
else
    xlswrite('F:\kaggle_data_zip\train_zip\A.xls', H, 'sheet1');
end

%% ------------------------------------------------------------------------
%% Section 2: copy the images listed in a sheet into a class folder, renamed
%% ------------------------------------------------------------------------
clear
sourceDir  = 'F:\AAAAA\角度\角度180\';
targetDir  = 'F:\DR\train\B+\';
namePrefix = '30a';    % prepended to every written file name

% Read the new sheet; only its text cells (names without extension) are used.
[~, baseNames] = xlsread('F:\kaggle_data_zip\train_zip\B.xls');

for k = 1:length(baseNames)
    % Give the extension-less name its suffix.
    fileName = strcat(baseNames{k}, '.jpeg');

    % Write the images of each class into that class's own folder.
    % A plain local is used so that no decoded image is kept after its iteration.
    % NOTE(review): imread + imwrite decodes and re-encodes the JPEG; if an
    % exact copy is wanted, copyfile may be the better tool - confirm.
    img = imread(strcat(sourceDir, fileName));
    imwrite(img, strcat(targetDir, namePrefix, fileName), 'jpeg');
end

%% ------------------------------------------------------------------------
%% Section 3: shuffle one class folder, pick a share of it, copy and label it
%% ------------------------------------------------------------------------
clear
sourceDir    = 'F:\DR\train\E+\';
targetDir    = 'F:\DR\train\E++\';
classLevel   = 4;      % label written for every picked sample
pickFraction = 1;      % share of the folder to pick; note: the E+ folder is processed 100%

candidates = dir(strcat(sourceDir, '*jpeg'));             % all samples
shuffled   = candidates(randperm(numel(candidates)));     % random shuffle

% Pick a certain number of samples. round() on a double is used instead of an
% int16 cast because int16 saturates at 32767 and would silently truncate the
% selection for larger folders.
numPicked = round(pickFraction * numel(candidates));
picked    = shuffled(1:numPicked);

if numPicked == 0
    error('organizeLabels:noSamples', 'No samples selected from %s', sourceDir);
end

% Read the data of the class, attach the label and export it as an Excel sheet.
% Original author's note: writing the cell to a sheet fails when it is too
% large (class 0 fails); only part of class 0 is selected, so the problem
% does not occur.
label0 = cell(numPicked, 2);
for k = 1:numPicked
    % Add the picked sample to the corresponding A/B/C/D ++ folder.
    img = imread(strcat(sourceDir, picked(k).name));
    imwrite(img, strcat(targetDir, picked(k).name), 'jpeg');

    label0{k, 1} = picked(k).name;
    label0{k, 2} = classLevel;
end
xlswrite('F:\DR\train\4.xls', label0, 'sheet1');

%%% Names without a suffix need the suffix added! The replace function of a
%%% text editor can be used for that as well.

%% ------------------------------------------------------------------------
%% Section 4: write the label sheet for a folder directly
%% ------------------------------------------------------------------------
clear
classLevel = 0;
samples    = dir('F:\DR\train\A\*jpeg');    % all samples of A

if isempty(samples)
    error('organizeLabels:noSamples', 'No samples found for the A label sheet.');
end

% One row per file: {file name, level}.
label0 = [reshape({samples.name}, [], 1), repmat({classLevel}, numel(samples), 1)];
xlswrite('F:\DR\train\A1.xls', label0, 'sheet1');