制作训练集和验证集、测试集
"""Build the training and test sets used for deep-learning experiments.

For every category folder under ``<abs_path>/dataset/scene_categories`` a
random sample of files is copied to ``<abs_path>/data/train`` and every file
that was not sampled is copied to ``<abs_path>/data/test``.  Two text files
per split (image paths and labels) are then written to
``<abs_path>/data/samples`` and moved to ``<abs_path>/data1/<round>``.

``abs_path`` is not defined in this file; it is expected to be supplied by
the star import from the project's ``config`` module.
"""

import os
import random
import shutil

from config import *  # noqa: F401,F403 - expected to provide abs_path

# Imported after the star import, as in the original file, so that nothing
# exported by config can shadow the standard-library module.
import re

# A file name is split on "_<digits><any one character>" and the text before
# the first match is used as the label, e.g. "a_b_12.jpg" -> "a_b".
# NOTE(review): the trailing "." is an unescaped wildcard; it is kept as-is so
# that labels stay identical to those produced by earlier runs.
_LABEL_SPLIT_RE = re.compile(r'_\d+.')


def _data_path(*parts):
    """Return a path below ``<abs_path>/data``."""
    return os.path.join(abs_path, "data", *parts)


def _category_root():
    """Return the folder that holds one sub-folder per category."""
    return os.path.join(abs_path, "dataset", "scene_categories")


def _label_of(file_name):
    """Return the label encoded at the start of *file_name*."""
    return _LABEL_SPLIT_RE.split(file_name)[0]


def test_train_dir():
    """Delete ``<abs_path>/data`` if present and recreate its sub-folders.

    Starting from an empty tree avoids files from a previous run being
    counted again.  The ``test``, ``train`` and ``samples`` folders are
    created afterwards.
    """
    data_root = _data_path()
    if os.path.exists(data_root):
        shutil.rmtree(data_root)
    # makedirs creates the intermediate "data" folder as well.
    for sub_folder in ("test", "train", "samples"):
        os.makedirs(os.path.join(data_root, sub_folder))


def tet_data(fileDir, target_dir=None):
    """Copy the files of *fileDir* that are not in the training set.

    Args:
        fileDir: Source folder containing the files of one category.
        target_dir: Destination folder.  Defaults to ``<abs_path>/data/test``.
    """
    if target_dir is None:
        target_dir = _data_path("test")
    train_dir = _data_path("train")
    for name in os.listdir(fileDir):
        train_copy = os.path.join(train_dir, name)
        print(train_copy)
        # A file that was already sampled for training must not be reused.
        if os.path.exists(train_copy):
            continue
        shutil.copy(os.path.join(fileDir, name),
                    os.path.join(target_dir, name))


def moveFile(fileDir, target_dir=None, sample_size=30):
    """Copy a random sample of the files in *fileDir* to the training set.

    Despite the name, files are copied, not moved, so the source folder is
    left untouched.

    Args:
        fileDir: Source folder containing the files of one category.
        target_dir: Destination folder.  Defaults to
            ``<abs_path>/data/train``.
        sample_size: Number of files to pick.  When the folder holds fewer
            files, all of them are taken instead of raising ``ValueError``.
    """
    if target_dir is None:
        target_dir = _data_path("train")
    names = os.listdir(fileDir)
    sample = random.sample(names, min(sample_size, len(names)))
    print(sample)
    for name in sample:
        shutil.copy(os.path.join(fileDir, name),
                    os.path.join(target_dir, name))


def train_and_labels():
    """Write ``train_dir.txt`` and ``train_label.txt`` for the training set.

    One line is written per file found in ``<abs_path>/data/train``: the
    path of that file inside its category folder of the original dataset,
    and its label.
    """
    samples_dir = _data_path("samples")
    with open(os.path.join(samples_dir, "train_dir.txt"), "w") as dir_file, \
            open(os.path.join(samples_dir, "train_label.txt"), "w") as label_file:
        for name in os.listdir(_data_path("train")):
            label = _label_of(name)
            print(os.path.join(abs_path, "data") + name + "\t" + label + "\n")
            dir_file.write(
                os.path.join(_category_root(), label, name) + "\n")
            label_file.write(label + "\n")


def tet_and_labels():
    """Write ``test_dir.txt`` and ``test_label.txt`` for the test set.

    One line is written per file found in ``<abs_path>/data/test``: the
    path of that file inside its category folder of the original dataset,
    and its label.
    """
    samples_dir = _data_path("samples")
    with open(os.path.join(samples_dir, "test_dir.txt"), "w") as dir_file, \
            open(os.path.join(samples_dir, "test_label.txt"), "w") as label_file:
        for name in os.listdir(_data_path("test")):
            label = _label_of(name)
            print(os.path.join(abs_path, "data", name) + label + "\n")
            dir_file.write(
                os.path.join(_category_root(), label, name) + "\n")
            label_file.write(label + "\n")


def main():
    """Generate four independent random train/test splits.

    The list files of round ``n`` end up in ``<abs_path>/data1/<n>``; the
    ``data`` tree itself is rebuilt from scratch at the start of every round.
    """
    category_root = _category_root()
    train_dir = _data_path("train")
    test_dir = _data_path("test")
    samples_dir = _data_path("samples")

    for round_index in range(4):
        test_train_dir()

        categories = os.listdir(category_root)
        print("-------------------------", category_root)
        for category in categories:
            source_dir = os.path.join(category_root, category)
            moveFile(source_dir, train_dir)  # train
            tet_data(source_dir, test_dir)   # test: everything not sampled

        train_and_labels()
        tet_and_labels()

        round_dir = os.path.join(abs_path, "data1", str(round_index))
        os.makedirs(round_dir, exist_ok=True)
        # Keep this round's list files before the next round wipes "data".
        for dir_name, _subfolders, filenames in os.walk(samples_dir):
            for filename in filenames:
                if filename.endswith('.txt'):
                    shutil.move(os.path.join(dir_name, filename),
                                os.path.join(round_dir, filename))


if __name__ == '__main__':
    main()