根据 CSV 文件生成 ImageFolder 格式数据集,并按比例划分为训练集和验证集
根据 CSV 文件中的类别标签对图片分类,生成 ImageFolder 格式数据集
import csv
import shutil
import os
# Default locations used when this file is executed as a script.
target_path = './train_split/'
original_path = './train/'


def split_by_label(csv_path='train.csv', src_dir=original_path,
                   dst_dir=target_path):
    """Move every image listed in a CSV file into a per-label sub-directory.

    Each CSV row is read as ``filename, label, ...``: column 0 is a file name
    relative to ``src_dir`` and column 1 is the name of the sub-directory of
    ``dst_dir`` the file is moved into.  The result is a
    ``dst_dir/<label>/<file>`` tree (the ImageFolder layout).

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.
        src_dir: Directory that currently holds the files.
        dst_dir: Root directory under which label directories are created.

    Returns:
        The number of files that were moved.

    Raises:
        FileNotFoundError: If ``csv_path`` or a listed source file is missing.
            Every row is treated as data, so a header row whose first column
            is not an existing file also ends up here.
        shutil.Error: If a file of the same name already exists in the
            destination label directory.
    """
    moved = 0
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to csv.reader.
    with open(csv_path, 'rt', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines are returned as [] and would otherwise fail with an
            # IndexError on row[1]; rows without a label cannot be placed.
            if len(row) < 2:
                continue
            filename, label = row[0], row[1]
            label_dir = os.path.join(dst_dir, label)
            # exist_ok removes the separate "does it exist yet?" branch.
            os.makedirs(label_dir, exist_ok=True)
            # The destination is an existing directory, so the file is moved
            # *into* it under its own base name.
            shutil.move(os.path.join(src_dir, filename), label_dir)
            moved += 1
    return moved


# Run the split only when executed as a script, so the function can be
# imported and reused without touching the file system.
if __name__ == '__main__':
    split_by_label()
将 ImageFolder 格式数据集按比例划分为训练集和验证集
import os
import random
import shutil
# Default settings used when this file is executed as a script.
# NOTE(review): the CSV-sorting script earlier in this file writes its class
# folders to './train_split/', while this one reads './train/' -- confirm
# which directory is meant to be split.
org_dir = './train/'
tar_dir = './train_proportion/'
rate = 0.1


def split_train_val(src_root=org_dir, dst_root=tar_dir, ratio=rate,
                    seed=None):
    """Move a random fraction of every class folder into a second tree.

    Every immediate sub-directory of ``src_root`` is treated as one class.
    For each class, ``int(n * ratio)`` of its ``n`` entries are chosen at
    random and moved to the sub-directory of the same name under
    ``dst_root``; the remaining entries stay where they are.  Plain files
    lying directly in ``src_root`` are ignored.

    Args:
        src_root: Directory containing one sub-directory per class.
        dst_root: Directory that receives the sampled entries; it and the
            per-class sub-directories are created when missing.
        ratio: Fraction of each class to move, between 0 and 1 inclusive.
        seed: Optional seed for the random generator.  With a seed, the same
            directory contents always produce the same selection; ``None``
            keeps the selection different on every run.

    Returns:
        A dict mapping each class name to the list of entry names that were
        moved for it.

    Raises:
        ValueError: If ``ratio`` is outside the range [0, 1].
    """
    if not 0 <= ratio <= 1:
        raise ValueError(
            'ratio must be between 0 and 1, got {!r}'.format(ratio))

    rng = random.Random(seed)
    # os.listdir returns entries in arbitrary order; sorting makes both the
    # processing order and the seeded sample reproducible.
    class_names = sorted(
        entry for entry in os.listdir(src_root)
        if os.path.isdir(os.path.join(src_root, entry))
    )
    os.makedirs(dst_root, exist_ok=True)

    moved = {}
    for class_name in class_names:
        # The trailing '' keeps a trailing separator on the path, so the
        # printed messages look the same as with hand-built paths.
        class_src = os.path.join(src_root, class_name, '')
        class_dst = os.path.join(dst_root, class_name, '')
        candidates = sorted(os.listdir(class_src))
        pick_count = int(len(candidates) * ratio)
        print('total {} pictures'.format(len(candidates)))
        print('moved {} pictures to {}'.format(pick_count, class_dst))
        os.makedirs(class_dst, exist_ok=True)
        picked = rng.sample(candidates, pick_count)
        for name in picked:
            shutil.move(class_src + name, class_dst + name)
            print(name)
        print('succeed moved {} pictures from {} to {}'.format(
            pick_count, class_src, class_dst))
        moved[class_name] = picked
    return moved


# Run the split only when executed as a script, so the function can be
# imported and reused without touching the file system.
if __name__ == '__main__':
    split_train_val()