根据CSV文件生成ImageFolder格式数据集,并按比例划分训练集和验证集

根据csv文件分类,生成ImageFolder格式数据集

import csv
import shutil
import os

target_path = './train_split/'
original_path = './train/'


def sort_images_by_label(csv_path, source_dir, dest_dir):
    """Move images into per-class sub-folders as described by a CSV file.

    Each CSV row is read as ``filename,label``: the file
    ``source_dir/filename`` is moved to ``dest_dir/label/``, producing an
    ImageFolder-style layout (one directory per class).

    Rows with fewer than two columns (e.g. blank lines) are ignored, and
    rows whose file is not present in ``source_dir`` (e.g. a header row, or
    files already moved by an earlier run) are reported and skipped instead
    of aborting the whole run.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file.
        source_dir: Directory that currently holds the images.
        dest_dir: Root directory under which class folders are created.

    Returns:
        The number of files that were moved.
    """
    moved = 0
    # newline="" is what the csv module expects for files it parses itself.
    with open(csv_path, "rt", encoding="utf-8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if len(row) < 2:
                continue
            filename, label = row[0], row[1]
            source_file = os.path.join(source_dir, filename)
            # isfile (not exists) so an empty filename cannot match the
            # source directory itself.
            if not os.path.isfile(source_file):
                print('skipped {}: not found in {}'.format(
                    filename, source_dir))
                continue
            class_dir = os.path.join(dest_dir, label)
            os.makedirs(class_dir, exist_ok=True)
            shutil.move(source_file, class_dir)
            moved += 1
    return moved


if __name__ == "__main__":
    sort_images_by_label('train.csv', original_path, target_path)

将ImageFolder格式数据集按比例划分训练集和验证集(注意:上一步的输出目录为 ./train_split/,而本脚本从 ./train/ 读取,运行前请将目录改名或修改 org_dir)

import os
import random
import shutil

org_dir = './train/'
tar_dir = './train_proportion/'
rate = 0.1


def list_class_dirs(root):
    """Return the names of the immediate sub-directories of ``root``."""
    return [
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]


def split_dataset(src_root, dst_root, fraction, seed=None):
    """Move a random fraction of every class folder into another root.

    For each class sub-directory of ``src_root``, ``int(count * fraction)``
    randomly chosen files are moved to the sub-directory of the same name
    under ``dst_root`` (created when missing). Only regular files are
    counted and sampled; nested directories inside a class folder are left
    in place.

    Args:
        src_root: ImageFolder-style directory (one sub-directory per class).
        dst_root: Directory that receives the sampled files.
        fraction: Share of each class to move, between 0 and 1 inclusive.
        seed: Optional seed for the random generator; the same seed on the
            same directory contents selects the same files.

    Returns:
        A dict mapping each class name to the list of moved file names.

    Raises:
        ValueError: If ``fraction`` is outside the range [0, 1].
    """
    if not 0 <= fraction <= 1:
        raise ValueError(
            'fraction must be between 0 and 1, got {!r}'.format(fraction))

    rng = random.Random(seed)
    os.makedirs(dst_root, exist_ok=True)

    moved = {}
    for class_name in list_class_dirs(src_root):
        # Joining with '' keeps the trailing separator in printed paths.
        class_src = os.path.join(src_root, class_name, '')
        class_dst = os.path.join(dst_root, class_name, '')

        # Sorted so that a fixed seed is independent of os.listdir order.
        pictures = sorted(
            name for name in os.listdir(class_src)
            if os.path.isfile(os.path.join(class_src, name))
        )
        pick_count = int(len(pictures) * fraction)
        print('total {} pictures'.format(len(pictures)))
        print('moved {} pictures to {}'.format(pick_count, class_dst))

        os.makedirs(class_dst, exist_ok=True)
        chosen = rng.sample(pictures, pick_count)
        for name in chosen:
            shutil.move(
                os.path.join(class_src, name), os.path.join(class_dst, name))
            print(name)
        print('succeed moved {} pictures from {} to {}'.format(
            pick_count, class_src, class_dst))
        moved[class_name] = chosen
    return moved


if __name__ == "__main__":
    split_dataset(org_dir, tar_dir, rate)

posted @ 2020-11-22 16:41  Arsene_W  阅读(373)  评论(0)  编辑  收藏  举报