常用脚本之 拼接多个 txt 文件

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : gen_all_data.py
@Time : 2023/05/30 10:46:29
@Author : H
@Version : 1.0
@Contact :
@License :
@Desc : 拼接多个 txt 文件,以生成图像分类的数据标注
'''
# here put the import lib
import os
import random
from PIL import Image
# List files holding the male samples. Each line is expected to start with an
# image path, optionally followed by space-separated extra columns.
male_input_txt = [
    'baseBodyDataset/bodyCrop/aaf-images-bodyCrop/images_m_list.txt',
    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/CACD2000-bodyCrop/m_list.txt',
    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/detect-bodyCrop/m_list.txt',
    'baseBodyDataset/bodyCrop/megaage-asian/6-17-60+m_list.txt',
    'baseBodyDataset/bodyCrop/megaage-asian/18-59-m_list.txt',
    'baseBodyDataset/bodyCrop/UTKface-inthewild-bodyCrop/m_list.txt',
    'baseBodyDataset/bodyCrop/SCUT-FBP5500_v2/m_list.txt',
    'baseBodyDataset/bodyCrop/Adience-bodyCrop/m_list.txt',
    'baseBodyDataset/bodyCrop/CelebA/Anno/male_list.txt',
    'baseBodyDataset/bodyCrop/LFW/Anno/male_list.txt',
]

# List files holding the female samples (same line format as above).
female_input_txt = [
    'baseBodyDataset/bodyCrop/aaf-images-bodyCrop/images_fm_list.txt',
    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/CACD2000-bodyCrop/fm_list.txt',
    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/detect-bodyCrop/fm_list.txt',
    'baseBodyDataset/bodyCrop/megaage-asian/6-17-60+fm_list.txt',
    'baseBodyDataset/bodyCrop/megaage-asian/18-59-fm_list.txt',
    'baseBodyDataset/bodyCrop/UTKface-inthewild-bodyCrop/fm_list.txt',
    'baseBodyDataset/bodyCrop/SCUT-FBP5500_v2/fm_list.txt',
    'baseBodyDataset/bodyCrop/Adience-bodyCrop/fm_list.txt',
    'baseBodyDataset/bodyCrop/CelebA/Anno/female_list.txt',
    'baseBodyDataset/bodyCrop/LFW/Anno/female_list.txt',
]

MALE_LABEL = 0
FEMALE_LABEL = 1
TRAIN_RATIO = 0.9
TRAIN_TXT_PATH = '/data/huyuzhen/datasets/baseBodyDataset/Annos/train.txt'
VAL_TXT_PATH = '/data/huyuzhen/datasets/baseBodyDataset/Annos/val.txt'


def collect_labeled_paths(txt_paths, label):
    """Read image paths from several list files and tag them with a label.

    For every line of every list file, the first space-separated field is
    taken as the image path and '/bodyCrop' is rewritten to
    '/baseBodyDataset/bodyCrop'. A path is kept only if it exists on disk and
    PIL can open it.

    Args:
        txt_paths: Iterable of list-file paths to read (UTF-8 text).
        label: Class label appended to every kept path.

    Returns:
        A list of '<image path> <label>' strings, in file order.
    """
    labeled = []
    for txt_path in txt_paths:
        print(txt_path)
        # `with` guarantees the list file is closed even if a line fails.
        with open(txt_path, encoding='utf-8') as list_file:
            for line in list_file:
                # Strip the line terminator first: a line without a label
                # column would otherwise keep its '\n' inside the path and
                # never pass the existence check.
                first_field = line.rstrip('\r\n').split(' ')[0]
                img_path = first_field.replace('/bodyCrop', '/baseBodyDataset/bodyCrop')
                if not os.path.exists(img_path):
                    continue
                try:
                    # Only check that PIL accepts the file; the image object
                    # itself is not used, so close it right away instead of
                    # leaking one handle per sample.
                    with Image.open(img_path):
                        pass
                except Exception as err:
                    # Best-effort scan: skip files PIL rejects, but unlike a
                    # bare `except:` let KeyboardInterrupt/SystemExit through.
                    print(f'skip unreadable image {img_path}: {err}')
                    continue
                labeled.append(f'{img_path} {label}')
    return labeled


def split_train_val(male_items, female_items, train_ratio=TRAIN_RATIO):
    """Balance the two classes and split each into train / validation parts.

    The female list is truncated to at most the number of male items. Each
    class is then cut at ``int(len(class) * train_ratio)``; everything before
    the cut goes to train and everything from the cut onwards goes to
    validation, so no sample is dropped.

    Args:
        male_items: Annotation lines of the male class (already shuffled).
        female_items: Annotation lines of the female class (already shuffled).
        train_ratio: Fraction of each class assigned to the training set.

    Returns:
        A ``(train_items, val_items)`` tuple of lists, male entries first.
    """
    female_items = female_items[:len(male_items)]
    # Cut each class by its own length so the split stays correct when there
    # are fewer female samples than male ones.
    male_cut = int(len(male_items) * train_ratio)
    female_cut = int(len(female_items) * train_ratio)
    train_items = male_items[:male_cut] + female_items[:female_cut]
    val_items = male_items[male_cut:] + female_items[female_cut:]
    return train_items, val_items


def write_annotations(out_path, items):
    """Write one annotation per line to ``out_path``.

    Each item is followed by a space and a newline. The file is written as
    UTF-8 to match the encoding used when reading the list files.
    """
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.writelines(item + ' \n' for item in items)


def main():
    """Build the balanced train/val annotation files for both classes."""
    male_all_txt = collect_labeled_paths(male_input_txt, MALE_LABEL)
    print(len(male_all_txt))
    random.shuffle(male_all_txt)

    female_all_txt = collect_labeled_paths(female_input_txt, FEMALE_LABEL)
    print(len(female_all_txt))
    random.shuffle(female_all_txt)

    train_all_txt, val_all_txt = split_train_val(male_all_txt, female_all_txt)
    print(len(train_all_txt), len(val_all_txt))

    write_annotations(TRAIN_TXT_PATH, train_all_txt)
    write_annotations(VAL_TXT_PATH, val_all_txt)


if __name__ == '__main__':
    main()
posted @   Zenith_Hugh  阅读(63)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现

喜欢请打赏

扫描二维码打赏

微信打赏

点击右上角即可分享
微信分享提示