Building an Imbalanced Dataset
ChatGPT is hopeless at this one: it has no idea how to build an imbalanced dataset, let alone one whose per-class counts actually decay. Heartbreaking for us long-tail people.
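What keeps getting missed is that the standard long-tail recipe is just an exponential decay: with C classes, imbalance factor mu, and head-class size n_max, class i keeps n_max * mu ** (i / (C - 1)) samples, so class 0 keeps n_max and class C-1 keeps n_max * mu. A minimal sketch of just that profile (the numbers are the CIFAR-10 case with mu = 0.01, i.e. an imbalance ratio of 100):

```python
# Exponential long-tail profile: class i (0 = head) keeps
# n_max * mu ** (i / (C - 1)) samples.
C, n_max, mu = 10, 5000, 0.01  # CIFAR-10 with imbalance ratio 100
counts = [int(n_max * mu ** (i / (C - 1))) for i in range(C)]
print(counts)  # [5000, 2997, 1796, 1077, 645, 387, 232, 139, 83, 50]
```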
Rather than fuss over the details, here is a universal template you can lift directly:
```python
import argparse
import copy
import os
import random

import numpy as np
import scipy.io as sio
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.sampler import WeightedRandomSampler

from data_utils import *
from loss import *
from Meta_train import ResNet32_100

parser = argparse.ArgumentParser(description='Imbalanced Example')
parser.add_argument('--dataset', default='cifar100', type=str,
                    help='dataset (cifar10 or cifar100 [default])')
parser.add_argument('--batch-size', type=int, default=100, metavar='N',
                    help='input batch size for training (default: 100)')
parser.add_argument('--num_classes', type=int, default=100)
parser.add_argument('--num_meta', type=int, default=0,
                    help='number of meta samples per class')
parser.add_argument('--imb_factor', type=float, default=0.01)  # 0.01 == imbalance ratio 100
parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                    help='input batch size for testing (default: 100)')
parser.add_argument('--epochs', type=int, default=200, metavar='N',
                    help='number of epochs to train')
parser.add_argument('--lr', '--learning-rate', default=1e-1, type=float,
                    help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
parser.add_argument('--nesterov', default=True, type=bool, help='nesterov momentum')
parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
                    help='weight decay (default: 5e-4)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--split', type=int, default=1000)
parser.add_argument('--seed', type=int, default=42, metavar='S',
                    help='random seed (default: 42)')
parser.add_argument('--print-freq', '-p', default=100, type=int,
                    help='print frequency (default: 100)')
parser.add_argument('--lam', default=0.25, type=float, help='[0.25, 0.5, 0.75, 1.0]')
parser.add_argument('--gpu', default=0, type=int)
parser.add_argument('--meta_lr', default=0.1, type=float)
parser.add_argument('--save_name', default='name', type=str)
parser.add_argument('--idx', default='0', type=str)

args = parser.parse_args()
for arg in vars(args):
    print("{}={}".format(arg, getattr(args, arg)))

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
kwargs = {'num_workers': 1, 'pin_memory': False}
use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")

# Start from the balanced training set (optionally minus a per-class meta split).
train_data_meta, train_data, test_dataset = build_dataset(args.dataset, args.num_meta)
print(f'length of meta dataset: {len(train_data_meta)}')
print(f'length of train dataset: {len(train_data)}')

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size, shuffle=True, **kwargs)

np.random.seed(42)
random.seed(42)
torch.manual_seed(args.seed)
classe_labels = range(args.num_classes)

# Sample indices of each class in the balanced training set.
data_list = {}
for j in range(args.num_classes):
    data_list[j] = [i for i, label in enumerate(train_loader.dataset.targets)
                    if label == j]

# Target per-class counts for the exponentially decaying long-tail profile.
img_num_list = get_img_num_per_cls(args.dataset, args.imb_factor,
                                   args.num_meta * args.num_classes)
print(img_num_list)
print(sum(img_num_list))

# For each class, shuffle its indices, keep the first img_num, and mark
# the rest for deletion.
im_data = {}
idx_to_del = []
for cls_idx, img_id_list in data_list.items():
    random.shuffle(img_id_list)
    img_num = img_num_list[int(cls_idx)]
    im_data[cls_idx] = img_id_list[img_num:]  # dropped indices per class (unused below)
    idx_to_del.extend(img_id_list[img_num:])
print(len(idx_to_del))

# Materialize the imbalanced dataset by deleting the marked samples.
imbalanced_train_dataset = copy.deepcopy(train_data)
imbalanced_train_dataset.targets = np.delete(train_loader.dataset.targets,
                                             idx_to_del, axis=0)
imbalanced_train_dataset.data = np.delete(train_loader.dataset.data,
                                          idx_to_del, axis=0)
print(len(imbalanced_train_dataset))

imbalanced_train_loader = torch.utils.data.DataLoader(
    imbalanced_train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=args.batch_size, shuffle=False, **kwargs)

best_prec1 = 0


def main():
    # Plug your training loop in here; the imbalanced train loader and the
    # test loader are ready to use.
    imbalanced_train_loader
    test_loader


if __name__ == '__main__':
    main()
```
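If everything went through, the per-class histogram of imbalanced_train_dataset should reproduce img_num_list exactly. A quick sanity check (not part of the original script; paste it after the loaders are built):

```python
from collections import Counter

# Count surviving samples per class, head class first.
cls_counts = Counter(int(t) for t in imbalanced_train_dataset.targets)
print([cls_counts[c] for c in sorted(cls_counts)])
# For cifar100 with imb_factor=0.01 and num_meta=0 this decays from
# 500 (class 0) down to 5 (class 99), matching img_num_list.
```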
And the matching `data_utils.py` that the template imports:

```python
import copy

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision
import torchvision.transforms as transforms

np.random.seed(6)


def build_dataset(dataset, num_meta):
    """Load CIFAR-10/100 and split num_meta samples per class into a meta set."""
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    transform_train = transforms.Compose([
        transforms.ToTensor(),
        # Reflection-pad 4 pixels on each side, then randomly crop back to 32x32.
        transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                          mode='reflect').squeeze()),
        transforms.ToPILImage(),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    if dataset == 'cifar10':
        train_dataset = torchvision.datasets.CIFAR10(
            root='../cifar-10', train=True, download=False, transform=transform_train)
        test_dataset = torchvision.datasets.CIFAR10(
            '../cifar-10', train=False, transform=transform_test)
        img_num_list = [num_meta] * 10
        num_classes = 10
    if dataset == 'cifar100':
        train_dataset = torchvision.datasets.CIFAR100(
            root='../cifar-100', train=True, download=True, transform=transform_train)
        test_dataset = torchvision.datasets.CIFAR100(
            '../cifar-100', train=False, transform=transform_test)
        img_num_list = [num_meta] * 100
        num_classes = 100

    # Sample indices of each class.
    data_list_val = {}
    for j in range(num_classes):
        data_list_val[j] = [i for i, label in enumerate(train_dataset.targets)
                            if label == j]

    # The first num_meta shuffled indices of each class become meta data;
    # the rest stay in the training set.
    idx_to_meta = []
    idx_to_train = []
    print(img_num_list)
    for cls_idx, img_id_list in data_list_val.items():
        np.random.shuffle(img_id_list)
        img_num = img_num_list[int(cls_idx)]
        idx_to_meta.extend(img_id_list[:img_num])
        idx_to_train.extend(img_id_list[img_num:])

    train_data = copy.deepcopy(train_dataset)
    train_data_meta = copy.deepcopy(train_dataset)

    train_data_meta.data = np.delete(train_dataset.data, idx_to_train, axis=0)
    train_data_meta.targets = np.delete(train_dataset.targets, idx_to_train, axis=0)
    train_data.data = np.delete(train_dataset.data, idx_to_meta, axis=0)
    train_data.targets = np.delete(train_dataset.targets, idx_to_meta, axis=0)

    return train_data_meta, train_data, test_dataset


def get_img_num_per_cls(dataset, imb_factor=None, num_meta=None):
    """Per-class sample counts for the exponential long-tail profile."""
    if dataset == 'cifar10':
        img_max = (50000 - num_meta) / 10
        cls_num = 10
    if dataset == 'cifar100':
        img_max = (50000 - num_meta) / 100
        cls_num = 100
    if imb_factor is None:
        return [img_max] * cls_num
    img_num_per_cls = []
    for cls_idx in range(cls_num):
        # Class 0 keeps img_max samples; class cls_num-1 keeps img_max * imb_factor.
        num = img_max * (imb_factor ** (cls_idx / (cls_num - 1.0)))
        img_num_per_cls.append(int(num))
    return img_num_per_cls
```
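One loose end: the template imports WeightedRandomSampler but never uses it. If you also want a class-balanced loader drawn from the imbalanced set (a common re-sampling baseline, not something the template above does), an inverse-frequency sketch over the `imbalanced_train_dataset` built earlier would look like this:

```python
import numpy as np
import torch
from torch.utils.data.sampler import WeightedRandomSampler

# Each sample is drawn with probability proportional to 1 / count(its class),
# so every class contributes roughly equally to each epoch.
targets = np.asarray(imbalanced_train_dataset.targets)
class_counts = np.bincount(targets)
sample_weights = 1.0 / class_counts[targets]

sampler = WeightedRandomSampler(
    weights=torch.as_tensor(sample_weights, dtype=torch.double),
    num_samples=len(targets),
    replacement=True)
balanced_loader = torch.utils.data.DataLoader(
    imbalanced_train_dataset, batch_size=args.batch_size,
    sampler=sampler, **kwargs)
```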
Author: 太好了还有脑子可以用
Permalink: https://www.cnblogs.com/ZarkY/p/18083782
License: This work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivatives 2.5 China Mainland License.