Custom (self-organized) datasets in PyTorch
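
The VisionDataset subclass below reads detection labels in the YOLO text format used by the Ultralytics coco128 dataset and returns (image, target) pairs in the dict format expected by torchvision's detection models. It assumes a directory layout like the following (file names are placeholders):

./data/coco128/images/train2017/<name>.jpg
./data/coco128/labels/train2017/<name>.txt

Each label file holds one object per line: class x_center y_center width height, with all coordinates normalized to [0, 1].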

 

import os
from pathlib import Path
from typing import Any, Callable, Optional, Tuple
import numpy as np
import torch
import torchvision
from PIL import Image


class DatasetSelfDefine(torchvision.datasets.vision.VisionDataset):
    def __init__(
            self,
            root: str,
            name: str,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            transforms: Optional[Callable] = None,
    ) -> None:
        super(DatasetSelfDefine, self).__init__(root, transforms, transform, target_transform)
        images_dir = Path(root) / 'images' / name
        labels_dir = Path(root) / 'labels' / name
        # Collect every image file; pair each with a label file of the same stem,
        # or None when no label file exists.
        self.images = [n for n in images_dir.iterdir()]
        self.labels = []
        for image in self.images:
            base, _ = os.path.splitext(os.path.basename(image))
            label = labels_dir / f'{base}.txt'
            self.labels.append(label if label.exists() else None)

    #  Access the idx-th sample (image and detection target)
    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        img = Image.open(self.images[idx]).convert('RGB')  # RGB PIL Image

        label_file = self.labels[idx]
        if label_file is not None:  # found
            with open(label_file, 'r') as f:
                # one object per line: class x_center y_center width height (normalized)
                labels = [x.split() for x in f.read().strip().splitlines()]
                labels = np.array(labels, dtype=np.float32)
        else:  # missing
            labels = np.zeros((0, 5), dtype=np.float32)

        # Convert normalized [x_center, y_center, w, h] to absolute [x1, y1, x2, y2]
        boxes = []
        classes = []
        for label in labels:
            x, y, w, h = label[1:]
            boxes.append([
                    (x - w / 2) * img.width,
                    (y - h / 2) * img.height,
                    (x + w / 2) * img.width,
                    (y + h / 2) * img.height])
            classes.append(label[0])

        target = {}
        # Ground-truth boxes [x1, y1, x2, y2] in pixels (x in [0, W], y in [0, H]);
        # the reshape keeps a (0, 4) shape when the image has no labels.
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        target["labels"] = torch.as_tensor(classes, dtype=torch.int64)  # class id of each box

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    #  Dataset size
    def __len__(self) -> int:
        return len(self.images)
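

# A joint-transform sketch (hypothetical helper, not part of the original post):
# VisionDataset wires either `transforms` or `transform`/`target_transform` into
# self.transforms, so a callable taking (image, target) can be passed as
# `transforms=detection_transforms` (instead of `transform=`) when the image and
# its boxes must be processed together.
def detection_transforms(img, target):
    img = torchvision.transforms.functional.to_tensor(img)  # PIL -> [C, H, W] float tensor
    return img, target  # boxes are already in absolute pixel coordinates, left unchanged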


if __name__ == '__main__':

    batch_size = 64

    # Expects ./data/coco128/images/train2017 and ./data/coco128/labels/train2017
    dataset = DatasetSelfDefine('./data/coco128', 'train2017', transform=torchvision.transforms.ToTensor())
    print(f'dataset: {len(dataset)}')
    print(f'dataset[0]: {dataset[0]}')

    # Targets hold a variable number of boxes, so the default collate cannot stack
    # them; collate each batch into tuples of images and targets instead.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                             collate_fn=lambda batch: tuple(zip(*batch)))

    for batch_i, (images, targets) in enumerate(dataloader):
        print(f'batch {batch_i}, images {len(images)}, targets {len(targets)}')
        print(f'  images[0]: shape={images[0].shape}')
        print(f'  targets[0]: {targets[0]}')
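
    # A minimal smoke-test sketch (assumption, not in the original post): the target
    # dict above matches what torchvision's detection models expect, so a couple of
    # samples can be fed to Faster R-CNN. Assumes torchvision >= 0.13 for the
    # `weights=` argument; detection models reserve label 0 for background, while
    # coco128 class ids start at 0, hence the +1 shift below.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None, num_classes=81)
    model.train()

    images, targets = next(iter(dataloader))
    images = list(images[:2])  # keep the sketch light: just two samples
    targets = [{'boxes': t['boxes'], 'labels': t['labels'] + 1} for t in targets[:2]]

    loss_dict = model(images, targets)  # dict of classification / box-regression losses
    print({k: round(v.item(), 4) for k, v in loss_dict.items()})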

  

 
