YOLOv8 Source Code Analysis (42)

.\yolov8\ultralytics\utils\loss.py

# Import the required PyTorch modules
import torch
import torch.nn as nn
import torch.nn.functional as F

# Import specific utilities from the Ultralytics utils package
from ultralytics.utils.metrics import OKS_SIGMA
from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
from ultralytics.utils.torch_utils import autocast

# Import bbox_iou and probiou from the local .metrics module
from .metrics import bbox_iou, probiou
# Import bbox2dist from the local .tal module
from .tal import bbox2dist


# VarifocalLoss module, subclassing nn.Module
class VarifocalLoss(nn.Module):
    """
    Varifocal loss by Zhang et al.

    https://arxiv.org/abs/2008.13367.
    """

    def __init__(self):
        """Initialize the VarifocalLoss class."""
        super().__init__()

    @staticmethod
    def forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """Computes varfocal loss."""
        # 计算权重
        weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
        # 禁用自动混合精度计算
        with autocast(enabled=False):
            # 计算损失
            loss = (
                (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight)
                .mean(1)  # 沿着维度1取均值
                .sum()  # 求和
            )
        return loss
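
A minimal usage sketch with random stand-in tensors (the shapes and the 0.5 positive threshold are illustrative assumptions, not values taken from the training loop):

# Illustrative only: random tensors standing in for head outputs and targets
import torch

vfl = VarifocalLoss()
pred_score = torch.randn(2, 100, 80)             # raw class logits (batch, anchors, classes)
gt_score = torch.rand(2, 100, 80)                # soft, IoU-aware targets in [0, 1]
label = (gt_score > 0.5).float()                 # hypothetical binary positive mask
loss = vfl(pred_score, gt_score, label)          # scalar loss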


class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self):
        """Initializer for FocalLoss class with no parameters."""
        super().__init__()

    @staticmethod
    def forward(pred, label, gamma=1.5, alpha=0.25):
        """Calculates and updates confusion matrix for object detection/classification tasks."""
        # 计算二元交叉熵损失
        loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
        # 计算概率
        pred_prob = pred.sigmoid()  # logits转为概率
        # 计算p_t值
        p_t = label * pred_prob + (1 - label) * (1 - pred_prob)
        # 计算调节因子
        modulating_factor = (1.0 - p_t) ** gamma
        # 应用调节因子到损失上
        loss *= modulating_factor
        # 如果alpha大于0,则应用alpha因子
        if alpha > 0:
            alpha_factor = label * alpha + (1 - label) * (1 - alpha)
            loss *= alpha_factor
        return loss.mean(1).sum()
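
The modulating factor is easy to verify numerically: with gamma = 1.5, a well-classified example (p_t = 0.9) keeps about 3% of its BCE contribution while a hard one (p_t = 0.1) keeps about 85%:

# Quick numeric check of the focal modulating factor (illustrative values)
for p_t in (0.9, 0.5, 0.1):
    print(p_t, round((1 - p_t) ** 1.5, 3))  # 0.032, 0.354, 0.854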


class DFLoss(nn.Module):
    """Criterion class for computing DFL losses during training."""

    def __init__(self, reg_max=16) -> None:
        """Initialize the DFL module."""
        super().__init__()
        self.reg_max = reg_max

    def __call__(self, pred_dist, target):
        """
        Return sum of left and right DFL losses.

        Distribution Focal Loss (DFL) proposed in Generalized Focal Loss
        https://ieeexplore.ieee.org/document/9792391
        """
        # Clamp the targets to [0, reg_max - 1 - 0.01]
        target = target.clamp_(0, self.reg_max - 1 - 0.01)
        # Left integer bin of each target
        tl = target.long()  # target left
        # Right neighbouring bin
        tr = tl + 1  # target right
        # Weight assigned to the left bin
        wl = tr - target  # weight left
        # Weight assigned to the right bin
        wr = 1 - wl  # weight right
        # Cross-entropy toward the left bin, weighted
        left_loss = F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl
        # Cross-entropy toward the right bin, weighted
        right_loss = F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr
        # Mean of the two weighted losses over the last dimension, keeping dims
        return (left_loss + right_loss).mean(-1, keepdim=True)
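
To make the left/right decomposition concrete, a fractional regression target such as 2.7 is split between its two neighbouring integer bins (values below are illustrative):

import torch

target = torch.tensor([2.7])
tl = target.long()   # tensor([2]): left bin
tr = tl + 1          # tensor([3]): right bin
wl = tr - target     # tensor([0.3000]): weight on the left bin
wr = 1 - wl          # tensor([0.7000]): weight on the right bin
# DFL = wl * CE(pred, tl) + wr * CE(pred, tr), minimized when the predicted
# distribution places 0.3 probability on bin 2 and 0.7 on bin 3.

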
# Module for computing bounding-box regression losses
class BboxLoss(nn.Module):
    """Criterion class for computing bounding-box training losses."""

    def __init__(self, reg_max=16):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__()
        # Create a DFLoss instance when reg_max > 1, otherwise disable DFL
        self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """Compute IoU loss."""
        # Sum of target scores for the foreground anchors, used as a per-anchor weight
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        # IoU (Intersection over Union) between predicted and target boxes
        iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
        # IoU loss
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.dfl_loss:
            # Convert anchor points and target boxes to distance targets
            target_ltrb = bbox2dist(anchor_points, target_bboxes, self.dfl_loss.reg_max - 1)
            loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            # No DFL: return a zero loss
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl


# Subclass of BboxLoss that handles rotated bounding boxes
class RotatedBboxLoss(BboxLoss):
    """Criterion class for computing rotated bounding-box training losses."""

    def __init__(self, reg_max):
        """Initialize the RotatedBboxLoss module with regularization maximum and DFL settings."""
        super().__init__(reg_max)

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """Compute IoU loss for rotated bounding boxes."""
        # Sum of target scores for the foreground anchors, used as a per-anchor weight
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        # Probabilistic IoU between predicted and target rotated boxes
        iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
        # IoU loss
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.dfl_loss:
            # Convert anchor points and target boxes to distance targets
            target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.dfl_loss.reg_max - 1)
            loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            # No DFL: return a zero loss
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl


# Module for computing keypoint losses
class KeypointLoss(nn.Module):
    """Criterion class for computing training losses."""

    def __init__(self, sigmas) -> None:
        """Initialize the KeypointLoss class."""
        super().__init__()
        # Store the per-keypoint sigmas
        self.sigmas = sigmas

    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
        """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
        # Squared Euclidean distance between predicted and ground-truth keypoints in x and y
        d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
        # Loss factor that rebalances images with few labelled keypoints
        kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
        # OKS-style exponent, scaled by the per-keypoint sigmas and the object area
        e = d / ((2 * self.sigmas).pow(2) * (area + 1e-9) * 2)  # from the cocoeval formula
        # Weighted mean of the loss, masked so only labelled keypoints contribute
        return (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * kpt_mask)).mean()
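
A small sanity check of the OKS-style kernel (distances, sigmas and area below are made up): a perfectly localized keypoint contributes 0 loss, while a far-off one saturates near 1:

import torch

d = torch.tensor([0.0, 25.0])      # squared pixel distances to the ground truth
s = torch.tensor([0.05, 0.05])     # per-keypoint sigmas (illustrative)
area = torch.tensor(100.0)         # object area
e = d / ((2 * s).pow(2) * (area + 1e-9) * 2)
print(1 - torch.exp(-e))           # ≈ tensor([0., 1.])

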
class v8DetectionLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model, tal_topk=10):  # model must be de-paralleled
        """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
        # Get the device the model lives on
        device = next(model.parameters()).device  # get model device
        # Fetch the hyperparameters from the model
        h = model.args  # hyperparameters

        # The last model component, normally the Detect() module
        m = model.model[-1]  # Detect() module
        # BCE loss on logits, with no reduction
        self.bce = nn.BCEWithLogitsLoss(reduction="none")
        # Store the hyperparameters
        self.hyp = h
        # Model strides
        self.stride = m.stride  # model strides
        # Number of classes
        self.nc = m.nc  # number of classes
        # Number of outputs per anchor: classes plus box regression bins
        self.no = m.nc + m.reg_max * 4
        # Maximum number of regression bins
        self.reg_max = m.reg_max
        # Store the device
        self.device = device

        # Whether DFL (Distribution Focal Loss) is used
        self.use_dfl = m.reg_max > 1

        # Task-aligned assigner for matching ground-truth boxes to anchors
        self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
        # Bounding-box loss with the configured number of regression bins
        self.bbox_loss = BboxLoss(m.reg_max).to(device)
        # Projection tensor used to decode the DFL distribution, kept on the same device
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        # Get the dimensions of the targets tensor
        nl, ne = targets.shape
        # If there are no targets, return an all-zeros tensor
        if nl == 0:
            out = torch.zeros(batch_size, 0, ne - 1, device=self.device)
        else:
            # Image indices and their per-image counts
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            # Zero tensor to hold the padded, preprocessed targets
            out = torch.zeros(batch_size, counts.max(), ne - 1, device=self.device)
            for j in range(batch_size):
                # Targets belonging to the current image in the batch
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            # Scale the box coordinates and convert them to xyxy
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out
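
The expected layout of targets is one row per labelled object, [image_index, class, x, y, w, h] with normalized xywh boxes; a sketch of a two-image batch with illustrative values:

import torch

targets = torch.tensor([
    [0.0, 3.0, 0.50, 0.50, 0.20, 0.30],  # image 0, class 3
    [0.0, 1.0, 0.20, 0.80, 0.10, 0.10],  # image 0, class 1
    [1.0, 0.0, 0.70, 0.40, 0.40, 0.50],  # image 1, class 0
])
# preprocess(...) pads this to (batch_size, max_boxes_per_image, 5), scales the
# boxes by scale_tensor, and converts the xywh columns to xyxy.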

    def bbox_decode(self, anchor_points, pred_dist):
        """Decode predicted object bounding box coordinates from anchor points and distribution."""
        # If DFL is used, decode the predicted distribution
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            # Softmax over the bins, then project to the expected distance
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # Two equivalent alternative decodings, kept for reference:
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        # Return the decoded box coordinates
        return dist2bbox(pred_dist, anchor_points, xywh=False)
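
The softmax-matmul above is simply an expectation over the discrete bins: each box side predicts a distribution over reg_max bins, and the decoded distance is the probability-weighted bin index. A sketch for a single side, assuming the default reg_max = 16:

import torch

reg_max = 16
proj = torch.arange(reg_max, dtype=torch.float)  # bin indices 0..15
logits = torch.randn(reg_max)                    # one side's distribution logits
distance = logits.softmax(0) @ proj              # expected bin index, in stride units
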
    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        # Zero-initialized tensor holding the box, cls and dfl losses
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl

        # If preds is a tuple, take its second element as the feature maps
        feats = preds[1] if isinstance(preds, tuple) else preds

        # Concatenate the feature maps and split into predicted distribution and scores
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # Permute so the tensors are (batch, anchors, channels) for the steps below
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        # dtype and batch size of the predictions
        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]

        # Image size in pixels, derived from the largest feature map and its stride
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)

        # Generate anchor points and the matching stride tensor
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Assemble the targets (image index, class, box) and move them to the device
        targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])

        # Split into class labels and boxes
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy

        # Mask marking which ground-truth boxes are valid (non-empty)
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)

        # Decode the predicted boxes into real coordinates
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        # Run the assigner to match targets to anchors
        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        # Sum of the target scores, used to normalize the losses
        target_scores_sum = max(target_scores.sum(), 1)

        # Classification loss: binary cross-entropy over the class scores
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # If any anchor is assigned as foreground, compute the box and DFL losses
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )

        # Weight each loss by its gain from the hyperparameters
        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        # Return the total loss scaled by batch size, plus the detached per-part losses
        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)


class v8SegmentationLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
        super().__init__(model)
        self.overlap = model.args.overlap_mask

    @staticmethod
    def single_mask_loss(
        gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
    ) -> torch.Tensor:
        """
        Compute the instance segmentation loss for a single image.

        Args:
            gt_mask (torch.Tensor): Ground truth mask of shape (n, H, W), where n is the number of objects.
            pred (torch.Tensor): Predicted mask coefficients of shape (n, 32).
            proto (torch.Tensor): Prototype masks of shape (32, H, W).
            xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (n, 4).
            area (torch.Tensor): Area of each ground truth bounding box of shape (n,).

        Returns:
            (torch.Tensor): The calculated mask loss for a single image.

        Notes:
            The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
            predicted masks from the prototype masks and predicted mask coefficients.
        """
        # Compute predicted masks using prototype masks and coefficients
        pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (n, 32) @ (32, H, W) -> (n, H, W)
        # Compute binary cross entropy loss between predicted masks and ground truth masks
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
        # Crop the loss using bounding boxes, then compute mean per instance and sum across instances
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()
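
The einsum is a per-instance linear combination of the shared prototype maps; a shape-only sketch (the 160x160 prototype resolution is an assumption matching the usual default):

import torch

pred = torch.randn(3, 32)           # 3 instances, 32 mask coefficients each
proto = torch.randn(32, 160, 160)   # shared prototype masks
pred_mask = torch.einsum("in,nhw->ihw", pred, proto)
print(pred_mask.shape)              # torch.Size([3, 160, 160])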

    def calculate_segmentation_loss(
        self,
        fg_mask: torch.Tensor,
        masks: torch.Tensor,
        target_gt_idx: torch.Tensor,
        target_bboxes: torch.Tensor,
        batch_idx: torch.Tensor,
        proto: torch.Tensor,
        pred_masks: torch.Tensor,
        imgsz: torch.Tensor,
        overlap: bool,
    ) -> torch.Tensor:
        """
        Calculate the loss for instance segmentation.

        Args:
            fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
            masks (torch.Tensor): Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W).
            target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
            target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
            batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
            proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
            pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
            imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
            overlap (bool): Whether the masks in `masks` tensor overlap.

        Returns:
            (torch.Tensor): The calculated loss for instance segmentation.

        Notes:
            The batch loss can be computed for improved speed at higher memory usage.
            For example, pred_mask can be computed as follows:
                pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
        """
        _, _, mask_h, mask_w = proto.shape  # prototype mask height and width

        loss = 0  # running loss

        # Normalize to 0-1
        target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

        # Areas of target bboxes
        marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

        # Normalize to mask size
        mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

        # Iterate per image over foreground masks, target indices, predicted masks, prototypes, boxes, areas and gt masks
        for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
            fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i

            if fg_mask_i.any():  # if any anchor in this image is foreground
                mask_idx = target_gt_idx_i[fg_mask_i]  # ground-truth indices for the foreground anchors
                if overlap:
                    gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)  # instances encoded by value in a single mask
                    gt_mask = gt_mask.float()
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]  # one mask channel per instance

                # Accumulate the per-image mask loss
                loss += self.single_mask_loss(
                    gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i], marea_i[fg_mask_i]
                )

            # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
            else:
                loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        # Average the loss over the number of foreground anchors
        return loss / fg_mask.sum()


class v8PoseLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes v8PoseLoss with model, sets keypoint variables and declares a keypoint loss instance."""
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        # Check if the model deals with pose keypoints (17 keypoints with 3 coordinates each)
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        # Set sigmas for keypoint loss calculation based on whether it's pose or not
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    @staticmethod
    def kpts_decode(anchor_points, pred_kpts):
        """Decodes predicted keypoints to image coordinates."""
        y = pred_kpts.clone()
        # Scale keypoints coordinates
        y[..., :2] *= 2.0
        # Translate keypoints to their anchor points
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y
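
A quick check of the decode arithmetic: a raw prediction of 0 lands at anchor - 0.5, and the factor of 2 lets keypoints reach beyond their own grid cell. Illustrative values:

import torch

anchor_points = torch.tensor([[4.5, 2.5]])  # one grid-cell center
pred_kpts = torch.zeros(1, 1, 17, 3)        # raw head output, all zeros (illustrative)
decoded = v8PoseLoss.kpts_decode(anchor_points, pred_kpts)
print(decoded[0, 0, 0, :2])                 # tensor([4., 2.]) = anchor - 0.5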

    def calculate_keypoints_loss(
        self, masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
    ):
        """Calculate the keypoints loss."""
        # Implementation of the keypoints loss calculation goes here
        pass  # placeholder for the actual implementation


class v8ClassificationLoss:
    """Criterion class for computing training losses."""

    def __call__(self, preds, batch):
        """Compute the classification loss between predictions and true labels."""
        loss = F.cross_entropy(preds, batch["cls"], reduction="mean")
        loss_items = loss.detach()
        return loss, loss_items


class v8OBBLoss(v8DetectionLoss):
    """Calculates losses for object detection, classification, and box distribution in rotated YOLO models."""

    def __init__(self, model):
        """Initializes v8OBBLoss with model, assigner, and rotated bbox loss; note model must be de-paralleled."""
        super().__init__(model)
        self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = RotatedBboxLoss(self.reg_max).to(self.device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        if targets.shape[0] == 0:
            # If no targets, return zero tensor
            out = torch.zeros(batch_size, 0, 6, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            # Initialize output tensor with proper dimensions
            out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    # Extract and scale bounding boxes, then concatenate with class labels
                    bboxes = targets[matches, 2:]
                    bboxes[..., :4].mul_(scale_tensor)
                    out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
        return out

    def bbox_decode(self, anchor_points, pred_dist, pred_angle):
        """
        Decode predicted object bounding box coordinates from anchor points and distribution.

        Args:
            anchor_points (torch.Tensor): Anchor points, (h*w, 2).
            pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
            pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).

        Returns:
            (torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
        """
        # If DFL (Distribution Focal Loss) is used, decode the predicted distances
        if self.use_dfl:
            # Batch size, number of anchors, number of channels
            b, a, c = pred_dist.shape  # batch, anchors, channels
            # Reshape, softmax over the bins, then project to expected distances
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # Concatenate the decoded rotated boxes with the predicted angles
        return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)


# Criterion class combining one-to-many and one-to-one detection losses
class E2EDetectLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model):
        """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""
        self.one2many = v8DetectionLoss(model, tal_topk=10)  # one-to-many detection loss
        self.one2one = v8DetectionLoss(model, tal_topk=1)  # one-to-one detection loss

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        preds = preds[1] if isinstance(preds, tuple) else preds  # if preds is a tuple, use its second element
        one2many = preds["one2many"]  # one-to-many branch predictions
        loss_one2many = self.one2many(one2many, batch)  # one-to-many loss
        one2one = preds["one2one"]  # one-to-one branch predictions
        loss_one2one = self.one2one(one2one, batch)  # one-to-one loss
        # Return the summed total losses and the summed detached loss items
        return loss_one2many[0] + loss_one2one[0], loss_one2many[1] + loss_one2one[1]

.\yolov8\ultralytics\utils\metrics.py

# Ultralytics YOLO 🚀, AGPL-3.0 license
"""Model validation metrics."""

import math
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch

from ultralytics.utils import LOGGER, SimpleClass, TryExcept, plt_settings

# Object Keypoint Similarity (OKS) sigmas for different keypoints
OKS_SIGMA = (
    np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89])
    / 10.0
)


def bbox_ioa(box1, box2, iou=False, eps=1e-7):
    """
    Calculate the intersection over box2 area given box1 and box2. Boxes are in x1y1x2y2 format.

    Args:
        box1 (np.ndarray): A numpy array of shape (n, 4) representing n bounding boxes.
        box2 (np.ndarray): A numpy array of shape (m, 4) representing m bounding boxes.
        iou (bool): Calculate the standard IoU if True else return inter_area/box2_area.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (np.ndarray): A numpy array of shape (n, m) representing the intersection over box2 area.
    """

    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
    b2_x1, b2_y1, b2_x2, b2_y2 = box2.T

    # Intersection area calculation
    inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * (
        np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)
    ).clip(0)

    # Box2 area calculation
    area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    if iou:
        box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        area = area + box1_area[:, None] - inter_area

    # Intersection over box2 area
    return inter_area / (area + eps)


def box_iou(box1, box2, eps=1e-7):
    """
    Calculate intersection-over-union (IoU) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (torch.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
    """

    # Convert box coordinates to float for accurate computation
    (a1, a2), (b1, b2) = box1.float().unsqueeze(1).chunk(2, 2), box2.float().unsqueeze(0).chunk(2, 2)
    
    # Calculate intersection area
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp_(0).prod(2)

    # Compute IoU
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
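
For instance, two 10x10 boxes offset diagonally by 5 pixels overlap in a 5x5 region, giving IoU = 25 / 175 ≈ 0.143:

import torch

box1 = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
box2 = torch.tensor([[5.0, 5.0, 15.0, 15.0], [0.0, 0.0, 10.0, 10.0]])
print(box_iou(box1, box2))  # tensor([[0.1429, 1.0000]]) approximately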


def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Calculate Intersection over Union (IoU) of box1(1, 4) to box2(n, 4).

    This function calculates IoU considering different variants such as Generalized IoU (GIoU),
    Distance IoU (DIoU), and Complete IoU (CIoU) if specified.

    Args:
        box1 (torch.Tensor): A tensor representing a single bounding box of shape (1, 4).
        box2 (torch.Tensor): A tensor representing multiple bounding boxes of shape (n, 4).
        xywh (bool, optional): If True, treats boxes as (x_center, y_center, width, height).
        GIoU (bool, optional): If True, compute Generalized IoU. Defaults to False.
        DIoU (bool, optional): If True, compute Distance IoU. Defaults to False.
        CIoU (bool, optional): If True, compute Complete IoU. Defaults to False.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): IoU, GIoU, DIoU, or CIoU values depending on the specified flags.
    """
    # Get the box coordinates according to the input format
    if xywh:  # input format is (x, y, w, h)
        # Split box1 and box2 into centers and sizes
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        # Half widths and half heights
        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        # Corner coordinates of both boxes
        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
    else:  # input format is (x1, y1, x2, y2)
        # Split box1 and box2 into corner coordinates
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        # Widths and heights, with a small eps to avoid division by zero
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps

    # Intersection area
    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp_(0) * (
        b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
    ).clamp_(0)

    # Union area
    union = w1 * h1 + w2 * h2 - inter + eps

    # IoU
    iou = inter / union
    if CIoU or DIoU or GIoU:
        # Width and height of the smallest enclosing box
        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)
        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)
        if CIoU or DIoU:  # Distance IoU or Complete IoU
            # Squared diagonal of the enclosing box
            c2 = cw.pow(2) + ch.pow(2) + eps
            # Squared distance between the box centers
            rho2 = (
                (b2_x1 + b2_x2 - b1_x1 - b1_x2).pow(2) + (b2_y1 + b2_y2 - b1_y1 - b1_y2).pow(2)
            ) / 4
            if CIoU:  # Complete IoU
                v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
            return iou - rho2 / c2  # DIoU
        # Area of the smallest enclosing box
        c_area = cw * ch + eps
        return iou - (c_area - union) / c_area  # GIoU
    return iou  # IoU
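
A small comparison showing the extra center-distance penalty: shifting one of two identical 20x20 boxes by 5 pixels gives IoU ≈ 0.60 but CIoU ≈ 0.58, since rho2 / c2 subtracts the normalized center offset (the aspect-ratio term v is zero here):

import torch

a = torch.tensor([[50.0, 50.0, 20.0, 20.0]])  # xywh
b = torch.tensor([[55.0, 50.0, 20.0, 20.0]])
print(bbox_iou(a, b))             # ≈ 0.60
print(bbox_iou(a, b, CIoU=True))  # ≈ 0.58, IoU minus the distance penalty

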
def _get_covariance_matrix(boxes):
    """
    Generating covariance matrix from oriented bounding boxes.

    Args:
        boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format.

    Returns:
        (torch.Tensor): Covariance matrix components corresponding to the original rotated bounding boxes.
    """
    # Gaussian bounding boxes; the center points (first two columns) are ignored as they are not needed here
    gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1)
    a, b, c = gbbs.split(1, dim=-1)
    cos = c.cos()
    sin = c.sin()
    cos2 = cos.pow(2)
    sin2 = sin.pow(2)
    # Covariance components (a, b, c) of the rotated boxes
    return a * cos2 + b * sin2, a * sin2 + b * cos2, (a - b) * cos * sin


# Probabilistic IoU between oriented bounding boxes, following https://arxiv.org/pdf/2106.06072v1.pdf
def probiou(obb1, obb2, CIoU=False, eps=1e-7):
    """
    Calculate the probabilistic IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.

    Args:
        obb1 (torch.Tensor): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
        obb2 (torch.Tensor): A tensor of shape (N, 5) representing predicted obbs, with xywhr format.
        CIoU (bool, optional): If True, compute Complete IoU. Defaults to False.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): A tensor of shape (N,) representing obb similarities.
    """
    # Splitting the x and y coordinates from obb1 and obb2
    x1, y1 = obb1[..., :2].split(1, dim=-1)
    x2, y2 = obb2[..., :2].split(1, dim=-1)

    # Calculating covariance matrix components for obb1 and obb2
    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = _get_covariance_matrix(obb2)

    # Calculation of terms t1, t2, and t3 for IoU computation
    t1 = (
        ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)
    ) * 0.25
    t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)) * 0.5
    t3 = (
        ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
        / (4 * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + eps)
        + eps
    ).log() * 0.5

    # Combined term for boundary distance
    bd = (t1 + t2 + t3).clamp(eps, 100.0)

    # Hausdorff distance calculation
    hd = (1.0 - (-bd).exp() + eps).sqrt()

    # Intersection over Union (IoU) computation
    iou = 1 - hd

    # Compute Complete IoU (CIoU) if CIoU flag is True
    if CIoU:
        # Splitting width and height components from obb1 and obb2
        w1, h1 = obb1[..., 2:4].split(1, dim=-1)
        w2, h2 = obb2[..., 2:4].split(1, dim=-1)

        # Calculating v value based on width and height ratios
        v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)

        # Compute alpha factor and adjust IoU for CIoU
        with torch.no_grad():
            alpha = v / (v - iou + (1 + eps))

        return iou - v * alpha  # CIoU

    # Return regular IoU if CIoU flag is False
    return iou
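
For example, rotating an elongated box 90 degrees about its own center drops the probabilistic IoU well below 1 even though the centers and sizes match:

import math
import torch

obb1 = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0]])          # xywhr
obb2 = torch.tensor([[50.0, 50.0, 20.0, 10.0, math.pi / 2]])  # same box, rotated 90°
print(probiou(obb1, obb2))  # noticeably below 1.0

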
# Batched probabilistic IoU between oriented bounding boxes, following the same reference paper
def batch_probiou(obb1, obb2, eps=1e-7):
    """
    Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.

    Args:
        obb1 (torch.Tensor | np.ndarray): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
        obb2 (torch.Tensor | np.ndarray): A tensor of shape (M, 5) representing predicted obbs, with xywhr format.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): A tensor of shape (N, M) representing obb similarities.
    """
    # Convert obb1 and obb2 to torch.Tensor if they are np.ndarray
    obb1 = torch.from_numpy(obb1) if isinstance(obb1, np.ndarray) else obb1
    obb2 = torch.from_numpy(obb2) if isinstance(obb2, np.ndarray) else obb2

    # Split out the x and y center coordinates of both obb sets
    x1, y1 = obb1[..., :2].split(1, dim=-1)
    x2, y2 = (x.squeeze(-1)[None] for x in obb2[..., :2].split(1, dim=-1))

    # Covariance matrix components of both obb sets
    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = (x.squeeze(-1)[None] for x in _get_covariance_matrix(obb2))

    # The three terms of the probabilistic IoU
    t1 = (
        ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)
    ) * 0.25
    t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)) * 0.5
    t3 = (
        ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
        / (4 * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + eps)
        + eps
    ).log() * 0.5

    # Combine the three terms and clamp for numerical stability
    bd = (t1 + t2 + t3).clamp(eps, 100.0)
    hd = (1.0 - (-bd).exp() + eps).sqrt()

    # Return the probabilistic IoU (1 minus the Hellinger distance)
    return 1 - hd


# Computes smoothed positive and negative binary cross-entropy targets
def smooth_BCE(eps=0.1):
    """
    Computes smoothed positive and negative Binary Cross-Entropy targets.

    This function calculates positive and negative label smoothing BCE targets based on a given epsilon value.
    For implementation details, refer to https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441.

    Args:
        eps (float, optional): The epsilon value for label smoothing. Defaults to 0.1.

    Returns:
        (tuple): A tuple containing the positive and negative label smoothing BCE targets.
    """
    # Return the smoothed positive and negative BCE targets
    return 1.0 - 0.5 * eps, 0.5 * eps
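
With the default eps = 0.1 the targets become 0.95 and 0.05, softening the ideal 1/0 labels so the model is not pushed toward infinitely confident logits:

cp, cn = smooth_BCE(eps=0.1)
print(cp, cn)  # 0.95 0.05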


class ConfusionMatrix:
    """
    A class for calculating and updating a confusion matrix for object detection and classification tasks.

    Attributes:
        task (str): The type of task, either 'detect' or 'classify'.
        matrix (np.ndarray): The confusion matrix, with dimensions depending on the task.
        nc (int): The number of classes.
        conf (float): The confidence threshold for detections.
        iou_thres (float): The Intersection over Union threshold.
    """
    def __init__(self, nc, conf=0.25, iou_thres=0.45, task="detect"):
        """
        Initialize attributes for the YOLO model.

        Args:
            nc (int): Number of classes.
            conf (float): Confidence threshold, default is 0.25, adjusted to 0.25 if None or 0.001.
            iou_thres (float): IoU (Intersection over Union) threshold.
            task (str): Task type, either "detect" or other.

        Initializes:
            self.task (str): Task type.
            self.matrix (np.ndarray): Confusion matrix initialized based on task type and number of classes.
            self.nc (int): Number of classes.
            self.conf (float): Confidence threshold.
            self.iou_thres (float): IoU threshold.
        """
        self.task = task
        self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == "detect" else np.zeros((nc, nc))
        self.nc = nc  # number of classes
        self.conf = 0.25 if conf in {None, 0.001} else conf  # apply 0.25 if default val conf is passed
        self.iou_thres = iou_thres

    def process_cls_preds(self, preds, targets):
        """
        Update confusion matrix for classification task.

        Args:
            preds (Array[N, min(nc,5)]): Predicted class labels.
            targets (Array[N, 1]): Ground truth class labels.
        """
        preds, targets = torch.cat(preds)[:, 0], torch.cat(targets)
        for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
            self.matrix[p][t] += 1

    def process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Update confusion matrix for object detection task.

        Args:
            detections (Array[N, 6] | Array[N, 7]): Detected bounding boxes and their associated information.
                                      Each row should contain (x1, y1, x2, y2, conf, class)
                                      or with an additional element `angle` when it's obb.
            gt_bboxes (Array[M, 4]| Array[N, 5]): Ground truth bounding boxes with xyxy/xyxyr format.
            gt_cls (Array[M]): The class labels.
        """
        # If no ground-truth labels are present
        if gt_cls.shape[0] == 0:
            if detections is not None:
                # Filter out detections below the confidence threshold
                detections = detections[detections[:, 4] > self.conf]
                # Extract the predicted classes
                detection_classes = detections[:, 5].int()
                for dc in detection_classes:
                    self.matrix[dc, self.nc] += 1  # false positives
            return

        # If there are no detections
        if detections is None:
            # Extract the ground-truth classes
            gt_classes = gt_cls.int()
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        # Filter out detections below the confidence threshold
        detections = detections[detections[:, 4] > self.conf]
        # Extract the ground-truth classes
        gt_classes = gt_cls.int()
        # Extract the predicted classes
        detection_classes = detections[:, 5].int()
        # Check whether boxes carry an angle (oriented bounding boxes)
        is_obb = detections.shape[1] == 7 and gt_bboxes.shape[1] == 5
        # Compute IoU (probabilistic IoU for obbs, box IoU otherwise)
        iou = (
            batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1))
            if is_obb
            else box_iou(gt_bboxes, detections[:, :4])
        )

        # Keep matches above the IoU threshold
        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        # Whether any match was found
        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)
        # Update the confusion matrix
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background

        # Unmatched detections count as predicted background
        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # predicted background

    def matrix(self):
        """Returns the confusion matrix."""
        return self.matrix

    def tp_fp(self):
        """Returns true positives and false positives."""
        tp = self.matrix.diagonal()  # diagonal entries are the true positives
        fp = self.matrix.sum(1) - tp  # row sums minus the diagonal give the false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections) -- intentionally unused
        return (tp[:-1], fp[:-1]) if self.task == "detect" else (tp, fp)  # drop the background class for detection

    @TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
    @plt_settings()
    def plot(self, normalize=True, save_dir="", names=(), on_plot=None):
        """
        Plot the confusion matrix using seaborn and save it to a file.

        Args:
            normalize (bool): Whether to normalize the confusion matrix.
            save_dir (str): Directory where the plot will be saved.
            names (tuple): Names of classes, used as labels on the plot.
            on_plot (func): An optional callback to pass plots path and data when they are rendered.
        """
        import seaborn  # import seaborn here for plotting the confusion matrix

        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-9) if normalize else 1)  # column-normalize
        array[array < 0.005] = np.nan  # values below 0.005 are not annotated

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, number of names
        seaborn.set_theme(font_scale=1.0 if nc < 50 else 0.8)  # font size depends on the class count
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels only if they match the classes
        ticklabels = (list(names) + ["background"]) if labels else "auto"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress empty-matrix RuntimeWarning: All-NaN slice encountered
            seaborn.heatmap(
                array,
                ax=ax,
                annot=nc < 30,  # annotate cell values only for small matrices
                annot_kws={"size": 8},  # annotation font size
                cmap="Blues",
                fmt=".2f" if normalize else ".0f",  # two decimals if normalized, integers otherwise
                square=True,
                vmin=0.0,
                xticklabels=ticklabels,
                yticklabels=ticklabels,
            ).set_facecolor((1, 1, 1))  # white background
        title = "Confusion Matrix" + " Normalized" * normalize
        ax.set_xlabel("True")
        ax.set_ylabel("Predicted")
        ax.set_title(title)
        plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png'
        fig.savefig(plot_fname, dpi=250)
        plt.close(fig)
        if on_plot:
            on_plot(plot_fname)  # pass the plot path to the callback if one was given

    def print(self):
        """Print the confusion matrix to the console."""
        for i in range(self.nc + 1):
            LOGGER.info(" ".join(map(str, self.matrix[i])))


def compute_ap(recall, precision):
    """
    Compute the average precision (AP) given the recall and precision curves.

    Args:
        recall (list): The recall curve.
        precision (list): The precision curve.

    Returns:
        (float): Average precision.
        (np.ndarray): Precision envelope curve.
        (np.ndarray): Modified recall curve with sentinel values added at the beginning and end.
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = "interp"  # methods: 'continuous', 'interp'
    if method == "interp":
        x = np.linspace(0, 1, 101)  # 101-point interpolation (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate under the interpolated curve
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # indices where the recall value changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under the curve

    return ap, mpre, mrec
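
A tiny worked example with illustrative arrays: precision falling from 1.0 to 0.6 as recall rises to 1.0 integrates to an AP of roughly 0.8:

import numpy as np

recall = [0.0, 0.5, 1.0]
precision = [1.0, 0.8, 0.6]
ap, mpre, mrec = compute_ap(recall, precision)
print(round(float(ap), 2))  # ~0.8

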
def ap_per_class(
    tp, conf, pred_cls, target_cls, plot=False, on_plot=None, save_dir=Path(), names={}, eps=1e-16, prefix=""
):
    """Computes the average precision per class for object detection evaluation."""
    # Sort by decreasing objectness confidence
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes and the number of labels per class
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes

    # Create Precision-Recall curve and compute AP for each class
    x, prec_values = np.linspace(0, 1, 1000), []

    # Arrays holding the average precision, precision and recall curves
    ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        # ci is the index of class c within unique_classes
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions
        # Skip classes with no predictions or no labels
        if n_p == 0 or n_l == 0:
            continue

        # Accumulate false positives and true positives
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall
        recall = tpc / (n_l + eps)  # recall curve
        r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1)  # p at pr_score

        # AP from the recall-precision curve
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            # When plotting, record the precision values at mAP@0.5 for the first IoU threshold
            if plot and j == 0:
                prec_values.append(np.interp(x, mrec, mpre))  # precision at mAP@0.5

    prec_values = np.array(prec_values)  # (nc, 1000)

    # F1 score (harmonic mean of precision and recall)
    f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
    names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
    names = dict(enumerate(names))  # to dict
    # Optionally plot the PR, F1, Precision and Recall curves
    if plot:
        plot_pr_curve(x, prec_values, ap, save_dir / f"{prefix}PR_curve.png", names, on_plot=on_plot)
        plot_mc_curve(x, f1_curve, save_dir / f"{prefix}F1_curve.png", names, ylabel="F1", on_plot=on_plot)
        plot_mc_curve(x, p_curve, save_dir / f"{prefix}P_curve.png", names, ylabel="Precision", on_plot=on_plot)
        plot_mc_curve(x, r_curve, save_dir / f"{prefix}R_curve.png", names, ylabel="Recall", on_plot=on_plot)

    # Index of the maximum F1 value
    i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
    p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i]  # max-F1 precision, recall, F1 values
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    # Return TP, FP, precision, recall, F1, AP, the classes, and the curves
    return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values


class Metric(SimpleClass):
    """
    Class for computing evaluation metrics for YOLOv8 model.

    Attributes:
        p (list): Precision for each class. Shape: (nc,).
        r (list): Recall for each class. Shape: (nc,).
        f1 (list): F1 score for each class. Shape: (nc,).
        all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
        ap_class_index (list): Index of class for each AP score. Shape: (nc,).
        nc (int): Number of classes.

    Methods:
        ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        mp(): Mean precision of all classes. Returns: Float.
        mr(): Mean recall of all classes. Returns: Float.
        map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
        map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
        map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
        mean_results(): Mean of results, returns mp, mr, map50, map.
        class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
        maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
        fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
        update(results): Update metric attributes with new evaluation results.
    """

    def __init__(self) -> None:
        """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
        self.p = []  # Precision for each class, initialized as an empty list
        self.r = []  # Recall for each class, initialized as an empty list
        self.f1 = []  # F1 score for each class, initialized as an empty list
        self.all_ap = []  # AP scores for all classes and IoU thresholds, initialized as an empty list
        self.ap_class_index = []  # Index of class for each AP score, initialized as an empty list
        self.nc = 0  # Number of classes, initialized to 0

    @property
    def ap50(self):
        """
        Returns the Average Precision (AP) at an IoU threshold of 0.5 for all classes.

        Returns:
            (np.ndarray, list): Array of shape (nc,) with AP50 values per class, or an empty list if not available.
        """
        return self.all_ap[:, 0] if len(self.all_ap) else []  # Return AP50 values if all_ap is not empty, otherwise an empty list

    @property
    def ap(self):
        """
        Returns the Average Precision (AP) at IoU thresholds from 0.5 to 0.95 for all classes.

        Returns:
            (np.ndarray, list): Array of shape (nc,) with mean AP values per class, or an empty list if not available.
        """
        return self.all_ap.mean(1) if len(self.all_ap) else []  # Return mean AP values across IoU thresholds if all_ap is not empty, otherwise an empty list

    @property
    def mp(self):
        """
        Returns the Mean Precision of all classes.

        Returns:
            (float): The mean precision of all classes.
        """
        return self.p.mean() if len(self.p) else 0.0  # Return the mean precision of classes if p is not empty, otherwise 0.0

    @property
    def mr(self):
        """
        Returns the Mean Recall of all classes.

        Returns:
            (float): The mean recall of all classes.
        """
        return self.r.mean() if len(self.r) else 0.0  # Return the mean recall of classes if r is not empty, otherwise 0.0
    @property
    def map50(self):
        """
        Returns the mean Average Precision (mAP) at an IoU threshold of 0.5.

        Returns:
            (float): The mAP at an IoU threshold of 0.5.
        """
        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0

    @property
    def map75(self):
        """
        Returns the mean Average Precision (mAP) at an IoU threshold of 0.75.

        Returns:
            (float): The mAP at an IoU threshold of 0.75.
        """
        return self.all_ap[:, 5].mean() if len(self.all_ap) else 0.0

    @property
    def map(self):
        """
        Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05.

        Returns:
            (float): The mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05.
        """
        return self.all_ap.mean() if len(self.all_ap) else 0.0

    def mean_results(self):
        """Mean of results, return mp, mr, map50, map."""
        return [self.mp, self.mr, self.map50, self.map]

    def class_result(self, i):
        """Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
        return self.p[i], self.r[i], self.ap50[i], self.ap[i]

    @property
    def maps(self):
        """MAP of each class."""
        maps = np.zeros(self.nc) + self.map
        for i, c in enumerate(self.ap_class_index):
            maps[c] = self.ap[i]
        return maps

    def fitness(self):
        """Model fitness as a weighted combination of metrics."""
        w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
        return (np.array(self.mean_results()) * w).sum()

    def update(self, results):
        """
        Updates the evaluation metrics of the model with a new set of results.

        Args:
            results (tuple): A tuple containing the following evaluation metrics:
                - p (list): Precision for each class. Shape: (nc,).
                - r (list): Recall for each class. Shape: (nc,).
                - f1 (list): F1 score for each class. Shape: (nc,).
                - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
                - ap_class_index (list): Index of class for each AP score. Shape: (nc,).

        Side Effects:
            Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based
            on the values provided in the `results` tuple.
        """
        (
            self.p,
            self.r,
            self.f1,
            self.all_ap,
            self.ap_class_index,
            self.p_curve,
            self.r_curve,
            self.f1_curve,
            self.px,
            self.prec_values,
        ) = results

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []



    @property
    def pr_curves(self):
        """Returns precision and recall curves."""
        return self.p_curve, self.r_curve



    @property
    def f1_curves(self):
        """Returns F1 score curves."""
        return self.f1_curve



    @property
    def precision_values(self):
        """Returns precision values for the PR curve."""
        return self.px, self.prec_values



    @property
    def pr_values(self):
        """Returns precision and recall values."""
        return self.p, self.r



    @property
    def f1_values(self):
        """Returns F1 values."""
        return self.f1



    @property
    def pr(self):
        """Returns precision and recall."""
        return self.p, self.r



    @property
    def f1(self):
        """Returns F1 score."""
        return self.f1



    def print_results(self):
        """Prints results (p, r, ap50, ap)."""
        print(self.p, self.r, self.ap50, self.ap)

    def evaluation(self):
        """Model evaluation with metric AP."""
        return self.ap

    def result(self):
        """Return p, r, ap50, ap."""
        return self.p, self.r, self.ap50, self.ap

    @property
    def recall(self):
        """Returns recall."""
        return self.r

    @property
    def mean(self):
        """Returns the mean AP."""
        return self.ap.mean()

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return [
            [self.px, self.prec_values, "Recall", "Precision"],  # Precision-Recall curve: x=px, y=prec_values
            [self.px, self.f1_curve, "Confidence", "F1"],  # F1-Confidence curve: x=px, y=f1_curve
            [self.px, self.p_curve, "Confidence", "Precision"],  # Precision-Confidence curve: x=px, y=p_curve
            [self.px, self.r_curve, "Confidence", "Recall"],  # Recall-Confidence curve: x=px, y=r_curve
        ]
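
As a quick sanity check of how `update` and the accessors above fit together, here is a small sketch that is not part of the original file: it feeds a fabricated `results` tuple into `Metric` (shapes follow the `update` docstring, all numbers invented):

```py
import numpy as np

m = Metric()
m.nc = 2  # two classes
results = (
    np.array([0.8, 0.6]),                      # p, per-class precision
    np.array([0.7, 0.5]),                      # r, per-class recall
    np.array([0.74, 0.55]),                    # f1
    np.linspace(0.3, 0.9, 20).reshape(2, 10),  # all_ap, (nc, 10 IoU thresholds)
    np.array([0, 1]),                          # ap_class_index
    np.zeros((2, 1000)),                       # p_curve
    np.zeros((2, 1000)),                       # r_curve
    np.zeros((2, 1000)),                       # f1_curve
    np.linspace(0, 1, 1000),                   # px
    np.zeros((2, 1000)),                       # prec_values
)
m.update(results)
print(m.map50, m.map75, m.map)  # AP at IoU 0.50, at 0.75, and averaged over 0.50-0.95
print(m.fitness())              # 0.1 * mAP50 + 0.9 * mAP50-95
```
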
class DetMetrics(SimpleClass):
    """
    This class is a utility class for computing detection metrics such as precision, recall, and mean average precision
    (mAP) of an object detection model.

    Args:
        save_dir (Path): A path to the directory where the output plots will be saved. Defaults to current directory.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class. Defaults to False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (tuple of str): A tuple of strings that represents the names of the classes. Defaults to an empty tuple.

    Attributes:
        save_dir (Path): A path to the directory where the output plots will be saved.
        plot (bool): A flag that indicates whether to plot the precision-recall curves for each class.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (tuple of str): A tuple of strings that represents the names of the classes.
        box (Metric): An instance of the Metric class for storing the results of the detection metrics.
        speed (dict): A dictionary for storing the execution time of different parts of the detection process.

    Methods:
        process(tp, conf, pred_cls, target_cls): Updates the metric results with the latest batch of predictions.
        keys: Returns a list of keys for accessing the computed detection metrics.
        mean_results: Returns a list of mean values for the computed detection metrics.
        class_result(i): Returns a list of values for the computed detection metrics for a specific class.
        maps: Returns a dictionary of mean average precision (mAP) values for different IoU thresholds.
        fitness: Computes the fitness score based on the computed detection metrics.
        ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
        results_dict: Returns a dictionary that maps detection metric keys to their computed values.
        curves: TODO
        curves_results: TODO
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir  # directory where output plots are saved, defaults to the current directory
        self.plot = plot  # whether to plot precision-recall curves for each class
        self.on_plot = on_plot  # optional callback receiving the plot path and data when plots are rendered
        self.names = names  # tuple of class names
        self.box = Metric()  # Metric instance that stores the detection results
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}  # per-phase timings
        self.task = "detect"  # task type

    @property
    def keys(self):
        """Returns a list of keys for accessing specific metrics."""
        return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]

    def mean_results(self):
        """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
        return self.box.mean_results()

    def class_result(self, i):
        """Return the result of evaluating the performance of an object detection model on a specific class."""
        return self.box.class_result(i)

    @property
    def maps(self):
        """Returns mean Average Precision (mAP) scores per class."""
        return self.box.maps

    @property
    def fitness(self):
        """Returns the fitness of box object."""
        return self.box.fitness()

    @property
    def ap_class_index(self):
        """Returns the average precision index per class."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return ["Precision-Recall(B)", "F1-Confidence(B)", "Precision-Confidence(B)", "Recall-Confidence(B)"]

    @property
    def curves_results(self):
        """Returns the curves results of the box metric."""
        return self.box.curves_results
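
Continuing the sketch above (fabricated values; in real use `DetMetrics.process` populates `self.box` from `ap_per_class`, much as `OBBMetrics.process` does further below):

```py
dm = DetMetrics(names=("person", "car"))
dm.box.nc = 2
dm.box.update(results)  # reuse the fabricated tuple from the Metric sketch
print(dm.results_dict)  # the four "(B)" keys plus "fitness" mapped to their values
```
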

# SegmentMetrics: calculates and aggregates detection and segmentation metrics over a given set of classes
class SegmentMetrics(SimpleClass):
    """
    Calculates and aggregates detection and segmentation metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and segmentation plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and segmentation plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        seg (Metric): An instance of the Metric class to calculate mask segmentation metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and segmentation metrics over all the classes.
        class_result(i): Returns the detection and segmentation metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score.
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir  # directory where result plots are saved
        self.plot = plot  # whether to save detection and segmentation plots
        self.on_plot = on_plot  # optional callback receiving the plot path and data when plots are rendered
        self.names = names  # list of class names
        self.box = Metric()  # Metric instance for box detection metrics
        self.seg = Metric()  # Metric instance for segmentation mask metrics
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}  # per-phase timings
        self.task = "segment"  # task type

    @property
    def keys(self):
        """Returns a list of keys for accessing metrics."""
        return [
            "metrics/precision(B)",  # box precision
            "metrics/recall(B)",  # box recall
            "metrics/mAP50(B)",  # box mAP at IoU 0.50
            "metrics/mAP50-95(B)",  # box mAP over IoU 0.50-0.95
            "metrics/precision(M)",  # mask precision
            "metrics/recall(M)",  # mask recall
            "metrics/mAP50(M)",  # mask mAP at IoU 0.50
            "metrics/mAP50-95(M)",  # mask mAP over IoU 0.50-0.95
        ]

    def mean_results(self):
        """Return the mean metrics for bounding box and segmentation results."""
        return self.box.mean_results() + self.seg.mean_results()

    def class_result(self, i):
        """Returns classification results for a specified class index."""
        return self.box.class_result(i) + self.seg.class_result(i)

    @property
    def maps(self):
        """Returns mAP scores for object detection and semantic segmentation models."""
        return self.box.maps + self.seg.maps

    @property
    def fitness(self):
        """Get the fitness score for both segmentation and bounding box models."""
        return self.seg.fitness() + self.box.fitness()

    @property
    def ap_class_index(self):
        """Boxes and masks have the same ap_class_index."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns results of object detection model for evaluation."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return [
            "Precision-Recall(B)",
            "F1-Confidence(B)",
            "Precision-Confidence(B)",
            "Recall-Confidence(B)",
            "Precision-Recall(M)",
            "F1-Confidence(M)",
            "Precision-Confidence(M)",
            "Recall-Confidence(M)",
        ]

    @property
    def curves_results(self):
        """Returns the concatenated curves results of the box and mask metrics."""
        return self.box.curves_results + self.seg.curves_results


class PoseMetrics(SegmentMetrics):
    """
    Calculates and aggregates detection and pose metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and segmentation plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and segmentation plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        pose (Metric): An instance of the Metric class to calculate pose/keypoint metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and pose metrics over all the classes.
        class_result(i): Returns the detection and pose metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and pose metrics and fitness score.
    """

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
        # Initialize the SegmentMetrics base class, also forwarding the on_plot callback
        super().__init__(save_dir, plot, on_plot, names)
        self.save_dir = save_dir  # directory where output plots are saved
        self.plot = plot  # whether to save detection and pose plots
        self.on_plot = on_plot  # optional callback receiving the plot path and data when plots are rendered
        self.names = names  # list of class names
        self.box = Metric()  # Metric instance for box detection metrics
        self.pose = Metric()  # Metric instance for pose/keypoint metrics
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}  # per-phase timings
        self.task = "pose"  # task type

    def process(self, tp, tp_p, conf, pred_cls, target_cls):
        """
        Processes the detection and pose metrics over the given set of predictions.

        Args:
            tp (list): List of True Positive boxes.
            tp_p (list): List of True Positive keypoints.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """

        # Calculate pose metrics per class and update PoseEvaluator
        results_pose = ap_per_class(
            tp_p,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            on_plot=self.on_plot,
            save_dir=self.save_dir,
            names=self.names,
            prefix="Pose",
        )[2:]
        # Set the number of classes for pose evaluation
        self.pose.nc = len(self.names)
        # Update pose metrics with calculated results
        self.pose.update(results_pose)

        # Calculate box metrics per class and update BoxEvaluator
        results_box = ap_per_class(
            tp,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            on_plot=self.on_plot,
            save_dir=self.save_dir,
            names=self.names,
            prefix="Box",
        )[2:]
        # Set the number of classes for box evaluation
        self.box.nc = len(self.names)
        # Update box metrics with calculated results
        self.box.update(results_box)

    @property
    def keys(self):
        """Returns list of evaluation metric keys."""
        return [
            "metrics/precision(B)",
            "metrics/recall(B)",
            "metrics/mAP50(B)",
            "metrics/mAP50-95(B)",
            "metrics/precision(P)",
            "metrics/recall(P)",
            "metrics/mAP50(P)",
            "metrics/mAP50-95(P)",
        ]

    def mean_results(self):
        """Return the mean results of box and pose."""
        # Return mean results of both box and pose evaluations
        return self.box.mean_results() + self.pose.mean_results()

    def class_result(self, i):
        """Return the class-wise detection results for a specific class i."""
        # Return class-wise detection results for class i from both box and pose evaluations
        return self.box.class_result(i) + self.pose.class_result(i)

    @property
    def maps(self):
        """Returns the mean average precision (mAP) per class for both box and pose detections."""
        # Return mean average precision (mAP) per class for both box and pose detections
        return self.box.maps + self.pose.maps

    @property
    def fitness(self):
        """Returns the combined fitness score of the pose and box metrics."""
        return self.pose.fitness() + self.box.fitness()

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        # Return a list of curves for accessing specific metrics curves
        return [
            "Precision-Recall(B)",
            "F1-Confidence(B)",
            "Precision-Confidence(B)",
            "Recall-Confidence(B)",
            "Precision-Recall(P)",
            "F1-Confidence(P)",
            "Precision-Confidence(P)",
            "Recall-Confidence(P)",
        ]

    @property
    def curves_results(self):
        """Returns dictionary of computed performance metrics and statistics."""
        # Return dictionary of computed performance metrics and statistics for both box and pose
        return self.box.curves_results + self.pose.curves_results


class ClassifyMetrics(SimpleClass):
    """
    Class for computing classification metrics including top-1 and top-5 accuracy.

    Attributes:
        top1 (float): The top-1 accuracy.
        top5 (float): The top-5 accuracy.
        speed (Dict[str, float]): A dictionary containing the time taken for each step in the pipeline.
        fitness (float): The fitness of the model, computed as the mean of the top-1 and top-5 accuracies.
        results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness.
        keys (List[str]): A list of keys for the results_dict.

    Methods:
        process(targets, pred): Processes the targets and predictions to compute classification metrics.
    """

    def __init__(self) -> None:
        """Initialize a ClassifyMetrics instance."""
        self.top1 = 0  # top-1 accuracy
        self.top5 = 0  # top-5 accuracy
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}  # per-phase timings
        self.task = "classify"  # task type

    def process(self, targets, pred):
        """Process target classes and top-5 predicted classes to compute top-1 and top-5 accuracy."""
        pred, targets = torch.cat(pred), torch.cat(targets)  # concatenate the per-batch predictions and targets
        correct = (targets[:, None] == pred).float()  # per-sample correctness against each top-5 prediction
        acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5) accuracy
        self.top1, self.top5 = acc.mean(0).tolist()

    @property
    def fitness(self):
        """Returns mean of top-1 and top-5 accuracies as fitness score."""
        return (self.top1 + self.top5) / 2

    @property
    def results_dict(self):
        """Returns a dictionary with model's performance metrics and fitness score."""
        return dict(zip(self.keys + ["fitness"], [self.top1, self.top5, self.fitness]))

    @property
    def keys(self):
        """Returns a list of keys for the results_dict property."""
        return ["metrics/accuracy_top1", "metrics/accuracy_top5"]

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []  # classification defines no metric curves

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []  # classification defines no metric curves
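
The top-1/top-5 bookkeeping in `process` is easy to verify with a few fabricated tensors (a sketch, assuming the class definition above):

```py
import torch

cm = ClassifyMetrics()
targets = [torch.tensor([2, 0, 1])]        # ground-truth class per image
pred = [torch.tensor([[2, 1, 0, 3, 4],     # top-5 predicted classes per image, best first
                      [1, 0, 2, 3, 4],
                      [1, 2, 0, 3, 4]])]
cm.process(targets, pred)
print(cm.top1, cm.top5)  # 0.666... (images 0 and 2 are correct at top-1), 1.0 (all targets appear in the top-5)
print(cm.results_dict)
```
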


class OBBMetrics(SimpleClass):
    """Metrics for evaluating oriented bounding box (OBB) detection, see https://arxiv.org/pdf/2106.06072.pdf."""

    def __init__(self, save_dir=Path("."), plot=False, on_plot=None, names=()) -> None:
        """Initialize an OBBMetrics instance with directory, plotting, callback, and class names."""
        self.save_dir = save_dir  # directory where output plots are saved
        self.plot = plot  # whether to plot the results
        self.on_plot = on_plot  # optional callback receiving the plot path and data when plots are rendered
        self.names = names  # list of class names
        self.box = Metric()  # Metric instance for OBB detection metrics
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}  # per-phase timings

    def process(self, tp, conf, pred_cls, target_cls):
        """Process predicted results for object detection and update metrics."""
        # Compute per-class AP and related statistics, dropping the first two returned elements
        results = ap_per_class(
            tp,
            conf,
            pred_cls,
            target_cls,
            plot=self.plot,
            save_dir=self.save_dir,
            names=self.names,
            on_plot=self.on_plot,
        )[2:]
        self.box.nc = len(self.names)  # set the number of classes
        self.box.update(results)  # store the updated detection results

    @property
    def keys(self):
        """Returns a list of keys for accessing specific metrics."""
        return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]

    def mean_results(self):
        """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
        return self.box.mean_results()

    def class_result(self, i):
        """Return the result of evaluating the performance of an object detection model on a specific class."""
        return self.box.class_result(i)

    @property
    def maps(self):
        """Returns mean Average Precision (mAP) scores per class."""
        return self.box.maps

    @property
    def fitness(self):
        """Returns the fitness of box object."""
        return self.box.fitness()

    @property
    def ap_class_index(self):
        """Returns the average precision index per class."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns dictionary of computed performance metrics and statistics."""
        return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness]))

    @property
    def curves(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []  # not implemented for OBB

    @property
    def curves_results(self):
        """Returns a list of curves for accessing specific metrics curves."""
        return []  # not implemented for OBB

.\yolov8\ultralytics\utils\ops.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib  # context-manager utilities
import math
import re  # regular expressions
import time

import cv2  # OpenCV
import numpy as np
import torch
import torch.nn.functional as F

from ultralytics.utils import LOGGER
from ultralytics.utils.metrics import batch_probiou


class Profile(contextlib.ContextDecorator):
    """
    YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.

    Example:
        ```py
        from ultralytics.utils.ops import Profile

        with Profile(device=device) as dt:
            pass  # slow operation here

        print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"
        ```
    """

    def __init__(self, t=0.0, device: torch.device = None):
        """
        Initialize the Profile class.

        Args:
            t (float): Initial time. Defaults to 0.0.
            device (torch.device): Devices used for model inference. Defaults to None (cpu).
        """
        self.t = t  # accumulated elapsed time
        self.device = device  # device used for timing
        self.cuda = bool(device and str(device).startswith("cuda"))  # whether CUDA synchronization is needed

    def __enter__(self):
        """Start timing."""
        self.start = self.time()  # record the start time
        return self

    def __exit__(self, type, value, traceback):  # noqa
        """Stop timing."""
        self.dt = self.time() - self.start  # delta-time for this context
        self.t += self.dt  # accumulate the delta-time

    def __str__(self):
        """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
        return f"Elapsed time is {self.t} s"

    def time(self):
        """Get current time."""
        if self.cuda:
            torch.cuda.synchronize(self.device)  # wait for all CUDA kernels to finish so the timing is accurate
        return time.time()


def segment2box(segment, width=640, height=640):
    """
    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

    Args:
        segment (torch.Tensor): the segment label
        width (int): the width of the image. Defaults to 640
        height (int): The height of the image. Defaults to 640

    Returns:
        (np.ndarray): the minimum and maximum x and y values of the segment.
    """
    x, y = segment.T  # segment xy coordinates
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)  # inside-image constraint
    x = x[inside]  # keep x coordinates that satisfy the constraint
    y = y[inside]  # keep y coordinates that satisfy the constraint
    return (
        np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
        if any(x)
        else np.zeros(4, dtype=segment.dtype)
    )  # min/max xy of the segment, or an all-zero box if no points remain inside the image
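
A small illustration of the inside-image filtering with fabricated points (not part of the original file):

```py
import numpy as np

seg = np.array([[10.0, 20.0], [300.0, 40.0], [-5.0, 50.0], [100.0, 700.0]])
print(segment2box(seg, width=640, height=640))
# [ 10.  20. 300.  40.]  # the points at x=-5 and y=700 fall outside the image and are dropped
```
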


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
    """
    Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
    specified in (img1_shape) to the shape of a different image (img0_shape).

    Args:
        img1_shape (tuple): Shape of the original image (height, width).
        boxes (torch.Tensor): Bounding boxes in format xyxy.
        img0_shape (tuple): Shape of the new image (height, width).
        ratio_pad (tuple): Aspect ratio and padding.
        padding (bool): Whether to pad bounding boxes or not.
        xywh (bool): Whether the boxes are in xywh format or not. Defaults to False.
    """
    """
        Args:
            img1_shape (tuple): 目标图像的形状,格式为 (高度, 宽度)
            boxes (torch.Tensor): 图像中物体的边界框,格式为 (x1, y1, x2, y2)
            img0_shape (tuple): 原始图像的形状,格式为 (高度, 宽度)
            ratio_pad (tuple): 一个元组 (ratio, pad),用于缩放边界框。如果未提供,则根据两个图像的大小差异计算 ratio 和 pad
            padding (bool): 如果为 True,则假设边界框基于 YOLO 样式增强的图像。如果为 False,则进行常规的重新缩放
            xywh (bool): 边界框格式是否为 xywh, 默认为 False
    
        Returns:
            boxes (torch.Tensor): 缩放后的边界框,格式为 (x1, y1, x2, y2)
    """
    if ratio_pad is None:  # 如果未提供 ratio_pad,则从 img0_shape 计算
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # 计算缩放比例 gain = 目标图像尺寸 / 原始图像尺寸
        pad = (
            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),  # 计算宽度方向的填充量
            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),  # 计算高度方向的填充量
        )
    else:
        gain = ratio_pad[0][0]  # 使用提供的 ratio_pad 中的缩放比例
        pad = ratio_pad[1]  # 使用提供的 ratio_pad 中的填充量
    
    if padding:
        boxes[..., 0] -= pad[0]  # 减去 x 方向的填充量
        boxes[..., 1] -= pad[1]  # 减去 y 方向的填充量
        if not xywh:
            boxes[..., 2] -= pad[0]  # 对于非 xywh 格式的边界框,再次减去 x 方向的填充量
            boxes[..., 3] -= pad[1]  # 对于非 xywh 格式的边界框,再次减去 y 方向的填充量
    
    boxes[..., :4] /= gain  # 缩放边界框坐标
    return clip_boxes(boxes, img0_shape)  # 调用 clip_boxes 函数,确保边界框在图像内部
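
To make the letterbox arithmetic concrete, here is a sketch with invented shapes, mapping a box from a 640x640 letterboxed input back to a 720x1280 original (it assumes the full ops module, since `clip_boxes` is defined elsewhere in this file):

```py
import torch

img1_shape = (640, 640)   # network input (letterboxed)
img0_shape = (720, 1280)  # original image
boxes = torch.tensor([[320.0, 200.0, 420.0, 300.0]])  # xyxy in the 640x640 frame
# gain = min(640/720, 640/1280) = 0.5; pad = (0, 140), i.e. 140 px of vertical letterboxing
out = scale_boxes(img1_shape, boxes.clone(), img0_shape)
print(out)  # tensor([[640., 120., 840., 320.]])
```
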


def nms_rotated(boxes, scores, threshold=0.45):
    """
    NMS for oriented bounding boxes, powered by probiou and fast-nms.

    Args:
        boxes (torch.Tensor): Rotated bounding boxes of shape (N, 5) in xywhr format.
        scores (torch.Tensor): Confidence scores of shape (N,).
        threshold (float): IoU threshold. Defaults to 0.45.

    Returns:
        (torch.Tensor): Indices of the boxes kept after NMS, in descending-score order.
    """
    if len(boxes) == 0:
        return np.empty((0,), dtype=np.int8)

    # Sort the boxes by confidence in descending order
    sorted_idx = torch.argsort(scores, descending=True)
    boxes = boxes[sorted_idx]

    # Pairwise probiou matrix; keep only the upper triangle
    ious = batch_probiou(boxes, boxes).triu_(diagonal=1)

    # Keep a box only if no higher-scoring box overlaps it above the threshold
    pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze(-1)
    return sorted_idx[pick]


def non_max_suppression(
    prediction,
    conf_thres=0.25,  # confidence threshold; predictions below it are discarded
    iou_thres=0.45,  # IoU threshold used when suppressing overlapping boxes
    classes=None,  # optional list of class indices to keep
    agnostic=False,  # if True, suppress boxes regardless of their class
    multi_label=False,  # if True, allow multiple labels per box
    labels=(),  # a priori labels, merged into the predictions when autolabelling
    max_det=300,  # maximum number of detections to keep per image
    nc=0,  # number of classes (optional)
    max_time_img=0.05,  # maximum processing time per image, in seconds
    max_nms=30000,  # maximum number of boxes passed into torchvision NMS
    max_wh=7680,  # maximum box width and height, also used as the per-class offset
    in_place=True,  # if True, modify the input prediction tensor in place
    rotated=False,  # whether the boxes are rotated (OBB)
):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
    """
    import torchvision  # scoped here so that `import ultralytics` stays fast

    # Checks: both thresholds must lie in [0, 1]
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"

    # If prediction is a list or tuple (e.g. YOLOv8 model in validation mode), select the inference output
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]  # select only inference output

    # Move the class filter onto the same device as the predictions
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    # If the last dimension is 6, this is end-to-end model output (BNC format, i.e. 1,300,6)
    if prediction.shape[-1] == 6:
        # Filter each image's predictions by the confidence threshold
        output = [pred[pred[:, 4] > conf_thres] for pred in prediction]
        # Optionally filter by the requested classes
        if classes is not None:
            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
        return output

    # Batch size (BCN format, i.e. 1,84,6300)
    bs = prediction.shape[0]
    nc = nc or (prediction.shape[1] - 4)  # number of classes, inferred from the shape if not given
    nm = prediction.shape[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates above the confidence threshold

    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    # Swap the last two dimensions: shape(1,84,6300) to shape(1,6300,84)
    prediction = prediction.transpose(-1, -2)

    # Unless the boxes are rotated, convert them from xywh to xyxy
    if not rotated:
        if in_place:
            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy, in-place modification
        else:
            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy

    t = time.time()  # start time, used to enforce the time limit

    # One (initially empty) output tensor of shape (0, 6 + nm) per image
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, per-image predictions
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # keep only candidates above the confidence threshold

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)  # merge the a priori labels with the predictions

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            # Keep every (box, class) pair whose class score clears the threshold
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == classes).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and keep the top max_nms

        # Batched NMS: offset boxes by class so that different classes never suppress each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr, class offset applied to xy
            i = nms_rotated(boxes, scores, iou_thres)
        else:
            boxes = x[:, :4] + c
            i = torchvision.ops.nms(boxes, scores, iou_thres)
        i = i[:max_det]  # limit detections

        output[xi] = x[i]  # store the surviving detections for this image
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return output
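
A minimal sketch of calling the function on fabricated raw output (one image, two classes, no masks, random values), with shapes following the BCN comment above:

```py
import torch

torch.manual_seed(0)
nc, n_anchors = 2, 100
raw = torch.rand(1, 4 + nc, n_anchors)  # (batch, 4 box coords + nc class scores, anchors)
raw[:, :4] *= 640                       # xywh in pixels
dets = non_max_suppression(raw, conf_thres=0.25, iou_thres=0.45)
print(dets[0].shape)                    # (n, 6): x1, y1, x2, y2, conf, cls
```
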


def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a mask, and resizes it to the original image size.

    Args:
        masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
        im0_shape (tuple): the original image shape
        ratio_pad (tuple): the ratio of the padding to the original image.

    Returns:
        masks (np.ndarray): The masks that are being returned with shape [h, w, num].
    """
    im1_shape = masks.shape  # current mask shape
    if im1_shape[:2] == im0_shape[:2]:
        return masks  # already the original size, nothing to do
    if ratio_pad is None:  # calculate gain and padding from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = resized / original
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]  # use the provided padding

    # Integer crop bounds: top-left and bottom-right corners of the unpadded region
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')

    masks = masks[top:bottom, left:right]  # crop away the padding
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))  # resize back to the original image size
    if len(masks.shape) == 2:
        masks = masks[:, :, None]  # restore the channel dimension dropped by cv2.resize

    return masks


def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to normalized (x, y, width, height) format,
    relative to image dimensions and optionally clip the values.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
        w (int): Width of the image. Defaults to 640.
        h (int): Height of the image. Defaults to 640.
        clip (bool): Whether to clip the boxes to the image boundaries. Defaults to False.
        eps (float): The minimum value of the box's width and height. Defaults to 0.0.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in normalized (x, y, width, height) format.
    """
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))  # clip the boxes to the (h - eps, w - eps) image boundary
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)  # faster than clone/copy
    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center, normalized by the image width
    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center, normalized by the image height
    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width, normalized
    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height, normalized
    return y
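
A tiny numeric check of the normalization with a made-up box (assuming numpy):

```py
import numpy as np

box = np.array([[64.0, 128.0, 192.0, 384.0]])  # xyxy in a 640x640 image
print(xyxy2xywhn(box, w=640, h=640))
# [[0.2 0.4 0.2 0.4]]  # center (128, 256) / 640 and size (128, 256) / 640
```
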


def xywh2ltwh(x):
    """
    Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with the bounding box coordinates in the xywh format

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    return y


def xyxy2ltwh(x):
    """
    Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1 is top-left and xy2 is bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with the bounding box coordinates in the xyxy format

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y


def ltwh2xywh(x):
    """
    Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h], where xy1 is the top-left and xy is the center.

    Args:
        x (torch.Tensor): the input tensor

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xywh format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
    y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
    return y


def xyxyxyxy2xywhr(x):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [cx, cy, w, h, rotation]. Rotation
    values are returned in radians from 0 to pi/2.

    Args:
        x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8).

    Returns:
        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5).
    """
    is_torch = isinstance(x, torch.Tensor)
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)
    rboxes = []
    for pts in points:
        # NOTE: Use cv2.minAreaRect to get an accurate xywhr,
        # especially when some objects are cut off by augmentations in the dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])  # convert the angle from degrees to radians
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)


def xywhr2xyxyxyxy(x):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [cx, cy, w, h, rotation] to [xy1, xy2, xy3, xy4]. Rotation
    values should be in radians from 0 to pi/2.

    Args:
        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).

    Returns:
        (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2).
    """
    # Pick the math functions that match the input type (torch or numpy)
    cos, sin, cat, stack = (
        (torch.cos, torch.sin, torch.cat, torch.stack)
        if isinstance(x, torch.Tensor)
        else (np.cos, np.sin, np.concatenate, np.stack)
    )

    ctr = x[..., :2]  # box centers
    w, h, angle = (x[..., i : i + 1] for i in range(2, 5))  # width, height, rotation
    cos_value, sin_value = cos(angle), sin(angle)
    vec1 = [w / 2 * cos_value, w / 2 * sin_value]  # half-width vector along the rotated x-axis
    vec2 = [-h / 2 * sin_value, h / 2 * cos_value]  # half-height vector along the rotated y-axis
    vec1 = cat(vec1, -1)  # merge the two components of vec1
    vec2 = cat(vec2, -1)  # merge the two components of vec2
    # The four corners are the center plus/minus the two half-edge vectors
    pt1 = ctr + vec1 + vec2
    pt2 = ctr + vec1 - vec2
    pt3 = ctr - vec1 - vec2
    pt4 = ctr - vec1 + vec2
    return stack([pt1, pt2, pt3, pt4], -2)  # stack the corners along a new second-to-last dimension
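
A round-trip sketch of the two OBB converters with a fabricated box (the recovered values match the input only up to OpenCV's `minAreaRect` width/height/angle convention):

```py
import math
import torch

rbox = torch.tensor([[100.0, 50.0, 40.0, 20.0, math.pi / 6]])  # cx, cy, w, h, rotation
corners = xywhr2xyxyxyxy(rbox)                # shape (1, 4, 2)
back = xyxyxyxy2xywhr(corners.reshape(1, 8))  # back to (1, 5)
print(back)  # approximately [100, 50, 40, 20, pi/6]
```
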


def ltwh2xyxy(x):
    """
    Convert boxes from [x1, y1, w, h] to [x1, y1, x2, y2], where xy1 is the top-left and xy2 is the bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): the input boxes

    Returns:
        y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] + x[..., 0]  # bottom-right x = left + width
    y[..., 3] = x[..., 3] + x[..., 1]  # bottom-right y = top + height
    return y


def segments2boxes(segments):
    """
    Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

    Args:
        segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates

    Returns:
        (np.ndarray): the xywh coordinates of the bounding boxes.
    """
    boxes = []
    for s in segments:
        x, y = s.T  # segment xy coordinates
        boxes.append([x.min(), y.min(), x.max(), y.max()])  # xyxy box enclosing the segment
    return xyxy2xywh(np.array(boxes))  # convert to xywh


def resample_segments(segments, n=1000):
    """
    Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.

    Args:
        segments (list): a list of (n,2) arrays, where n is the number of points in the segment.
        n (int): number of points to resample the segment to. Defaults to 1000.

    Returns:
        segments (list): the resampled segments.
    """
    for i, s in enumerate(segments):
        s = np.concatenate((s, s[0:1, :]), axis=0)  # append the first point to close the segment
        x = np.linspace(0, len(s) - 1, n)
        xp = np.arange(len(s))
        segments[i] = (
            np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
        )  # interpolate each coordinate to obtain the up-sampled points
    return segments


def crop_mask(masks, boxes):
    """
    Crop masks to their bounding boxes and return the cropped masks.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks
        boxes (torch.Tensor): [n, 4] tensor of bounding box coordinates in relative point form

    Returns:
        (torch.Tensor): The masks cropped to their bounding boxes.
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # split the box coordinates, each of shape (n, 1, 1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # column indices, shape (1, 1, w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # row indices, shape (1, h, 1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
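
A tiny sketch of the broadcasting trick above, cropping one fabricated mask:

```py
import torch

masks = torch.ones(1, 4, 4)                   # one 4x4 mask of ones
boxes = torch.tensor([[1.0, 1.0, 3.0, 3.0]])  # keep the region with 1 <= x < 3 and 1 <= y < 3
print(crop_mask(masks, boxes)[0])
# tensor([[0., 0., 0., 0.],
#         [0., 1., 1., 0.],
#         [0., 1., 1., 0.],
#         [0., 0., 0., 0.]])
```
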


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): The size of the input image in the format (h, w).
        upsample (bool): Whether to upsample the masks to the input image size. Defaults to False.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], cropped to the bounding boxes.
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape  # input image height and width

    # Each output mask is a linear combination of the prototype masks, reshaped back to [n, mh, mw]
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # CHW

    # Ratios for scaling the bounding boxes down to the prototype resolution
    width_ratio = mw / iw
    height_ratio = mh / ih

    # Clone the boxes and scale their corners by the ratios
    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= width_ratio
    downsampled_bboxes[:, 2] *= width_ratio
    downsampled_bboxes[:, 3] *= height_ratio
    downsampled_bboxes[:, 1] *= height_ratio

    # Zero out everything outside each box
    masks = crop_mask(masks, downsampled_bboxes)  # CHW

    # Optionally upsample the masks to the input image size with bilinear interpolation
    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0]  # CHW

    # Binarize: keep the pixels with a positive mask score
    return masks.gt_(0.0)


def process_mask_native(protos, masks_in, bboxes, shape):
    """
    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w], the prototype masks.
        masks_in (torch.Tensor): [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): [n, 4], where n is the number of masks after NMS.
        shape (tuple): the size of the input image (h, w).

    Returns:
        masks (torch.Tensor): The processed binary masks.
    """
    c, mh, mw = protos.shape  # prototype channels, height, width
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # linear combination of the prototypes
    masks = scale_masks(masks[None], shape)[0]  # rescale the masks to the input image size
    masks = crop_mask(masks, bboxes)  # crop the masks to the bounding boxes
    return masks.gt_(0.0)  # binarize with a threshold of zero


def scale_masks(masks, shape, padding=True):
    """
    Rescale segment masks to shape.

    Args:
        masks (torch.Tensor): (N, C, H, W) mask tensor.
        shape (tuple): target height and width.
        padding (bool): If True, assume the masks are based on a YOLO-style augmented (letterboxed) image.
            If False, do regular rescaling.

    Returns:
        masks (torch.Tensor): The rescaled masks.
    """
    mh, mw = masks.shape[2:]  # mask height and width
    gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding

    if padding:
        pad[0] /= 2  # half of the width padding on each side
        pad[1] /= 2  # half of the height padding on each side

    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # top-left crop corner
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))  # bottom-right crop corner
    masks = masks[..., top:bottom, left:right]  # crop away the padding

    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # rescale with bilinear interpolation
    return masks


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
    """
    Rescale segment coordinates (xy) from img1_shape to img0_shape.

    Args:
        img1_shape (tuple): The shape of the image that the coords are from.
        coords (torch.Tensor): the coords to be scaled, of shape n,2.
        img0_shape (tuple): the shape of the image that the segmentation is being applied to.
        ratio_pad (tuple): the ratio of the image size to the padded image size.
        normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False.
        padding (bool): If True, assume the coords are based on a YOLO-style augmented (letterboxed) image.
            If False, do regular rescaling.

    Returns:
        coords (torch.Tensor): The scaled coordinates.
    """
    if ratio_pad is None:  # calculate gain and padding from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]  # use the provided scaling gain
        pad = ratio_pad[1]  # use the provided padding

    if padding:
        coords[..., 0] -= pad[0]  # subtract x padding
        coords[..., 1] -= pad[1]  # subtract y padding

    coords[..., 0] /= gain  # rescale x
    coords[..., 1] /= gain  # rescale y
    coords = clip_coords(coords, img0_shape)  # clip the coordinates so they stay inside the image
    if normalize:
        coords[..., 0] /= img0_shape[1]  # normalize x by the image width
        coords[..., 1] /= img0_shape[0]  # normalize y by the image height
    return coords


def regularize_rboxes(rboxes):
    """
    Regularize rotated boxes in range [0, pi/2].

    Args:
        rboxes (torch.Tensor): Input boxes of shape (N, 5) in xywhr format.

    Returns:
        (torch.Tensor): The regularized boxes.
    """
    x, y, w, h, t = rboxes.unbind(dim=-1)
    # Swap edge and angle if h >= w
    w_ = torch.where(w > h, w, h)  # the longer edge
    h_ = torch.where(w > h, h, w)  # the shorter edge
    t = torch.where(w > h, t, t + math.pi / 2) % math.pi  # rotate the angle by pi/2 when the edges are swapped
    return torch.stack([x, y, w_, h_, t], dim=-1)  # stack the regularized boxes
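
A quick check of the edge/angle swap with a fabricated box (h > w, so the edges are swapped and the angle is rotated by pi/2):

```py
import torch

rboxes = torch.tensor([[50.0, 50.0, 10.0, 30.0, 0.2]])
print(regularize_rboxes(rboxes))
# tensor([[50.0000, 50.0000, 30.0000, 10.0000, 1.7708]])  # 1.7708 = 0.2 + pi/2
```
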


def masks2segments(masks, strategy="largest"):
    """
    It takes a list of masks (n,h,w) and returns a list of segments (n,xy).

    Args:
        masks (torch.Tensor): the output of the model, a tensor of shape (n, h, w).
        strategy (str): 'concat' or 'largest'. Defaults to 'largest'.

    Returns:
        segments (list): list of segment masks as float32 (m, 2) arrays.
    """
    segments = []
    for x in masks.int().cpu().numpy().astype("uint8"):
        # Find the external contours in the binary mask image
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == "concat":  # concatenate all segments
                c = np.concatenate([x.reshape(-1, 2) for x in c])
            elif strategy == "largest":  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype("float32"))
    return segments


def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """
    Convert a batch of FP32 torch tensors (0.0-1.0) to a NumPy uint8 array (0-255), changing from BCHW to BHWC layout.

    Args:
        batch (torch.Tensor): Input tensor batch of shape (Batch, Channels, Height, Width) and dtype torch.float32.

    Returns:
        (np.ndarray): Output NumPy array batch of shape (Batch, Height, Width, Channels) and dtype uint8.
    """
    return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy()


def clean_str(s):
    """
    Cleans a string by replacing special characters with an underscore _

    Args:
        s (str): a string needing special characters replaced

    Returns:
        (str): a string with special characters replaced by an underscore _
    """
    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)