Transformers-源码解析-七十六-

Transformers 源码解析(七十六)

.\models\mobilebert\modeling_tf_mobilebert.py

# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" TF 2.0 MobileBERT model."""

from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import numpy as np
import tensorflow as tf

# Importing specific modules and classes from other files in the package
from ...activations_tf import get_tf_activation
from ...modeling_tf_outputs import (
    TFBaseModelOutput,
    TFBaseModelOutputWithPooling,
    TFMaskedLMOutput,
    TFMultipleChoiceModelOutput,
    TFNextSentencePredictorOutput,
    TFQuestionAnsweringModelOutput,
    TFSequenceClassifierOutput,
    TFTokenClassifierOutput,
)
from ...modeling_tf_utils import (
    TFMaskedLanguageModelingLoss,
    TFModelInputType,
    TFMultipleChoiceLoss,
    TFNextSentencePredictionLoss,
    TFPreTrainedModel,
    TFQuestionAnsweringLoss,
    TFSequenceClassificationLoss,
    TFTokenClassificationLoss,
    get_initializer,
    keras,
    keras_serializable,
    unpack_inputs,
)
from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax
from ...utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from .configuration_mobilebert import MobileBertConfig

# Module-level logger, obtained from the package's logging utility.
logger = logging.get_logger(__name__)

# Documentation constants: checkpoint names and expected outputs/losses.
# NOTE(review): presumably consumed by the docstring decorators imported above
# (e.g. add_code_sample_docstrings) on model classes outside this excerpt — confirm.

_CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
_CONFIG_FOR_DOC = "MobileBertConfig"

# TokenClassification docstring
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner"
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
_TOKEN_CLASS_EXPECTED_LOSS = 0.03

# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2"
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
_QA_EXPECTED_LOSS = 3.98
_QA_TARGET_START_INDEX = 12
_QA_TARGET_END_INDEX = 13

# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert"
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
# NOTE(review): stored as a string, unlike the float losses above — confirm this is intentional.
_SEQ_CLASS_EXPECTED_LOSS = "4.72"

# List of pretrained model archives for MobileBERT
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "google/mobilebert-uncased",
    # See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
]

# Definition of a custom loss class for MobileBERT pretraining tasks
class TFMobileBertPreTrainingLoss:
    """
    Loss function suitable for BERT-like pretraining, that is, the task of pretraining a language model by combining
    NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
    computation.
    """

    def hf_compute_loss(self, labels: tf.Tensor, logits: tf.Tensor) -> tf.Tensor:
        """
        Compute the sum of the masked-LM loss and the next-sentence loss.

        Args:
            labels: Indexed with the keys `"labels"` (masked-LM targets) and `"next_sentence_label"`
                (next-sentence targets). Entries equal to -100 are excluded from the respective loss.
            logits: Indexed positionally; `logits[0]` is scored against `labels["labels"]` and `logits[1]`
                against `labels["next_sentence_label"]`.

        Returns:
            A tensor of shape `(1,)` holding the mean masked-LM loss plus the mean next-sentence loss, each
            averaged over its non-ignored positions only.
        """
        # Per-element sparse categorical cross-entropy on raw logits; no reduction so we can mask afterwards.
        loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=keras.losses.Reduction.NONE)

        # Clip negative labels (the -100 "ignore" marker) to zero to avoid NaNs and errors in the loss;
        # those positions are zeroed out by the mask below anyway.
        unmasked_lm_losses = loss_fn(y_true=tf.nn.relu(labels["labels"]), y_pred=logits[0])
        # Only positions whose label is not -100 contribute to the loss.
        lm_loss_mask = tf.cast(labels["labels"] != -100, dtype=unmasked_lm_losses.dtype)
        masked_lm_losses = unmasked_lm_losses * lm_loss_mask
        # NOTE(review): if every label is -100 the denominator is zero — confirm callers never pass that.
        reduced_masked_lm_loss = tf.reduce_sum(masked_lm_losses) / tf.reduce_sum(lm_loss_mask)

        # Same clip-then-mask treatment for the next-sentence labels.
        unmasked_ns_loss = loss_fn(y_true=tf.nn.relu(labels["next_sentence_label"]), y_pred=logits[1])
        ns_loss_mask = tf.cast(labels["next_sentence_label"] != -100, dtype=unmasked_ns_loss.dtype)
        masked_ns_loss = unmasked_ns_loss * ns_loss_mask

        reduced_masked_ns_loss = tf.reduce_sum(masked_ns_loss) / tf.reduce_sum(ns_loss_mask)

        # Return the combined scalar loss reshaped to a rank-1 tensor of length 1.
        return tf.reshape(reduced_masked_lm_loss + reduced_masked_ns_loss, (1,))
class TFMobileBertIntermediate(keras.layers.Layer):
    """Dense projection to `config.intermediate_size` followed by the configured activation function."""

    def __init__(self, config, **kwargs):
        """Create the dense layer and resolve the activation from `config.hidden_act`."""
        super().__init__(**kwargs)

        self.dense = keras.layers.Dense(config.intermediate_size, name="dense")

        # `hidden_act` may be given by name (resolved through get_tf_activation) or used directly as provided.
        activation = config.hidden_act
        self.intermediate_act_fn = get_tf_activation(activation) if isinstance(activation, str) else activation
        self.config = config

    def call(self, hidden_states):
        """Apply the dense layer, then the activation, and return the result."""
        return self.intermediate_act_fn(self.dense(hidden_states))

    def build(self, input_shape=None):
        """Build the dense sub-layer once, under its own name scope."""
        if self.built:
            return
        self.built = True

        dense_layer = getattr(self, "dense", None)
        if dense_layer is None:
            return
        with tf.name_scope(dense_layer.name):
            # Last input dimension is config.true_hidden_size; the two leading dimensions are left unspecified.
            dense_layer.build([None, None, self.config.true_hidden_size])


class TFLayerNorm(keras.layers.LayerNormalization):
    """LayerNormalization that remembers its feature size so it can be built without a real input shape."""

    def __init__(self, feat_size, *args, **kwargs):
        """Store `feat_size` and forward all remaining arguments to LayerNormalization."""
        self.feat_size = feat_size
        super().__init__(*args, **kwargs)

    def build(self, input_shape=None):
        """Ignore `input_shape` and build the parent layer for inputs of shape [None, None, feat_size]."""
        fixed_shape = [None, None, self.feat_size]
        super().build(fixed_shape)


class TFNoNorm(keras.layers.Layer):
    """Element-wise affine layer (`inputs * weight + bias`) used in place of a normalization layer."""

    def __init__(self, feat_size, epsilon=None, **kwargs):
        """
        Args:
            feat_size: Length of the learned `weight` and `bias` vectors.
            epsilon: Accepted and ignored, so this class can be constructed with the same arguments as
                TFLayerNorm through NORM2FN.
        """
        super().__init__(**kwargs)
        self.feat_size = feat_size

    def build(self, input_shape=None):
        """Create the `bias` (zeros) and `weight` (ones) vectors, then defer to the parent build.

        `input_shape` defaults to None so this method can be invoked like the other `build` methods in this
        file, which are called with `None`.
        """
        # The weight name is passed by keyword: the positional order of `add_weight` parameters is not the
        # same across Keras versions, while `name=` is accepted by all of them.
        self.bias = self.add_weight(name="bias", shape=[self.feat_size], initializer="zeros")
        self.weight = self.add_weight(name="weight", shape=[self.feat_size], initializer="ones")
        super().build(input_shape)

    def call(self, inputs: tf.Tensor):
        """Scale `inputs` by `weight` and shift by `bias`."""
        return inputs * self.weight + self.bias


# Maps the `config.normalization_type` string to the normalization layer class to instantiate.
NORM2FN = {"layer_norm": TFLayerNorm, "no_norm": TFNoNorm}


class TFMobileBertEmbeddings(keras.layers.Layer):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config, **kwargs):
        """Read the embedding-related settings from `config` and create the sub-layers."""
        super().__init__(**kwargs)

        self.trigram_input = config.trigram_input
        self.embedding_size = config.embedding_size
        self.config = config
        self.hidden_size = config.hidden_size
        self.max_position_embeddings = config.max_position_embeddings
        self.initializer_range = config.initializer_range

        # Projects the (possibly trigram-concatenated) word embeddings to `hidden_size`.
        self.embedding_transformation = keras.layers.Dense(config.hidden_size, name="embedding_transformation")

        # The attribute and layer are named "LayerNorm" (not snake-cased) to stick with TensorFlow model
        # variable names and be able to load any TensorFlow checkpoint file.
        self.LayerNorm = NORM2FN[config.normalization_type](
            config.hidden_size, epsilon=config.layer_norm_eps, name="LayerNorm"
        )

        self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)

        # With trigram input, three embeddings are concatenated per position before the projection.
        self.embedded_input_size = self.embedding_size * (3 if self.trigram_input else 1)

    def build(self, input_shape=None):
        """Create the three embedding tables and build the projection and normalization sub-layers."""
        # NOTE(review): the tables below are created before the `built` guard, so a second explicit call to
        # build() would create them again. Kept as in the original; confirm build() is only invoked once.
        with tf.name_scope("word_embeddings"):
            self.weight = self.add_weight(
                name="weight",
                shape=[self.config.vocab_size, self.embedding_size],
                initializer=get_initializer(initializer_range=self.initializer_range),
            )

        with tf.name_scope("token_type_embeddings"):
            self.token_type_embeddings = self.add_weight(
                name="embeddings",
                shape=[self.config.type_vocab_size, self.hidden_size],
                initializer=get_initializer(initializer_range=self.initializer_range),
            )

        with tf.name_scope("position_embeddings"):
            self.position_embeddings = self.add_weight(
                name="embeddings",
                shape=[self.max_position_embeddings, self.hidden_size],
                initializer=get_initializer(initializer_range=self.initializer_range),
            )

        if self.built:
            return
        self.built = True

        if getattr(self, "embedding_transformation", None) is not None:
            with tf.name_scope(self.embedding_transformation.name):
                self.embedding_transformation.build([None, None, self.embedded_input_size])
        if getattr(self, "LayerNorm", None) is not None:
            with tf.name_scope(self.LayerNorm.name):
                self.LayerNorm.build(None)

    def call(self, input_ids=None, position_ids=None, token_type_ids=None, inputs_embeds=None, training=False):
        """
        Applies embedding based on inputs tensor.

        Args:
            input_ids: Token ids used to look up rows of the word-embedding table. When given, they take
                precedence over `inputs_embeds`.
            position_ids: Indices into the position-embedding table. Defaults to `0 .. seq_len - 1`.
            token_type_ids: Indices into the token-type table. Defaults to all zeros.
            inputs_embeds: Pre-computed word embeddings, used when `input_ids` is None.
            training: Forwarded to the dropout layer.

        Returns:
            final_embeddings (`tf.Tensor`): output embedding tensor.
        """
        # At least one of input_ids / inputs_embeds must be provided.
        assert not (input_ids is None and inputs_embeds is None)

        if input_ids is not None:
            check_embeddings_within_bounds(input_ids, self.config.vocab_size)
            inputs_embeds = tf.gather(params=self.weight, indices=input_ids)

        # All dimensions except the trailing embedding dimension.
        input_shape = shape_list(inputs_embeds)[:-1]

        if token_type_ids is None:
            token_type_ids = tf.fill(dims=input_shape, value=0)

        if self.trigram_input:
            # Trigram input as described in the MobileBERT paper: for every position, concatenate along the
            # feature axis the embedding of the next token (zero-padded at the sequence end), the token
            # itself, and the previous token (zero-padded at the sequence start).
            inputs_embeds = tf.concat(
                [
                    tf.pad(inputs_embeds[:, 1:], ((0, 0), (0, 1), (0, 0))),
                    inputs_embeds,
                    tf.pad(inputs_embeds[:, :-1], ((0, 0), (1, 0), (0, 0))),
                ],
                axis=2,
            )

        # The projection is needed whenever the feature width differs from hidden_size.
        if self.trigram_input or self.embedding_size != self.hidden_size:
            inputs_embeds = self.embedding_transformation(inputs_embeds)

        if position_ids is None:
            position_ids = tf.expand_dims(tf.range(start=0, limit=input_shape[-1]), axis=0)

        position_embeds = tf.gather(params=self.position_embeddings, indices=position_ids)
        token_type_embeds = tf.gather(params=self.token_type_embeddings, indices=token_type_ids)
        final_embeddings = inputs_embeds + position_embeds + token_type_embeds
        final_embeddings = self.LayerNorm(inputs=final_embeddings)
        final_embeddings = self.dropout(inputs=final_embeddings, training=training)

        return final_embeddings


class TFMobileBertSelfAttention(keras.layers.Layer):
    """Multi-head scaled dot-product attention with separate query, key and value input tensors."""

    def __init__(self, config, **kwargs):
        """
        Create the query/key/value projections and the attention dropout.

        Raises:
            ValueError: If `config.hidden_size` is not a multiple of `config.num_attention_heads`.
        """
        super().__init__(**kwargs)
        if config.hidden_size % config.num_attention_heads != 0:
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
                f"heads ({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.output_attentions = config.output_attentions
        # Head size is derived from true_hidden_size, the width the projections output.
        self.attention_head_size = int(config.true_hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = keras.layers.Dense(
            self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="query"
        )
        self.key = keras.layers.Dense(
            self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="key"
        )
        self.value = keras.layers.Dense(
            self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="value"
        )

        self.dropout = keras.layers.Dropout(config.attention_probs_dropout_prob)
        self.config = config

    def transpose_for_scores(self, x, batch_size):
        """Split the last axis of `x` into heads and move the head axis in front of the sequence axis."""
        # Reshape from [batch_size, seq_length, all_head_size]
        # to [batch_size, seq_length, num_attention_heads, attention_head_size].
        x = tf.reshape(x, (batch_size, -1, self.num_attention_heads, self.attention_head_size))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(
        self, query_tensor, key_tensor, value_tensor, attention_mask, head_mask, output_attentions, training=False
    ):
        """
        Run self-attention.

        Args:
            query_tensor, key_tensor, value_tensor: Inputs to the query, key and value projections.
            attention_mask: Optional additive mask applied to the raw attention scores.
            head_mask: Optional multiplicative mask applied to the attention probabilities.
            output_attentions: Whether to also return the attention probabilities.
            training: Forwarded to the attention dropout.

        Returns:
            `(context_layer,)`, or `(context_layer, attention_probs)` when `output_attentions` is truthy.
        """
        # Take the batch size from the query rather than from attention_mask, which is treated as optional
        # further down.
        batch_size = shape_list(query_tensor)[0]
        mixed_query_layer = self.query(query_tensor)
        mixed_key_layer = self.key(key_tensor)
        mixed_value_layer = self.value(value_tensor)
        query_layer = self.transpose_for_scores(mixed_query_layer, batch_size)
        key_layer = self.transpose_for_scores(mixed_key_layer, batch_size)
        value_layer = self.transpose_for_scores(mixed_value_layer, batch_size)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = tf.matmul(
            query_layer, key_layer, transpose_b=True
        )  # (batch size, num_heads, seq_len_q, seq_len_k)
        # Scale by the square root of the per-head dimension.
        dk = tf.cast(shape_list(key_layer)[-1], dtype=attention_scores.dtype)
        attention_scores = attention_scores / tf.math.sqrt(dk)

        if attention_mask is not None:
            # Apply the attention mask (precomputed by the caller) additively.
            attention_mask = tf.cast(attention_mask, dtype=attention_scores.dtype)
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = stable_softmax(attention_scores, axis=-1)

        # Dropout is applied to the attention probabilities themselves.
        attention_probs = self.dropout(attention_probs, training=training)

        # Mask heads if requested.
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = tf.matmul(attention_probs, value_layer)

        # Move the head axis back behind the sequence axis and merge the heads.
        context_layer = tf.transpose(context_layer, perm=[0, 2, 1, 3])
        context_layer = tf.reshape(
            context_layer, (batch_size, -1, self.all_head_size)
        )  # (batch_size, seq_len_q, all_head_size)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs

    def build(self, input_shape=None):
        """Build the query, key and value projections once, each under its own name scope."""
        if self.built:
            return
        self.built = True
        if getattr(self, "query", None) is not None:
            with tf.name_scope(self.query.name):
                self.query.build([None, None, self.config.true_hidden_size])
        if getattr(self, "key", None) is not None:
            with tf.name_scope(self.key.name):
                self.key.build([None, None, self.config.true_hidden_size])
        if getattr(self, "value", None) is not None:
            with tf.name_scope(self.value.name):
                # The value projection sees bottlenecked inputs only when bottleneck attention is enabled.
                self.value.build(
                    [
                        None,
                        None,
                        self.config.true_hidden_size
                        if self.config.use_bottleneck_attention
                        else self.config.hidden_size,
                    ]
                )
# TFMobileBertSelfOutput: dense projection of the attention output, residual add and normalization.
class TFMobileBertSelfOutput(keras.layers.Layer):
    def __init__(self, config, **kwargs):
        """Create the projection, the normalization layer and, without bottleneck, a dropout layer."""
        super().__init__(**kwargs)
        self.use_bottleneck = config.use_bottleneck
        # Projection to config.true_hidden_size.
        self.dense = keras.layers.Dense(
            config.true_hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
        )
        # Normalization class is selected by config.normalization_type through NORM2FN.
        self.LayerNorm = NORM2FN[config.normalization_type](
            config.true_hidden_size, epsilon=config.layer_norm_eps, name="LayerNorm"
        )
        # Dropout only exists when the bottleneck is disabled.
        if not self.use_bottleneck:
            self.dropout = keras.layers.Dropout(config.hidden_dropout_prob)
        self.config = config

    def call(self, hidden_states, residual_tensor, training=False):
        """Project `hidden_states`, optionally apply dropout, add `residual_tensor` and normalize."""
        projected = self.dense(hidden_states)
        if not self.use_bottleneck:
            projected = self.dropout(projected, training=training)
        return self.LayerNorm(projected + residual_tensor)

    def build(self, input_shape=None):
        """Build the dense and normalization sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True

        dense_layer = getattr(self, "dense", None)
        if dense_layer is not None:
            with tf.name_scope(dense_layer.name):
                dense_layer.build([None, None, self.config.true_hidden_size])

        norm_layer = getattr(self, "LayerNorm", None)
        if norm_layer is not None:
            with tf.name_scope(norm_layer.name):
                norm_layer.build(None)


# TFMobileBertAttention: self-attention followed by its output (projection + residual + norm) layer.
class TFMobileBertAttention(keras.layers.Layer):
    def __init__(self, config, **kwargs):
        """Create the self-attention layer (named "self") and its output layer (named "output")."""
        super().__init__(**kwargs)
        self.self = TFMobileBertSelfAttention(config, name="self")
        self.mobilebert_output = TFMobileBertSelfOutput(config, name="output")

    def prune_heads(self, heads):
        """Head pruning is not implemented for this layer."""
        raise NotImplementedError

    def call(
        self,
        query_tensor,
        key_tensor,
        value_tensor,
        layer_input,
        attention_mask,
        head_mask,
        output_attentions,
        training=False,
    ):
        """
        Run self-attention and combine its first output with `layer_input` in the output layer.

        Returns:
            A tuple whose first element is the attention output; any further elements returned by the
            self-attention layer (the attention probabilities, when requested) are appended unchanged.
        """
        attn_results = self.self(
            query_tensor, key_tensor, value_tensor, attention_mask, head_mask, output_attentions, training=training
        )
        # `layer_input` serves as the residual tensor of the output layer.
        combined = self.mobilebert_output(attn_results[0], layer_input, training=training)
        return (combined,) + attn_results[1:]

    def build(self, input_shape=None):
        """Build both sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True
        # Same order as the original: self-attention first, then the output layer.
        for attr_name in ("self", "mobilebert_output"):
            sub_layer = getattr(self, attr_name, None)
            if sub_layer is not None:
                with tf.name_scope(sub_layer.name):
                    sub_layer.build(None)


# TFOutputBottleneck: projects to config.hidden_size, then dropout, residual add and normalization.
class TFOutputBottleneck(keras.layers.Layer):
    def __init__(self, config, **kwargs):
        """Create the dense projection, the normalization layer and the dropout layer."""
        super().__init__(**kwargs)
        self.dense = keras.layers.Dense(config.hidden_size, name="dense")
        # Normalization class is selected by config.normalization_type through NORM2FN.
        self.LayerNorm = NORM2FN[config.normalization_type](
            config.hidden_size, epsilon=config.layer_norm_eps, name="LayerNorm"
        )
        self.dropout = keras.layers.Dropout(config.hidden_dropout_prob)
        self.config = config

    def call(self, hidden_states, residual_tensor, training=False):
        """Project `hidden_states`, apply dropout, add `residual_tensor` and normalize the sum."""
        projected = self.dropout(self.dense(hidden_states), training=training)
        return self.LayerNorm(projected + residual_tensor)

    def build(self, input_shape=None):
        """Build the dense and normalization sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True

        dense_layer = getattr(self, "dense", None)
        if dense_layer is not None:
            with tf.name_scope(dense_layer.name):
                # The dense layer is built for inputs whose last dimension is config.true_hidden_size.
                dense_layer.build([None, None, self.config.true_hidden_size])

        norm_layer = getattr(self, "LayerNorm", None)
        if norm_layer is not None:
            with tf.name_scope(norm_layer.name):
                norm_layer.build(None)
class TFMobileBertOutput(keras.layers.Layer):
    """Output stage of a MobileBERT block: projection, residual add, normalization and optional bottleneck."""

    def __init__(self, config, **kwargs):
        """Create the projection and normalization layers plus either a dropout or an output bottleneck."""
        super().__init__(**kwargs)
        self.use_bottleneck = config.use_bottleneck
        # Projection to config.true_hidden_size.
        self.dense = keras.layers.Dense(
            config.true_hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
        )
        # Normalization class is selected by config.normalization_type through NORM2FN.
        self.LayerNorm = NORM2FN[config.normalization_type](
            config.true_hidden_size, epsilon=config.layer_norm_eps, name="LayerNorm"
        )
        # Without bottleneck a plain dropout is used; with bottleneck the output bottleneck layer
        # (which contains its own dropout) is used instead.
        if not self.use_bottleneck:
            self.dropout = keras.layers.Dropout(config.hidden_dropout_prob)
        else:
            self.bottleneck = TFOutputBottleneck(config, name="bottleneck")
        self.config = config

    def call(self, hidden_states, residual_tensor_1, residual_tensor_2, training=False):
        """
        Args:
            hidden_states: Input to the dense projection.
            residual_tensor_1: Added to the projected states before normalization.
            residual_tensor_2: Residual passed to the output bottleneck; unused without bottleneck.
            training: Forwarded to the dropout layer or to the output bottleneck.

        Returns:
            The processed hidden states.
        """
        hidden_states = self.dense(hidden_states)
        if not self.use_bottleneck:
            hidden_states = self.dropout(hidden_states, training=training)
        hidden_states = self.LayerNorm(hidden_states + residual_tensor_1)
        if self.use_bottleneck:
            # Pass `training` explicitly, like every other sub-layer call here, instead of relying on
            # Keras to propagate it implicitly to the bottleneck's dropout.
            hidden_states = self.bottleneck(hidden_states, residual_tensor_2, training=training)
        return hidden_states

    def build(self, input_shape=None):
        """Build the sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True
        if getattr(self, "dense", None) is not None:
            with tf.name_scope(self.dense.name):
                # The projection is built for inputs whose last dimension is config.intermediate_size.
                self.dense.build([None, None, self.config.intermediate_size])
        if getattr(self, "LayerNorm", None) is not None:
            with tf.name_scope(self.LayerNorm.name):
                self.LayerNorm.build(None)
        if getattr(self, "bottleneck", None) is not None:
            with tf.name_scope(self.bottleneck.name):
                self.bottleneck.build(None)


class TFBottleneckLayer(keras.layers.Layer):
    """Dense projection to `config.intra_bottleneck_size` followed by normalization."""

    def __init__(self, config, **kwargs):
        """Create the dense projection and the normalization layer chosen by `config.normalization_type`."""
        super().__init__(**kwargs)
        bottleneck_width = config.intra_bottleneck_size
        self.dense = keras.layers.Dense(bottleneck_width, name="dense")
        self.LayerNorm = NORM2FN[config.normalization_type](
            bottleneck_width, epsilon=config.layer_norm_eps, name="LayerNorm"
        )
        self.config = config

    def call(self, inputs):
        """Project `inputs` and normalize the projection."""
        return self.LayerNorm(self.dense(inputs))

    def build(self, input_shape=None):
        """Build the dense and normalization sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True

        dense_layer = getattr(self, "dense", None)
        if dense_layer is not None:
            with tf.name_scope(dense_layer.name):
                # The projection is built for inputs whose last dimension is config.hidden_size.
                dense_layer.build([None, None, self.config.hidden_size])

        norm_layer = getattr(self, "LayerNorm", None)
        if norm_layer is not None:
            with tf.name_scope(norm_layer.name):
                norm_layer.build(None)


class TFBottleneck(keras.layers.Layer):
    """Produces the query, key, value and residual ("layer input") tensors consumed by the attention layer."""

    def __init__(self, config, **kwargs):
        """Create the input bottleneck and, when key/query share a bottleneck, the attention bottleneck."""
        super().__init__(**kwargs)
        self.key_query_shared_bottleneck = config.key_query_shared_bottleneck
        self.use_bottleneck_attention = config.use_bottleneck_attention
        self.bottleneck_input = TFBottleneckLayer(config, name="input")
        if self.key_query_shared_bottleneck:
            self.attention = TFBottleneckLayer(config, name="attention")

    def call(self, hidden_states):
        """
        Return a 4-tuple `(query, key, value, layer_input)`.

        This method can return three different tuples of values. These different values make use of
        bottlenecks, which are linear layers used to project the hidden states to a lower-dimensional vector,
        reducing memory usage. The weights of these linear layers are learned during training.

        If `config.use_bottleneck_attention`, the result of the input bottleneck is returned four times, for
        the query, key, value and "layer input" used by the attention layer. The "layer input" is used as the
        residual tensor in the attention self-output, after the attention scores have been computed.

        If not `config.use_bottleneck_attention` but `config.key_query_shared_bottleneck`, four values are
        returned, three of which went through a bottleneck: the query and key through one shared bottleneck,
        and the residual through another one. The value is the raw hidden states.

        In the last case the query, key and value are the hidden states without bottleneck, and the residual
        is the hidden states passed through the input bottleneck.
        """
        residual = self.bottleneck_input(hidden_states)

        if self.use_bottleneck_attention:
            return (residual,) * 4

        if self.key_query_shared_bottleneck:
            shared_qk = self.attention(hidden_states)
            return (shared_qk, shared_qk, hidden_states, residual)

        return (hidden_states, hidden_states, hidden_states, residual)

    def build(self, input_shape=None):
        """Build the bottleneck sub-layers once, each under its own name scope."""
        if self.built:
            return
        self.built = True
        # Same order as the original: input bottleneck first, then the optional attention bottleneck.
        for attr_name in ("bottleneck_input", "attention"):
            sub_layer = getattr(self, attr_name, None)
            if sub_layer is not None:
                with tf.name_scope(sub_layer.name):
                    sub_layer.build(None)
# TFMobileBertLayer: one MobileBERT block (optional bottleneck, attention, extra FFNs, intermediate, output).
class TFMobileBertLayer(keras.layers.Layer):
    def __init__(self, config, **kwargs):
        """Create the sub-layers of the block according to `config`."""
        super().__init__(**kwargs)
        self.use_bottleneck = config.use_bottleneck
        self.num_feedforward_networks = config.num_feedforward_networks
        self.attention = TFMobileBertAttention(config, name="attention")
        self.intermediate = TFMobileBertIntermediate(config, name="intermediate")
        self.mobilebert_output = TFMobileBertOutput(config, name="output")

        if self.use_bottleneck:
            self.bottleneck = TFBottleneck(config, name="bottleneck")

        # `self.ffn` only exists when more than one feed-forward network is configured; it holds the
        # additional `num_feedforward_networks - 1` networks.
        if config.num_feedforward_networks > 1:
            self.ffn = [TFFFNLayer(config, name=f"ffn.{i}") for i in range(config.num_feedforward_networks - 1)]

    def call(self, hidden_states, attention_mask, head_mask, output_attentions, training=False):
        """
        Run the block on `hidden_states`.

        Args:
            hidden_states: Input to the block.
            attention_mask: Forwarded to the attention layer.
            head_mask: Forwarded to the attention layer.
            output_attentions: Whether the attention layer should also return attention probabilities.
            training: Forwarded to the attention and output layers.

        Returns:
            A tuple starting with the layer output, followed by any extra attention outputs, then a constant
            zero tensor and the intermediate tensors (query, key, value, layer input, attention output,
            intermediate output), and finally the attention output before and after every extra FFN.
        """
        if self.use_bottleneck:
            query_tensor, key_tensor, value_tensor, layer_input = self.bottleneck(hidden_states)
        else:
            # Without bottleneck the hidden states serve as query, key, value and residual input alike.
            query_tensor, key_tensor, value_tensor, layer_input = [hidden_states] * 4

        attention_outputs = self.attention(
            query_tensor,
            key_tensor,
            value_tensor,
            layer_input,
            attention_mask,
            head_mask,
            output_attentions,
            training=training,
        )

        attention_output = attention_outputs[0]
        # Collects the attention output and its value after each additional feed-forward network.
        s = (attention_output,)

        # Same condition as in __init__, so `self.ffn` is only touched when it was actually created.
        if self.num_feedforward_networks > 1:
            for ffn_module in self.ffn:
                attention_output = ffn_module(attention_output)
                s += (attention_output,)

        intermediate_output = self.intermediate(attention_output)
        # The attention output and the block input are the two residual tensors of the output layer.
        layer_output = self.mobilebert_output(intermediate_output, attention_output, hidden_states, training=training)

        outputs = (
            (layer_output,)
            + attention_outputs[1:]  # attention probabilities, when they were requested
            + (
                tf.constant(0),
                query_tensor,
                key_tensor,
                value_tensor,
                layer_input,
                attention_output,
                intermediate_output,
            )
            + s
        )

        return outputs

    def build(self, input_shape=None):
        """Build every existing sub-layer once, each under its own name scope."""
        if self.built:
            return
        self.built = True
        if getattr(self, "attention", None) is not None:
            with tf.name_scope(self.attention.name):
                self.attention.build(None)
        if getattr(self, "intermediate", None) is not None:
            with tf.name_scope(self.intermediate.name):
                self.intermediate.build(None)
        if getattr(self, "mobilebert_output", None) is not None:
            with tf.name_scope(self.mobilebert_output.name):
                self.mobilebert_output.build(None)
        if getattr(self, "bottleneck", None) is not None:
            with tf.name_scope(self.bottleneck.name):
                self.bottleneck.build(None)
        if getattr(self, "ffn", None) is not None:
            for layer in self.ffn:
                with tf.name_scope(layer.name):
                    layer.build(None)
class TFMobileBertEncoder(keras.layers.Layer):
    """
    Encoder made of `config.num_hidden_layers` `TFMobileBertLayer` blocks applied one after another.
    """

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        # Output flags copied from the configuration.
        self.output_attentions = config.output_attentions
        self.output_hidden_states = config.output_hidden_states
        # One block per hidden layer, named "layer_._<index>".
        self.layer = [TFMobileBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)]

    def call(
        self,
        hidden_states,
        attention_mask,
        head_mask,
        output_attentions,
        output_hidden_states,
        return_dict,
        training=False,
    ):
        """
        Run the input through every block in order.

        Args:
            hidden_states: Input fed to the first block.
            attention_mask: Passed unchanged to every block.
            head_mask: Indexable container; element `i` is passed to block `i`.
            output_attentions: When truthy, element 1 of each block's output is collected.
            output_hidden_states: When truthy, the input of every block plus the final output are collected.
            return_dict: When truthy, a `TFBaseModelOutput` is returned instead of a plain tuple.
            training: Forwarded to every block.

        Returns:
            A `TFBaseModelOutput`, or a tuple of the non-`None` values among
            `(last hidden state, collected hidden states, collected attentions)`.
        """
        collected_states = () if output_hidden_states else None
        collected_attentions = () if output_attentions else None

        for idx, block in enumerate(self.layer):
            # Record the state entering this block.
            if output_hidden_states:
                collected_states += (hidden_states,)

            block_outputs = block(hidden_states, attention_mask, head_mask[idx], output_attentions, training=training)
            hidden_states = block_outputs[0]

            if output_attentions:
                collected_attentions += (block_outputs[1],)

        # Record the state leaving the last block.
        if output_hidden_states:
            collected_states += (hidden_states,)

        if return_dict:
            return TFBaseModelOutput(
                last_hidden_state=hidden_states,
                hidden_states=collected_states,
                attentions=collected_attentions,
            )
        return tuple(
            item for item in (hidden_states, collected_states, collected_attentions) if item is not None
        )

    def build(self, input_shape=None):
        """Build every block under its own name scope; does nothing if already built."""
        if self.built:
            return
        self.built = True

        blocks = getattr(self, "layer", None)
        if blocks is None:
            return
        for block in blocks:
            with tf.name_scope(block.name):
                block.build(None)


class TFMobileBertPooler(keras.layers.Layer):
    """
    Pools hidden states by selecting index 0 along axis 1, optionally followed by a tanh-activated dense layer.

    The dense layer only exists when `config.classifier_activation` is truthy.
    """

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.do_activate = config.classifier_activation
        if self.do_activate:
            # Projection of size `config.hidden_size` with tanh activation.
            self.dense = keras.layers.Dense(
                config.hidden_size,
                kernel_initializer=get_initializer(config.initializer_range),
                activation="tanh",
                name="dense",
            )
        self.config = config

    def call(self, hidden_states):
        """Return `hidden_states[:, 0]`, passed through the dense layer when activation is enabled."""
        cls_state = hidden_states[:, 0]
        if self.do_activate:
            return self.dense(cls_state)
        return cls_state

    def build(self, input_shape=None):
        """Build the dense layer, if it was created; does nothing if already built."""
        if self.built:
            return
        self.built = True

        projection = getattr(self, "dense", None)
        if projection is not None:
            with tf.name_scope(projection.name):
                projection.build([None, None, self.config.hidden_size])
class TFMobileBertPredictionHeadTransform(keras.layers.Layer):
    """
    Transform applied to hidden states ahead of the prediction head: dense -> activation -> layer norm.
    """

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.dense = keras.layers.Dense(
            config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
        )
        # `config.hidden_act` is either an activation name (resolved through `get_tf_activation`)
        # or something that is used directly as the activation.
        activation = config.hidden_act
        self.transform_act_fn = get_tf_activation(activation) if isinstance(activation, str) else activation
        self.LayerNorm = NORM2FN["layer_norm"](config.hidden_size, epsilon=config.layer_norm_eps, name="LayerNorm")
        self.config = config

    def call(self, hidden_states):
        """Apply the dense layer, then the activation, then the normalization, and return the result."""
        projected = self.dense(hidden_states)
        activated = self.transform_act_fn(projected)
        return self.LayerNorm(activated)

    def build(self, input_shape=None):
        """Build the dense and normalization sub-layers under their own name scopes; no-op if already built."""
        if self.built:
            return
        self.built = True

        projection = getattr(self, "dense", None)
        if projection is not None:
            with tf.name_scope(projection.name):
                projection.build([None, None, self.config.hidden_size])

        norm = getattr(self, "LayerNorm", None)
        if norm is not None:
            with tf.name_scope(norm.name):
                norm.build(None)


class TFMobileBertLMPredictionHead(keras.layers.Layer):
    """
    Language-modeling prediction head.

    Hidden states go through `TFMobileBertPredictionHeadTransform` and are then multiplied by a matrix obtained by
    stacking the transposed `decoder` weight (`vocab_size x embedding_size`) on top of the `dense` weight
    (`(hidden_size - embedding_size) x vocab_size`) along axis 0; `bias` is added to the product.
    """

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.transform = TFMobileBertPredictionHeadTransform(config, name="transform")
        self.config = config

    def build(self, input_shape=None):
        """
        Create the head's weights and build the transform sub-layer.

        Args:
            input_shape: Unused.
        """
        # The guard must come before any `add_weight` call: otherwise a second `build()` would allocate fresh
        # zero-initialised variables and rebind `bias` / `dense` / `decoder`, discarding whatever was installed
        # through `set_bias` / `set_output_embeddings` in the meantime.
        if self.built:
            return
        self.built = True

        self.bias = self.add_weight(shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="bias")
        self.dense = self.add_weight(
            shape=(self.config.hidden_size - self.config.embedding_size, self.config.vocab_size),
            initializer="zeros",
            trainable=True,
            name="dense/weight",
        )
        self.decoder = self.add_weight(
            shape=(self.config.vocab_size, self.config.embedding_size),
            initializer="zeros",
            trainable=True,
            name="decoder/weight",
        )

        if getattr(self, "transform", None) is not None:
            with tf.name_scope(self.transform.name):
                self.transform.build(None)

    def get_output_embeddings(self):
        """Return this layer itself as the holder of the output embeddings."""
        return self

    def set_output_embeddings(self, value):
        """Replace the `decoder` weight and update `config.vocab_size` from the first dimension of `value`."""
        self.decoder = value
        self.config.vocab_size = shape_list(value)[0]

    def get_bias(self):
        """Return the bias as a one-entry dict keyed by `"bias"`."""
        return {"bias": self.bias}

    def set_bias(self, value):
        """Replace the bias with `value["bias"]` and update `config.vocab_size` from its first dimension."""
        self.bias = value["bias"]
        self.config.vocab_size = shape_list(value["bias"])[0]

    def call(self, hidden_states):
        """Transform `hidden_states`, project them with the stacked decoder/dense matrix and add the bias."""
        hidden_states = self.transform(hidden_states)
        # Transposed decoder rows followed by the dense rows form the full projection matrix.
        hidden_states = tf.matmul(hidden_states, tf.concat([tf.transpose(self.decoder), self.dense], axis=0))
        hidden_states = hidden_states + self.bias
        return hidden_states
class TFMobileBertMLMHead(keras.layers.Layer):
    """Thin wrapper that owns a `TFMobileBertLMPredictionHead` under the name "predictions"."""

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.predictions = TFMobileBertLMPredictionHead(config, name="predictions")

    def call(self, sequence_output):
        """Return the prediction scores produced by the wrapped head for `sequence_output`."""
        return self.predictions(sequence_output)

    def build(self, input_shape=None):
        """Build the wrapped head under its own name scope; does nothing if already built."""
        if self.built:
            return
        self.built = True

        head = getattr(self, "predictions", None)
        if head is not None:
            with tf.name_scope(head.name):
                head.build(None)


@keras_serializable
class TFMobileBertMainLayer(keras.layers.Layer):
    """
    Core MobileBERT layer: embeddings, encoder and an optional pooler.
    """

    config_class = MobileBertConfig

    def __init__(self, config, add_pooling_layer=True, **kwargs):
        super().__init__(**kwargs)

        # Settings copied from the configuration.
        self.config = config
        self.num_hidden_layers = config.num_hidden_layers
        self.output_attentions = config.output_attentions
        self.output_hidden_states = config.output_hidden_states
        self.return_dict = config.use_return_dict

        # Sub-layers; the pooler is `None` when `add_pooling_layer` is falsy.
        self.embeddings = TFMobileBertEmbeddings(config, name="embeddings")
        self.encoder = TFMobileBertEncoder(config, name="encoder")
        if add_pooling_layer:
            self.pooler = TFMobileBertPooler(config, name="pooler")
        else:
            self.pooler = None

    def get_input_embeddings(self):
        """Return the embeddings layer."""
        return self.embeddings

    def set_input_embeddings(self, value):
        """Install `value` as the embeddings weight and set `vocab_size` from its first dimension."""
        embedding_layer = self.embeddings
        embedding_layer.weight = value
        embedding_layer.vocab_size = shape_list(value)[0]

    def _prune_heads(self, heads_to_prune):
        """
        Head pruning hook; not implemented for this layer.

        Args:
            heads_to_prune: dict of {layer_num: list of heads to prune in this layer}.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    @unpack_inputs
    def call(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        training=False,
    ):
        # Forward pass of the main layer; the body is elided (`...`) in this listing.
        ...

    def build(self, input_shape=None):
        """Build embeddings, encoder and pooler (when present), each under its own name scope."""
        if self.built:
            return
        self.built = True

        for attr_name in ("embeddings", "encoder", "pooler"):
            sublayer = getattr(self, attr_name, None)
            if sublayer is None:
                continue
            with tf.name_scope(sublayer.name):
                sublayer.build(None)


class TFMobileBertPreTrainedModel(TFPreTrainedModel):
    """
    Abstract base class for the TF MobileBERT models.

    It takes care of weights initialization and offers a simple interface for downloading and loading pretrained
    models. Only two class attributes are set here: the configuration class and the base-model prefix.
    """

    base_model_prefix = "mobilebert"
    config_class = MobileBertConfig


@dataclass
class TFMobileBertForPreTrainingOutput(ModelOutput):
    """
    Output type of [`TFMobileBertForPreTraining`].

    Args:
        loss (`tf.Tensor`, *optional*):
            Loss value; defaults to `None`. `TFMobileBertForPreTraining.call` only fills it in when both `labels` and
            `next_sentence_label` are provided.
        prediction_logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`tf.Tensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False
            continuation before SoftMax).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: tf.Tensor | None = None
    prediction_logits: tf.Tensor = None
    seq_relationship_logits: tf.Tensor = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


# Shared class-level documentation, bound to the name that the `add_start_docstrings(...)` decorators on the
# model classes in this file pass as their second argument.
MOBILEBERT_START_DOCSTRING = r"""
    This model inherits from `TFPreTrainedModel`. Check the superclass documentation for the generic methods the
    library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a `keras.Model` subclass. Use it as a regular TF 2.0 Keras Model and refer to the TF 2.0
    documentation for all matters related to general usage and behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
      `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
      `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config (`MobileBertConfig`): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the `PreTrainedModel.from_pretrained` method to load the model weights.
"""

"""
    The bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.

    This model inherits the documentation from `MOBILEBERT_START_DOCSTRING`, which provides detailed information about
    its usage with TensorFlow 2.0, input formats, and integration with Keras.

    Parameters:
        *inputs: Variable length input arguments to allow flexible input formats as described in the `MOBILEBERT_START_DOCSTRING`.
        **kwargs: Additional keyword arguments passed to the superclass constructor.
"""

@add_start_docstrings(
    "The bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.",
    MOBILEBERT_START_DOCSTRING,
)
class TFMobileBertModel(TFMobileBertPreTrainedModel):
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        # The whole model is a single main layer registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")

    # Decorators, outermost first: unpack the inputs, prepend the shared input documentation,
    # then attach the generated code sample for this checkpoint/output type.
    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFBaseModelOutputWithPooling,
        config_class=_CONFIG_FOR_DOC,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFBaseModelOutputWithPooling]:
        # Pure delegation: every argument is forwarded by keyword to the main layer and its result
        # is returned untouched.
        return self.mobilebert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

    def build(self, input_shape=None):
        # Idempotent: a model that is already built is left alone.
        if self.built:
            return
        self.built = True

        # Build the main layer (when present) inside a name scope matching its layer name.
        main_layer = getattr(self, "mobilebert", None)
        if main_layer is not None:
            with tf.name_scope(main_layer.name):
                main_layer.build(None)
"""
MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
`next sentence prediction (classification)` head.
"""
# MobileBERT pretraining model; combines TFMobileBertPreTrainedModel with the TFMobileBertPreTrainingLoss mixin.
class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel, TFMobileBertPreTrainingLoss):
    """
    MobileBert model with two heads on top, as used during pretraining: a masked language modeling head and a next
    sentence prediction (classification) head.
    """

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        # Backbone, registered as "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
        # Masked-LM head, registered as "predictions___cls".
        self.predictions = TFMobileBertMLMHead(config, name="predictions___cls")
        # Next-sentence-prediction head, registered as "seq_relationship___cls".
        self.seq_relationship = TFMobileBertOnlyNSPHead(config, name="seq_relationship___cls")

    def get_lm_head(self):
        """Return the inner prediction layer of the masked-LM head."""
        return self.predictions.predictions

    def get_prefix_bias_name(self):
        """Deprecated: emit a `FutureWarning` and return "<model name>/<MLM head name>/<inner head name>"."""
        warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
        return "/".join((self.name, self.predictions.name, self.predictions.predictions.name))

    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=TFMobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: np.ndarray | tf.Tensor | None = None,
        next_sentence_label: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFMobileBertForPreTrainingOutput]:
        # NOTE(review): `replace_return_docstrings` appears to rely on the bare `Returns:` line in the docstring
        # below as its placeholder - keep that line on its own.
        r"""
        Run the backbone and both pretraining heads, optionally computing the pretraining loss.

        labels (`np.ndarray` or `tf.Tensor`, *optional*):
            Targets forwarded to `hf_compute_loss` under the key `"labels"`. A loss is only computed when
            `next_sentence_label` is provided as well.
        next_sentence_label (`np.ndarray` or `tf.Tensor`, *optional*):
            Targets forwarded to `hf_compute_loss` under the key `"next_sentence_label"`. A loss is only computed
            when `labels` is provided as well.

        Returns:

        Examples:

        ```
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForPreTraining

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
        >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        >>> outputs = model(input_ids)
        >>> prediction_scores, seq_relationship_scores = outputs[:2]
        ```
        """
        backbone_outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        # The first two backbone outputs feed the MLM head and the NSP head respectively.
        sequence_output, pooled_output = backbone_outputs[:2]
        mlm_scores = self.predictions(sequence_output)
        nsp_scores = self.seq_relationship(pooled_output)

        # Both label sets are required before a loss is computed.
        total_loss = None
        if labels is not None and next_sentence_label is not None:
            loss_targets = {"labels": labels, "next_sentence_label": next_sentence_label}
            total_loss = self.hf_compute_loss(labels=loss_targets, logits=(mlm_scores, nsp_scores))

        if return_dict:
            return TFMobileBertForPreTrainingOutput(
                loss=total_loss,
                prediction_logits=mlm_scores,
                seq_relationship_logits=nsp_scores,
                hidden_states=backbone_outputs.hidden_states,
                attentions=backbone_outputs.attentions,
            )

        # Tuple form: both score tensors, then whatever the backbone returned after its first two outputs;
        # the loss is prepended only when it was computed.
        flat_outputs = (mlm_scores, nsp_scores) + backbone_outputs[2:]
        if total_loss is None:
            return flat_outputs
        return (total_loss,) + flat_outputs

    def build(self, input_shape=None):
        """Build the backbone and both heads, each under its own name scope; no-op if already built."""
        if self.built:
            return
        self.built = True

        for attr_name in ("mobilebert", "predictions", "seq_relationship"):
            sublayer = getattr(self, attr_name, None)
            if sublayer is None:
                continue
            with tf.name_scope(sublayer.name):
                sublayer.build(None)

    def tf_to_pt_weight_rename(self, tf_weight):
        """
        Return the tuple of names associated with `tf_weight`.

        `"cls.predictions.decoder.weight"` is additionally paired with
        `"mobilebert.embeddings.word_embeddings.weight"`; any other name is returned alone in a 1-tuple.
        """
        if tf_weight != "cls.predictions.decoder.weight":
            return (tf_weight,)
        return tf_weight, "mobilebert.embeddings.word_embeddings.weight"
# The decorator attaches the class documentation: a one-line summary followed by the shared start docstring.
@add_start_docstrings("""MobileBert Model with a `language modeling` head on top.""", MOBILEBERT_START_DOCSTRING)
class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModelingLoss):
    # Name patterns listed here are ignored when they show up as unexpected weights while loading a TF model
    # from a PT checkpoint (this model has no pooler and no next-sentence head).
    _keys_to_ignore_on_load_unexpected = [
        r"pooler",
        r"seq_relationship___cls",
        r"cls.seq_relationship",
    ]

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        # Backbone without pooling layer, registered as "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, add_pooling_layer=False, name="mobilebert")
        # Masked-LM head, registered as "predictions___cls".
        self.predictions = TFMobileBertMLMHead(config, name="predictions___cls")

    # Returns the inner prediction layer of the masked-LM head.
    def get_lm_head(self):
        return self.predictions.predictions

    # Deprecated in favour of `get_bias`: warns, then returns
    # "<model name>/<MLM head name>/<inner prediction head name>".
    def get_prefix_bias_name(self):
        warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
        # The head is stored as `self.predictions` (see `__init__`); this class never assigns an `mlm`
        # attribute, so the previous `self.mlm` lookup raised AttributeError.
        return self.name + "/" + self.predictions.name + "/" + self.predictions.predictions.name

    # Decorators, outermost first: unpack the inputs, prepend the shared input documentation, then attach the
    # generated code sample.
    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output="'paris'",
        expected_loss=0.57,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFMaskedLMOutput]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels
        """
        # Run the backbone with all inputs forwarded.
        outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )
        # The first backbone output is fed to the masked-LM head.
        sequence_output = outputs[0]
        prediction_scores = self.predictions(sequence_output, training=training)

        # A loss is only computed when labels are supplied.
        loss = None if labels is None else self.hf_compute_loss(labels, prediction_scores)

        # Tuple form: scores followed by the backbone outputs from index 2 on; the loss is prepended when present.
        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TFMaskedLMOutput(
            loss=loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def build(self, input_shape=None):
        # Idempotent: nothing to do once the model is built.
        if self.built:
            return
        self.built = True
        # Build the backbone and the head (when present), each under a name scope equal to its layer name.
        if getattr(self, "mobilebert", None) is not None:
            with tf.name_scope(self.mobilebert.name):
                self.mobilebert.build(None)
        if getattr(self, "predictions", None) is not None:
            with tf.name_scope(self.predictions.name):
                self.predictions.build(None)

    def tf_to_pt_weight_rename(self, tf_weight):
        # "cls.predictions.decoder.weight" is additionally paired with the word-embedding weight name;
        # every other name is returned alone in a 1-tuple.
        if tf_weight == "cls.predictions.decoder.weight":
            return tf_weight, "mobilebert.embeddings.word_embeddings.weight"
        else:
            return (tf_weight,)
# Head layer used for next sentence prediction (NSP) only.
class TFMobileBertOnlyNSPHead(keras.layers.Layer):
    """Next-sentence-prediction head: a single dense layer with 2 output units named "seq_relationship"."""

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.seq_relationship = keras.layers.Dense(2, name="seq_relationship")
        # Kept so that `build` can read `hidden_size`.
        self.config = config

    def call(self, pooled_output):
        """Return the dense layer's scores for `pooled_output`."""
        return self.seq_relationship(pooled_output)

    def build(self, input_shape=None):
        """Build the dense layer with input shape `[None, None, config.hidden_size]`; no-op if already built."""
        if self.built:
            return
        self.built = True

        classifier = getattr(self, "seq_relationship", None)
        if classifier is not None:
            with tf.name_scope(classifier.name):
                classifier.build([None, None, self.config.hidden_size])


@add_start_docstrings(
    """MobileBert 模型,顶部带有`下一句预测(分类)`头部。""",
    MOBILEBERT_START_DOCSTRING,
)
# MobileBERT main layer followed by the NSP-only head; the loss comes from TFNextSentencePredictionLoss.
class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextSentencePredictionLoss):
    # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [r"predictions___cls", r"cls.predictions"]

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        # Backbone, registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
        # NSP-only head, registered under the name "seq_relationship___cls".
        self.cls = TFMobileBertOnlyNSPHead(config, name="seq_relationship___cls")

    # NOTE: `replace_return_docstrings` needs an empty `Return:` (or `Returns:`) line in the docstring
    # below as its placeholder; keep that line when editing the docstring.
    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        next_sentence_label: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFNextSentencePredictorOutput]:
        r"""
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForNextSentencePrediction

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")

        >>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
        ```"""
        # Run the MobileBERT backbone.
        outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        # Index 1 of the backbone output is fed to the NSP head as the pooled representation.
        pooled_output = outputs[1]
        seq_relationship_scores = self.cls(pooled_output)

        # The loss is only computed when labels were supplied.
        next_sentence_loss = (
            None
            if next_sentence_label is None
            else self.hf_compute_loss(labels=next_sentence_label, logits=seq_relationship_scores)
        )

        # Tuple output: (loss,)? + (scores,) + whatever the backbone returned from index 2 onwards.
        if not return_dict:
            output = (seq_relationship_scores,) + outputs[2:]
            return ((next_sentence_loss,) + output) if next_sentence_loss is not None else output

        return TFNextSentencePredictorOutput(
            loss=next_sentence_loss,
            logits=seq_relationship_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def build(self, input_shape=None):
        """Build the backbone and the NSP head once, each inside its own name scope."""
        if self.built:
            return
        self.built = True

        # Sub-layers that are missing or None are skipped.
        if getattr(self, "mobilebert", None) is not None:
            with tf.name_scope(self.mobilebert.name):
                self.mobilebert.build(None)

        if getattr(self, "cls", None) is not None:
            with tf.name_scope(self.cls.name):
                self.cls.build(None)
# Sequence classification / regression model: MobileBERT plus a linear layer on the pooled output (e.g. GLUE).
@add_start_docstrings(
    """
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    """,
    MOBILEBERT_START_DOCSTRING,
)
class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSequenceClassificationLoss):
    # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [
        r"predictions___cls",
        r"seq_relationship___cls",
        r"cls.predictions",
        r"cls.seq_relationship",
    ]
    # Layer names that may be missing when loading from a PT model.
    _keys_to_ignore_on_load_missing = [r"dropout"]

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        # Backbone, registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")

        # Use the classifier-specific dropout rate when configured, else the hidden dropout rate.
        dropout_rate = config.classifier_dropout
        if dropout_rate is None:
            dropout_rate = config.hidden_dropout_prob
        self.dropout = keras.layers.Dropout(dropout_rate)

        # Linear head with one output unit per label.
        self.classifier = keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier",
        )
        self.config = config

    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=TFSequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFSequenceClassifierOutput]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        encoder_outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        # Index 1 of the backbone output is used as the pooled representation; dropout precedes the head.
        pooled = self.dropout(encoder_outputs[1], training=training)
        logits = self.classifier(pooled)

        # A loss is produced only when labels are given.
        if labels is None:
            loss = None
        else:
            loss = self.hf_compute_loss(labels, logits)

        if return_dict:
            return TFSequenceClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )

        # Tuple output: optional loss first, then logits, then backbone outputs from index 2 onwards.
        output = (logits,) + encoder_outputs[2:]
        return output if loss is None else (loss,) + output

    def build(self, input_shape=None):
        """Build the backbone and the classifier once, each inside its own name scope."""
        if self.built:
            return
        self.built = True

        backbone = getattr(self, "mobilebert", None)
        if backbone is not None:
            with tf.name_scope(backbone.name):
                backbone.build(None)

        head = getattr(self, "classifier", None)
        if head is not None:
            with tf.name_scope(head.name):
                # The classifier is built for inputs whose last dimension is `hidden_size`.
                head.build([None, None, self.config.hidden_size])
# Extractive question-answering model (e.g. SQuAD): MobileBERT plus a span classification head.
@add_start_docstrings(
    """
    MobileBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    """,
    MOBILEBERT_START_DOCSTRING,
)
class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAnsweringLoss):
    # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [
        r"pooler",
        r"predictions___cls",
        r"seq_relationship___cls",
        r"cls.predictions",
        r"cls.seq_relationship",
    ]

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        # Backbone without the pooling layer, registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, add_pooling_layer=False, name="mobilebert")

        # Dense head with `config.num_labels` outputs per token, named "qa_outputs".
        self.qa_outputs = keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="qa_outputs",
        )
        self.config = config

    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_QA,
        output_type=TFQuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
        qa_target_start_index=_QA_TARGET_START_INDEX,
        qa_target_end_index=_QA_TARGET_END_INDEX,
        expected_output=_QA_EXPECTED_OUTPUT,
        expected_loss=_QA_EXPECTED_LOSS,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        start_positions: np.ndarray | tf.Tensor | None = None,
        end_positions: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFQuestionAnsweringModelOutput]:
        r"""
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        """
        encoder_outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        # Index 0 of the backbone output (the sequence output) feeds the QA head.
        span_logits = self.qa_outputs(encoder_outputs[0])
        # Split the last axis into two halves (start / end) and drop that axis from each half,
        # leaving one score per token for each of the two tensors.
        start_logits, end_logits = (tf.squeeze(half, axis=-1) for half in tf.split(span_logits, 2, axis=-1))

        # The loss needs both the start and the end positions.
        loss = None
        if not (start_positions is None or end_positions is None):
            labels = {"start_position": start_positions, "end_position": end_positions}
            loss = self.hf_compute_loss(labels, (start_logits, end_logits))

        if return_dict:
            return TFQuestionAnsweringModelOutput(
                loss=loss,
                start_logits=start_logits,
                end_logits=end_logits,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )

        # Tuple output: optional loss, both logit tensors, then backbone outputs from index 2 onwards.
        output = (start_logits, end_logits) + encoder_outputs[2:]
        return output if loss is None else (loss,) + output

    def build(self, input_shape=None):
        """Build the backbone and the QA head once, each inside its own name scope."""
        if self.built:
            return
        self.built = True

        backbone = getattr(self, "mobilebert", None)
        if backbone is not None:
            with tf.name_scope(backbone.name):
                backbone.build(None)

        head = getattr(self, "qa_outputs", None)
        if head is not None:
            with tf.name_scope(head.name):
                # The QA head is built for inputs whose last dimension is `hidden_size`.
                head.build([None, None, self.config.hidden_size])
"""
MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
a softmax) e.g. for RocStories/SWAG tasks.
"""
# Multiple-choice model: MobileBERT plus a single-unit linear layer on the pooled output.
# The class docstring is attached through `add_start_docstrings`, like the other task heads in this file.
@add_start_docstrings(
    """
    MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    """,
    MOBILEBERT_START_DOCSTRING,
)
class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoiceLoss):
    # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [
        r"predictions___cls",
        r"seq_relationship___cls",
        r"cls.predictions",
        r"cls.seq_relationship",
    ]
    # Layer names that may be missing when loading from a PT model.
    _keys_to_ignore_on_load_missing = [r"dropout"]

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        # Backbone, registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
        # Dropout applied to the pooled output, using the hidden dropout probability.
        self.dropout = keras.layers.Dropout(config.hidden_dropout_prob)
        # One score per (example, choice) row.
        self.classifier = keras.layers.Dense(
            1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
        )
        self.config = config

    @unpack_inputs
    @add_start_docstrings_to_model_forward(
        MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
    )
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMultipleChoiceModelOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFMultipleChoiceModelOutput]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices - 1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        """
        # `num_choices` and `seq_length` are read from axes 1 and 2 of whichever input was provided.
        if input_ids is not None:
            num_choices = shape_list(input_ids)[1]
            seq_length = shape_list(input_ids)[2]
        else:
            num_choices = shape_list(inputs_embeds)[1]
            seq_length = shape_list(inputs_embeds)[2]

        # Collapse the leading (batch, choice) axes so every choice becomes its own row for the backbone.
        flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
        flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
        flat_token_type_ids = tf.reshape(token_type_ids, (-1, seq_length)) if token_type_ids is not None else None
        flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
        flat_inputs_embeds = (
            tf.reshape(inputs_embeds, (-1, seq_length, shape_list(inputs_embeds)[3]))
            if inputs_embeds is not None
            else None
        )

        # Keyword arguments are used, as in the other heads of this file, so the call does not depend
        # on the positional order of the main layer's parameters.
        outputs = self.mobilebert(
            flat_input_ids,
            attention_mask=flat_attention_mask,
            token_type_ids=flat_token_type_ids,
            position_ids=flat_position_ids,
            head_mask=head_mask,
            inputs_embeds=flat_inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )

        # Index 1 of the backbone output is used as the pooled representation.
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)

        # Regroup the per-row scores into one row of `num_choices` scores per example.
        reshaped_logits = tf.reshape(logits, (-1, num_choices))

        loss = None if labels is None else self.hf_compute_loss(labels, reshaped_logits)

        # Tuple output: optional loss, the reshaped logits, then backbone outputs from index 2 onwards.
        if not return_dict:
            output = (reshaped_logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TFMultipleChoiceModelOutput(
            loss=loss,
            logits=reshaped_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def build(self, input_shape=None):
        """Build the backbone and the classifier once, each inside its own name scope."""
        if self.built:
            return
        self.built = True

        # Sub-layers that are missing or None are skipped.
        if getattr(self, "mobilebert", None) is not None:
            with tf.name_scope(self.mobilebert.name):
                self.mobilebert.build(None)

        if getattr(self, "classifier", None) is not None:
            with tf.name_scope(self.classifier.name):
                # The classifier is built for inputs whose last dimension is `hidden_size`.
                self.classifier.build([None, None, self.config.hidden_size])
"""
MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
for Named-Entity-Recognition (NER) tasks.
"""
@add_start_docstrings(
    """
    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    """,
    MOBILEBERT_START_DOCSTRING,
)
class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenClassificationLoss):
    # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [
        r"pooler",
        r"predictions___cls",
        r"seq_relationship___cls",
        r"cls.predictions",
        r"cls.seq_relationship",
    ]
    # Layer names that may be missing when loading from a PT model.
    _keys_to_ignore_on_load_missing = [r"dropout"]

    def __init__(self, config, *inputs, **kwargs):
        """
        Initialize the token classification model.

        Args:
            config: Configuration object; this method reads `num_labels`, `classifier_dropout`,
                `hidden_dropout_prob` and `initializer_range` from it.
            *inputs: Positional arguments forwarded to the parent constructor.
            **kwargs: Keyword arguments forwarded to the parent constructor.
        """
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        # Backbone without the pooling layer, registered under the name "mobilebert".
        self.mobilebert = TFMobileBertMainLayer(config, add_pooling_layer=False, name="mobilebert")

        # Use the classifier-specific dropout rate when configured, else the hidden dropout rate.
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = keras.layers.Dropout(classifier_dropout)

        # Linear head with one output unit per label, applied to every token.
        self.classifier = keras.layers.Dense(
            config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
        )

        # Kept because `build` reads `hidden_size` from it.
        self.config = config

    @unpack_inputs
    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TFTokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
        expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def call(
        self,
        input_ids: TFModelInputType | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: np.ndarray | tf.Tensor | None = None,
        training: Optional[bool] = False,
    ) -> Union[Tuple, TFTokenClassifierOutput]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        """
        # Run the backbone. `labels` is consumed only by the loss below and is not forwarded.
        outputs = self.mobilebert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )
        # Index 0 of the backbone output (the sequence output) feeds the classifier.
        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output, training=training)
        logits = self.classifier(sequence_output)

        # A loss is produced only when labels are given.
        loss = None if labels is None else self.hf_compute_loss(labels, logits)

        # Tuple output: optional loss, logits, then backbone outputs from index 2 onwards.
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TFTokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def build(self, input_shape=None):
        """Build the backbone and the classifier once, each inside its own name scope."""
        if self.built:
            return
        self.built = True

        # Sub-layers that are missing or None are skipped.
        if getattr(self, "mobilebert", None) is not None:
            with tf.name_scope(self.mobilebert.name):
                self.mobilebert.build(None)

        if getattr(self, "classifier", None) is not None:
            with tf.name_scope(self.classifier.name):
                # The classifier is built for inputs whose last dimension is `hidden_size`.
                self.classifier.build([None, None, self.config.hidden_size])

.\models\mobilebert\tokenization_mobilebert.py

# coding=utf-8
# 上面是指定源代码文件的编码格式为UTF-8

# 版权声明和许可信息,这段代码受 Apache License, Version 2.0 许可,详细信息可以在给定的 URL 查看
# http://www.apache.org/licenses/LICENSE-2.0

"""Tokenization classes for MobileBERT."""
# 以上是对本文件模块的简要描述和标识,说明其包含 MobileBERT 的分词类

import collections  # 导入 collections 模块,用于高性能容器数据类型的支持
import os  # 导入 os 模块,用于与操作系统进行交互
import unicodedata  # 导入 unicodedata 模块,用于对 Unicode 字符数据库的访问和操作
from typing import List, Optional, Tuple  # 导入类型提示的相关内容

from ...tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace
# 导入必要的 tokenizer 相关模块和函数
from ...utils import logging  # 导入 logging 模块用于日志记录

logger = logging.get_logger(__name__)  # module-level logger

VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}  # file name used for the vocabulary file

PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {"mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt"}
}
# Maps the "mobilebert-uncased" checkpoint name to the URL of its vocabulary file.

PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"mobilebert-uncased": 512}
# Positional-embedding size per checkpoint name; 512 for "mobilebert-uncased".

PRETRAINED_INIT_CONFIGURATION = {}
# Per-checkpoint init configuration; intentionally an empty dict here.

# Originally taken from `transformers.models.bert.tokenization_bert.load_vocab`.
def load_vocab(vocab_file):
    """Read a vocabulary file into an ordered token -> index mapping.

    The file is opened as UTF-8 text with one token per line. Each token is mapped to its
    0-based line number, and the result is a `collections.OrderedDict` in file order. When a
    token occurs more than once, the index of its last occurrence is kept.
    """
    with open(vocab_file, "r", encoding="utf-8") as reader:
        # Only the trailing newline is stripped; any other whitespace stays part of the token.
        return collections.OrderedDict((line.rstrip("\n"), index) for index, line in enumerate(reader))

# Originally taken from `transformers.models.bert.tokenization_bert.whitespace_tokenize`.
def whitespace_tokenize(text):
    """Strip `text` and split it on runs of whitespace.

    Returns the list of whitespace-separated pieces, or an empty list when nothing is left
    after stripping.
    """
    stripped = text.strip()
    return stripped.split() if stripped else []

# Copied from transformers.models.bert.tokenization_bert.BertTokenizer with Bert->MobileBert
class MobileBertTokenizer(PreTrainedTokenizer):
    r"""
    Construct a MobileBERT tokenizer. Based on WordPiece.

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
    this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            File containing the vocabulary (one token per line).
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        do_basic_tokenize (`bool`, *optional*, defaults to `True`):
            Whether or not to run [`BasicTokenizer`] before WordPiece.
        never_split (`Iterable`, *optional*):
            Collection of tokens which will never be split during tokenization. Only has an effect when
            `do_basic_tokenize=True`.
        unk_token (`str`, *optional*, defaults to `"[UNK]"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        sep_token (`str`, *optional*, defaults to `"[SEP]"`):
            The separator token, used when building a sequence from multiple sequences and as the last token of a
            sequence built with special tokens.
        pad_token (`str`, *optional*, defaults to `"[PAD]"`):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (`str`, *optional*, defaults to `"[CLS]"`):
            The classifier token. It is the first token of the sequence when built with special tokens.
        mask_token (`str`, *optional*, defaults to `"[MASK]"`):
            The token used for masking values.
        tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
            Whether or not to put whitespace around CJK characters before splitting.
        strip_accents (`bool`, *optional*):
            Whether or not to strip all accents. If this option is not specified, accents are stripped exactly when
            `do_lower_case` is true.
    """

    # File names, download locations, default init arguments and maximum input sizes used by the base class.
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

    def __init__(
        self,
        vocab_file,
        do_lower_case=True,
        do_basic_tokenize=True,
        never_split=None,
        unk_token="[UNK]",
        sep_token="[SEP]",
        pad_token="[PAD]",
        cls_token="[CLS]",
        mask_token="[MASK]",
        tokenize_chinese_chars=True,
        strip_accents=None,
        **kwargs,
    ):
        """
        Load the vocabulary and build the basic and WordPiece sub-tokenizers.

        Raises:
            ValueError: If `vocab_file` is not an existing file.
        """
        if not os.path.isfile(vocab_file):
            raise ValueError(
                f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained"
                " model use `tokenizer = MobileBertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
            )
        self.vocab = load_vocab(vocab_file)
        # Reverse lookup table: id -> token.
        self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
        self.do_basic_tokenize = do_basic_tokenize
        if do_basic_tokenize:
            self.basic_tokenizer = BasicTokenizer(
                do_lower_case=do_lower_case,
                never_split=never_split,
                tokenize_chinese_chars=tokenize_chinese_chars,
                strip_accents=strip_accents,
            )

        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab, unk_token=str(unk_token))

        # The sub-tokenizers and vocabulary are set up before the base class initializer runs.
        super().__init__(
            do_lower_case=do_lower_case,
            do_basic_tokenize=do_basic_tokenize,
            never_split=never_split,
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
            **kwargs,
        )

    @property
    def do_lower_case(self):
        """Whether the basic tokenizer lowercases its input."""
        return self.basic_tokenizer.do_lower_case

    @property
    def vocab_size(self):
        """Number of entries in the base vocabulary (added tokens are not counted)."""
        return len(self.vocab)

    def get_vocab(self):
        """Return the base vocabulary merged with the added-tokens encoder as a plain `dict`."""
        return dict(self.vocab, **self.added_tokens_encoder)

    def _tokenize(self, text, split_special_tokens=False):
        """
        Split `text` into WordPiece tokens.

        When basic tokenization is enabled, tokens the basic tokenizer is configured never to split are kept
        verbatim; every other token is further split by the WordPiece tokenizer.
        """
        split_tokens = []
        if self.do_basic_tokenize:
            for token in self.basic_tokenizer.tokenize(
                text, never_split=self.all_special_tokens if not split_special_tokens else None
            ):
                # A token listed in never_split is kept whole.
                if token in self.basic_tokenizer.never_split:
                    split_tokens.append(token)
                else:
                    split_tokens += self.wordpiece_tokenizer.tokenize(token)
        else:
            split_tokens = self.wordpiece_tokenizer.tokenize(text)
        return split_tokens

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab, falling back to the id of the unknown token."""
        return self.vocab.get(token, self.vocab.get(self.unk_token))

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab, falling back to the unknown token."""
        return self.ids_to_tokens.get(index, self.unk_token)

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string, gluing `##` continuation pieces back on."""
        out_string = " ".join(tokens).replace(" ##", "").strip()
        return out_string

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
        adding special tokens. A MobileBERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of input IDs with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        # Inputs that already carry special tokens are handled by the base class.
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        if token_ids_1 is not None:
            # [CLS] A [SEP] B [SEP]
            return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
        # [CLS] X [SEP]
        return [1] + ([0] * len(token_ids_0)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT sequence
        pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of token IDs for the first sequence.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of token IDs for sequence pairs.

        Returns:
            `List[int]`: List of token type IDs according to the given sequence(s).
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]

        # Segment 0 covers `[CLS] A [SEP]`; segment 1 covers `B [SEP]`.
        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]

    def create_mobilebert_attention_mask(
        self,
        token_ids_0: List[int],
        token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Backward-compatible alias of [`~MobileBertTokenizer.create_token_type_ids_from_sequences`].

        Despite its name, the returned list holds token type IDs (0 for the first sequence, 1 for the second), not an
        attention mask. Prefer calling `create_token_type_ids_from_sequences` directly.
        """
        return self.create_token_type_ids_from_sequences(token_ids_0, token_ids_1)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Write the vocabulary, one token per line and ordered by index, to a file.

        Args:
            save_directory (`str`):
                Existing directory to write the vocabulary file into; any other value is used as the file path itself.
            filename_prefix (`str`, *optional*):
                Prefix prepended (followed by `-`) to the file name.

        Returns:
            `Tuple[str]`: One-element tuple holding the path of the written file.
        """
        # Next index expected while walking the vocabulary in index order.
        index = 0

        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
            )
        else:
            vocab_file = (filename_prefix + "-" if filename_prefix else "") + save_directory

        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                # A gap in the indices is reported but does not stop the save; counting resumes from the gap.
                if index != token_index:
                    logger.warning(
                        f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
                        " Please check that the vocabulary is not corrupted!"
                    )
                    index = token_index
                writer.write(token + "\n")
                index += 1

        return (vocab_file,)
# Copied from transformers.models.bert.tokenization_bert.BasicTokenizer
class BasicTokenizer(object):
    """
    Constructs a BasicTokenizer that will run basic tokenization (punctuation splitting, lower casing, etc.).

    Args:
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        never_split (`Iterable`, *optional*):
            Collection of tokens which will never be split during tokenization. Only has an effect when
            `do_basic_tokenize=True`.
        tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
            Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese.
        strip_accents (`bool`, *optional*):
            Whether or not to strip all accents. If this option is not specified, then it will be determined by the
            value for `lowercase` (as in the original BERT).
        do_split_on_punc (`bool`, *optional*, defaults to `True`):
            In some instances we want to skip the basic punctuation splitting so that later tokenization can capture
            the full context of the words, such as contractions.
    """

    # Inclusive code point ranges treated as "Chinese characters" by `_is_chinese_char`.
    _CJK_RANGES = (
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0x3400, 0x4DBF),  # Extension A
        (0x20000, 0x2A6DF),  # Extension B
        (0x2A700, 0x2B73F),  # Extension C
        (0x2B740, 0x2B81F),  # Extension D
        (0x2B820, 0x2CEAF),  # Extension E
        (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
        (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
    )

    def __init__(
        self,
        do_lower_case=True,
        never_split=None,
        tokenize_chinese_chars=True,
        strip_accents=None,
        do_split_on_punc=True,
    ):
        self.do_lower_case = do_lower_case
        # Stored as a set for fast membership tests; `None` means "nothing is protected".
        self.never_split = set() if never_split is None else set(never_split)
        self.tokenize_chinese_chars = tokenize_chinese_chars
        self.strip_accents = strip_accents
        self.do_split_on_punc = do_split_on_punc

    def tokenize(self, text, never_split=None):
        """
        Basic Tokenization of a piece of text. For sub-word tokenization, see WordPieceTokenizer.

        Args:
            never_split (`List[str]`, *optional*)
                Kept for backward compatibility purposes. Now implemented directly at the base class level (see
                [`PreTrainedTokenizer.tokenize`]) List of token not to split.
        """
        # Tokens protected for this call: the instance-level set plus any passed in.
        if never_split:
            protected = self.never_split | set(never_split)
        else:
            protected = self.never_split

        cleaned = self._clean_text(text)
        # CJK characters are spaced out so that each becomes its own token.
        if self.tokenize_chinese_chars:
            cleaned = self._tokenize_chinese_chars(cleaned)

        # NFC normalization keeps the same character from being treated differently
        # depending on which unicode code points encode it.
        normalized = unicodedata.normalize("NFC", cleaned)

        pieces = []
        for word in whitespace_tokenize(normalized):
            if word not in protected:
                if self.do_lower_case:
                    word = word.lower()
                    # With lower casing on, accents are stripped unless explicitly disabled.
                    drop_accents = self.strip_accents is not False
                else:
                    drop_accents = bool(self.strip_accents)
                if drop_accents:
                    word = self._run_strip_accents(word)
            pieces.extend(self._run_split_on_punc(word, protected))

        return whitespace_tokenize(" ".join(pieces))

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        # NFD splits accented characters into base character + combining marks ("Mn"), which are dropped.
        decomposed = unicodedata.normalize("NFD", text)
        return "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")

    def _run_split_on_punc(self, text, never_split=None):
        """Splits punctuation on a piece of text."""
        if not self.do_split_on_punc:
            return [text]
        if never_split is not None and text in never_split:
            return [text]

        groups = []
        word_open = False
        for ch in text:
            if _is_punctuation(ch):
                # Every punctuation character is a token of its own and closes the current word.
                groups.append([ch])
                word_open = False
            else:
                if not word_open:
                    groups.append([])
                    word_open = True
                groups[-1].append(ch)

        return ["".join(group) for group in groups]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        return "".join(f" {ch} " if self._is_chinese_char(ord(ch)) else ch for ch in text)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters. The modern Korean and
        # Japanese alphabets live in different blocks; they are used to write space-separated words, so they
        # are not treated specially and are handled like all of the other languages.
        return any(low <= cp <= high for low, high in self._CJK_RANGES)

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        kept = []
        for ch in text:
            # NUL, the replacement character and control characters are dropped.
            if ord(ch) in (0, 0xFFFD) or _is_control(ch):
                continue
            # Any kind of whitespace becomes a plain space.
            kept.append(" " if _is_whitespace(ch) else ch)
        return "".join(kept)
# Copied from transformers.models.bert.tokenization_bert.WordpieceTokenizer
class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
        # vocab: container of known word pieces; unk_token: emitted for words that cannot be split;
        # max_input_chars_per_word: longer words are mapped to unk_token without attempting a split.
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """
        Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform
        tokenization using the given vocabulary.

        For example, `input = "unaffable"` will return as output `["un", "##aff", "##able"]`.

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through *BasicTokenizer*.

        Returns:
            A list of wordpiece tokens.
        """
        result = []
        for word in whitespace_tokenize(text):
            length = len(word)
            if length > self.max_input_chars_per_word:
                result.append(self.unk_token)
                continue

            pieces = []
            begin = 0
            while begin < length:
                # Try the longest remaining substring first and shrink it from the right.
                for stop in range(length, begin, -1):
                    candidate = word[begin:stop]
                    if begin > 0:
                        # Pieces that do not start the word carry the continuation prefix.
                        candidate = "##" + candidate
                    if candidate in self.vocab:
                        pieces.append(candidate)
                        begin = stop
                        break
                else:
                    # No prefix of the remainder is in the vocabulary: the whole word is unknown.
                    pieces = None
                    break

            if pieces is None:
                result.append(self.unk_token)
            else:
                result.extend(pieces)
        return result

.\models\mobilebert\tokenization_mobilebert_fast.py

# coding=utf-8
# 设置文件编码为UTF-8

# Copyright 2020 The HuggingFace Team. All rights reserved.
# 版权声明

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 根据 Apache 许可证 2.0 版本授权,除非符合许可证要求,否则不得使用本文件
# 可以在以下网址获取许可证副本:
# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 除非适用法律要求或书面同意,否则本软件按“原样”分发,不提供任何明示或暗示的担保或条件
# 有关详细信息,请参阅许可证

"""Tokenization classes for MobileBERT."""
# MobileBERT 的分词类

import json
from typing import List, Optional, Tuple

from tokenizers import normalizers

from ...tokenization_utils_fast import PreTrainedTokenizerFast
from ...utils import logging
from .tokenization_mobilebert import MobileBertTokenizer

# 导入必要的模块和类

# Module-level logger obtained from the package's logging utility.
logger = logging.get_logger(__name__)

# File names for the plain-text vocabulary and the serialized tokenizer file.
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"}

# Download locations of the vocabulary and tokenizer files for the `mobilebert-uncased` checkpoint.
PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {"mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt"},
    "tokenizer_file": {
        "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/tokenizer.json"
    },
}

# Per-checkpoint size (512 for `mobilebert-uncased`); the tokenizer class below exposes this mapping as
# `max_model_input_sizes`.
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"mobilebert-uncased": 512}

# Per-checkpoint default init arguments; intentionally empty here.
PRETRAINED_INIT_CONFIGURATION = {}


# Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast with BERT->MobileBERT,Bert->MobileBert
class MobileBertTokenizerFast(PreTrainedTokenizerFast):
    r"""
    Construct a "fast" MobileBERT tokenizer (backed by HuggingFace's *tokenizers* library). Based on WordPiece.

    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            File containing the vocabulary.
        do_lower_case (`bool`, *optional*, defaults to `True`):
            Whether or not to lowercase the input when tokenizing.
        unk_token (`str`, *optional*, defaults to `"[UNK]"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        sep_token (`str`, *optional*, defaults to `"[SEP]"`):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
            sequence classification or for a text and a question for question answering. It is also used as the last
            token of a sequence built with special tokens.
        pad_token (`str`, *optional*, defaults to `"[PAD]"`):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (`str`, *optional*, defaults to `"[CLS]"`):
            The classifier token which is used when doing sequence classification (classification of the whole sequence
            instead of per-token classification). It is the first token of the sequence when built with special tokens.
        mask_token (`str`, *optional*, defaults to `"[MASK]"`):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.
        clean_text (`bool`, *optional*, defaults to `True`):
            Whether or not to clean the text before tokenization by removing any control characters and replacing all
            whitespaces by the classic one.
        tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
            Whether or not to tokenize Chinese characters. This should likely be deactivated for Japanese (see [this
            issue](https://github.com/huggingface/transformers/issues/328)).
        strip_accents (`bool`, *optional*):
            Whether or not to strip all accents. If this option is not specified, then it will be determined by the
            value for `lowercase` (as in the original MobileBERT).
        wordpieces_prefix (`str`, *optional*, defaults to `"##"`):
            The prefix for subwords.
    """

    # File names, download locations, default init arguments and maximum input sizes used by the base class.
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    # Slow (pure Python) counterpart of this tokenizer.
    slow_tokenizer_class = MobileBertTokenizer

    def __init__(
        self,
        vocab_file=None,
        tokenizer_file=None,
        do_lower_case=True,
        unk_token="[UNK]",
        sep_token="[SEP]",
        pad_token="[PAD]",
        cls_token="[CLS]",
        mask_token="[MASK]",
        tokenize_chinese_chars=True,
        strip_accents=None,
        **kwargs,
    ):
        """Initialize the backend tokenizer and align its normalizer with the requested options."""
        super().__init__(
            vocab_file,
            tokenizer_file=tokenizer_file,
            do_lower_case=do_lower_case,
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
            **kwargs,
        )

        # The loaded backend normalizer may disagree with the arguments given here (e.g. when a tokenizer file
        # is loaded with overriding options); in that case it is rebuilt with the requested settings.
        normalizer_state = json.loads(self.backend_tokenizer.normalizer.__getstate__())
        if (
            normalizer_state.get("lowercase", do_lower_case) != do_lower_case
            or normalizer_state.get("strip_accents", strip_accents) != strip_accents
            or normalizer_state.get("handle_chinese_chars", tokenize_chinese_chars) != tokenize_chinese_chars
        ):
            # "type" names the normalizer class; the remaining state entries are its constructor arguments.
            normalizer_class = getattr(normalizers, normalizer_state.pop("type"))
            normalizer_state["lowercase"] = do_lower_case
            normalizer_state["strip_accents"] = strip_accents
            normalizer_state["handle_chinese_chars"] = tokenize_chinese_chars
            self.backend_tokenizer.normalizer = normalizer_class(**normalizer_state)

        self.do_lower_case = do_lower_case

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
        adding special tokens. A MobileBERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        # A second sequence, when given, is appended followed by its own [SEP].
        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT sequence
        pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of IDs for the first sequence.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of token type IDs according to the given sequence(s).
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]

        # Segment 0 covers `[CLS] A [SEP]`; segment 1 covers `B [SEP]`.
        if token_ids_1 is None:
            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]

    def create_mobilebert_sequence_classification_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Backward-compatible alias of [`~MobileBertTokenizerFast.create_token_type_ids_from_sequences`].

        Returns the same token type IDs; prefer calling `create_token_type_ids_from_sequences` directly.
        """
        return self.create_token_type_ids_from_sequences(token_ids_0, token_ids_1)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the tokenizer model's vocabulary to a specified directory.

        Args:
            save_directory (`str`):
                Directory path where the vocabulary files will be saved.
            filename_prefix (`str`, *optional*):
                Optional prefix for the saved files.

        Returns:
            `Tuple[str]`: Tuple containing the filenames where the vocabulary is saved.
        """
        # Saving is delegated to the backend tokenizer's model, which returns the files it wrote.
        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
        return tuple(files)

.\models\mobilebert\__init__.py

# 引入依赖类型检查模块
from typing import TYPE_CHECKING

# 引入内部工具函数和异常类
from ...utils import (
    OptionalDependencyNotAvailable,
    _LazyModule,
    is_tf_available,
    is_tokenizers_available,
    is_torch_available,
)

# Table of submodule name -> public names it provides. It is handed to `_LazyModule`
# at the bottom of this file. Entries for optional backends are added below only
# when the corresponding dependency is available.
_import_structure = {
    "configuration_mobilebert": [
        "MOBILEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP",  # pretrained config archive map
        "MobileBertConfig",  # configuration class
        "MobileBertOnnxConfig",  # ONNX configuration class
    ],
    "tokenization_mobilebert": ["MobileBertTokenizer"],  # tokenizer class
}

# Register the fast tokenizer only when `tokenizers` is available. The exception
# raised here is caught right away, so a missing dependency skips the entry
# instead of failing the import.
try:
    if not is_tokenizers_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["tokenization_mobilebert_fast"] = ["MobileBertTokenizerFast"]  # fast tokenizer class

# Register the PyTorch models only when torch is available (same skip-on-missing guard).
try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    # Public names of the PyTorch MobileBERT module
    _import_structure["modeling_mobilebert"] = [
        "MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST",  # list of pretrained model archives
        "MobileBertForMaskedLM",  # masked language modeling head
        "MobileBertForMultipleChoice",  # multiple-choice head
        "MobileBertForNextSentencePrediction",  # next-sentence prediction head
        "MobileBertForPreTraining",  # pre-training model
        "MobileBertForQuestionAnswering",  # question answering head
        "MobileBertForSequenceClassification",  # sequence classification head
        "MobileBertForTokenClassification",  # token classification head
        "MobileBertLayer",  # MobileBERT layer module
        "MobileBertModel",  # base MobileBERT model
        "MobileBertPreTrainedModel",  # base class of the pretrained models
        "load_tf_weights_in_mobilebert",  # loader for TensorFlow weights
    ]

# Register the TensorFlow models only when TensorFlow is available (same guard).
try:
    if not is_tf_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    # Public names of the TensorFlow MobileBERT module
    _import_structure["modeling_tf_mobilebert"] = [
        "TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST",  # list of pretrained model archives (TensorFlow)
        "TFMobileBertForMaskedLM",  # masked language modeling head (TensorFlow)
        "TFMobileBertForMultipleChoice",  # multiple-choice head (TensorFlow)
        "TFMobileBertForNextSentencePrediction",  # next-sentence prediction head (TensorFlow)
        "TFMobileBertForPreTraining",  # pre-training model (TensorFlow)
        "TFMobileBertForQuestionAnswering",  # question answering head (TensorFlow)
        "TFMobileBertForSequenceClassification",  # sequence classification head (TensorFlow)
        "TFMobileBertForTokenClassification",  # token classification head (TensorFlow)
        "TFMobileBertMainLayer",  # main layer module (TensorFlow)
        "TFMobileBertModel",  # base MobileBERT model (TensorFlow)
        "TFMobileBertPreTrainedModel",  # base class of the pretrained models (TensorFlow)
    ]

# Under static type checking the real imports below are executed so that tools
# can resolve the public names; at runtime the `else` branch installs a
# `_LazyModule` built from `_import_structure` instead.
if TYPE_CHECKING:
    from .configuration_mobilebert import (
        MOBILEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
        MobileBertConfig,
        MobileBertOnnxConfig,
    )
    from .tokenization_mobilebert import MobileBertTokenizer

    # Fast tokenizer: imported only when `tokenizers` is available; a missing
    # dependency is skipped silently.
    try:
        if not is_tokenizers_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .tokenization_mobilebert_fast import MobileBertTokenizerFast

    # PyTorch models: imported only when torch is available.
    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_mobilebert import (
            MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
            MobileBertForMaskedLM,
            MobileBertForMultipleChoice,
            MobileBertForNextSentencePrediction,
            MobileBertForPreTraining,
            MobileBertForQuestionAnswering,
            MobileBertForSequenceClassification,
            MobileBertForTokenClassification,
            MobileBertLayer,
            MobileBertModel,
            MobileBertPreTrainedModel,
            load_tf_weights_in_mobilebert,
        )

    # TensorFlow models: imported only when TensorFlow is available.
    try:
        if not is_tf_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_tf_mobilebert import (
            TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
            TFMobileBertForMaskedLM,
            TFMobileBertForMultipleChoice,
            TFMobileBertForNextSentencePrediction,
            TFMobileBertForPreTraining,
            TFMobileBertForQuestionAnswering,
            TFMobileBertForSequenceClassification,
            TFMobileBertForTokenClassification,
            TFMobileBertMainLayer,
            TFMobileBertModel,
            TFMobileBertPreTrainedModel,
        )
else:
    import sys

    # Replace this module's entry in `sys.modules` with a `_LazyModule` built from
    # `_import_structure` (presumably deferring the submodule imports until a name
    # is first accessed -- see `_LazyModule` in `...utils`).
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)

.\models\mobilenet_v1\configuration_mobilenet_v1.py

# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" MobileNetV1 model configuration"""

# 引入 OrderedDict 用于有序字典,Mapping 用于类型提示
from collections import OrderedDict
from typing import Mapping

# 引入 version 函数从 packaging 模块中
from packaging import version

# 从相应的路径导入所需的配置类和工具
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...utils import logging

# Module-level logger
logger = logging.get_logger(__name__)

# Maps each pretrained MobileNetV1 checkpoint name to the URL of its config.json
MOBILENET_V1_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "google/mobilenet_v1_1.0_224": "https://huggingface.co/google/mobilenet_v1_1.0_224/resolve/main/config.json",
    "google/mobilenet_v1_0.75_192": "https://huggingface.co/google/mobilenet_v1_0.75_192/resolve/main/config.json",
    # See all MobileNetV1 models at https://huggingface.co/models?filter=mobilenet_v1
}

# Configuration class for MobileNetV1 models; extends `PretrainedConfig`.
class MobileNetV1Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`MobileNetV1Model`]. It is used to instantiate a
    MobileNetV1 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the MobileNetV1
    [google/mobilenet_v1_1.0_224](https://huggingface.co/google/mobilenet_v1_1.0_224) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        depth_multiplier (`float`, *optional*, defaults to 1.0):
            Multiplier applied to the number of channels in each layer. Must be greater than zero.
        min_depth (`int`, *optional*, defaults to 8):
            All layers will have at least this many channels.
        hidden_act (`str`, *optional*, defaults to `"relu6"`):
            Name of the non-linear activation function used by the model's layers.
        tf_padding (`bool`, *optional*, defaults to `True`):
            Whether to use TensorFlow padding rules on the convolution layers.
        classifier_dropout_prob (`float`, *optional*, defaults to 0.999):
            The dropout ratio for attached classifiers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 0.001):
            The epsilon used by the normalization layers.
        **kwargs:
            Forwarded unchanged to `PretrainedConfig.__init__`.

    Raises:
        ValueError: If `depth_multiplier` is not strictly positive.
    """

    model_type = "mobilenet_v1"

    def __init__(
        self,
        num_channels: int = 3,
        image_size: int = 224,
        depth_multiplier: float = 1.0,
        min_depth: int = 8,
        hidden_act: str = "relu6",
        tf_padding: bool = True,
        classifier_dropout_prob: float = 0.999,
        initializer_range: float = 0.02,
        layer_norm_eps: float = 0.001,
        **kwargs,
    ):
        # Generic options are handled by the parent class.
        super().__init__(**kwargs)

        # A zero or negative multiplier is rejected outright.
        if depth_multiplier <= 0:
            raise ValueError("depth_multiplier must be greater than zero.")

        self.num_channels = num_channels
        self.image_size = image_size
        self.depth_multiplier = depth_multiplier
        self.min_depth = min_depth
        self.hidden_act = hidden_act
        self.tf_padding = tf_padding
        self.classifier_dropout_prob = classifier_dropout_prob
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
# ONNX configuration for MobileNetV1; extends `OnnxConfig`.
class MobileNetV1OnnxConfig(OnnxConfig):
    # Parsed version object for "1.11" (per the attribute name, the minimum torch
    # version for ONNX export -- confirm against `OnnxConfig`).
    torch_onnx_minimum_version = version.parse("1.11")

    # Input name -> {axis index: axis name}; only axis 0 ("batch") is named.
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        input_axes = OrderedDict()
        input_axes["pixel_values"] = {0: "batch"}
        return input_axes

    # Output name -> {axis index: axis name}; the set of outputs depends on `self.task`.
    @property
    def outputs(self) -> Mapping[str, Mapping[int, str]]:
        if self.task == "image-classification":
            output_names = ("logits",)
        else:
            output_names = ("last_hidden_state", "pooler_output")
        # Every output gets its own axes dict with axis 0 named "batch".
        return OrderedDict((output_name, {0: "batch"}) for output_name in output_names)

    # Absolute tolerance returned for validation.
    @property
    def atol_for_validation(self) -> float:
        tolerance = 1e-4
        return tolerance

.\models\mobilenet_v1\convert_original_tf_checkpoint_to_pytorch.py

# 设置编码格式为 UTF-8
# 版权声明:2022 年由 HuggingFace Inc. 团队拥有
#
# 根据 Apache 许可证 2.0 版本许可使用本文件
# 除非符合许可证的要求,否则不得使用本文件
# 您可以在以下网址获取许可证副本:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# 除非适用法律要求或书面同意,否则本软件分发时应以“原样”分发,
# 不附带任何形式的担保或条件。有关许可的详细信息,请参见许可证。
"""从 tensorflow/models 库中转换 MobileNetV1 检查点。"""


import argparse  # 导入解析命令行参数的模块
import json  # 导入处理 JSON 数据的模块
import re  # 导入正则表达式模块
from pathlib import Path  # 导入处理路径操作的模块

import requests  # 导入处理 HTTP 请求的模块
import torch  # 导入 PyTorch 深度学习框架
from huggingface_hub import hf_hub_download  # 导入从 Hugging Face Hub 下载资源的函数
from PIL import Image  # 导入处理图像的模块

from transformers import (  # 导入 Hugging Face Transformers 库中的相关模块和类
    MobileNetV1Config,
    MobileNetV1ForImageClassification,
    MobileNetV1ImageProcessor,
    load_tf_weights_in_mobilenet_v1,
)
from transformers.utils import logging  # 导入日志记录工具

logging.set_verbosity_info()  # set the log verbosity to INFO for this script
logger = logging.get_logger(__name__)  # logger for this module


def get_mobilenet_v1_config(model_name):
    """
    Build the `MobileNetV1Config` for an original TensorFlow MobileNetV1 checkpoint name.

    Args:
        model_name (`str`):
            Checkpoint name. When it has the form `mobilenet_v1_<depth>_<size>`, the depth multiplier and image
            size are taken from it; otherwise the config defaults are kept.

    Returns:
        `MobileNetV1Config`: Config with 1001 labels and the ImageNet-1k label maps shifted by one so that
        index 0 is "background".

    Raises:
        ValueError: If `model_name` contains `"_quant"`.
    """
    # Reject unsupported checkpoints before building a config or downloading anything.
    if "_quant" in model_name:
        raise ValueError("Quantized models are not supported.")

    config = MobileNetV1Config(layer_norm_eps=0.001)

    # Pull the depth multiplier and image size out of the checkpoint name.
    matches = re.match(r"^mobilenet_v1_([^_]*)_([^_]*)$", model_name)
    if matches:
        config.depth_multiplier = float(matches[1])
        config.image_size = int(matches[2])

    # The TensorFlow version of MobileNetV1 predicts 1001 classes instead of the
    # usual 1000; the first class (index 0) is "background".
    config.num_labels = 1001
    filename = "imagenet-1k-id2label.json"
    repo_id = "huggingface/label-files"
    label_path = hf_hub_download(repo_id, filename, repo_type="dataset")
    # Context manager closes the file deterministically; JSON text is UTF-8.
    with open(label_path, "r", encoding="utf-8") as label_file:
        id2label = json.load(label_file)
    # Shift every ImageNet id by one to make room for the background class.
    id2label = {int(k) + 1: v for k, v in id2label.items()}
    id2label[0] = "background"
    config.id2label = id2label
    config.label2id = {v: k for k, v in id2label.items()}

    return config


# We will verify our results on an image of cute cats
def prepare_img():
    """Fetch the reference image over HTTP and open it with PIL.

    Returns:
        The object returned by `Image.open` on the streamed response body.
    """
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Stream the response so PIL can read straight from the raw body.
    response = requests.get(image_url, stream=True)
    return Image.open(response.raw)


@torch.no_grad()
def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub=False):
    """
    Copy/paste/tweak model's weights to our MobileNetV1 structure.

    Args:
        model_name (`str`):
            Name of the original checkpoint, e.g. `"mobilenet_v1_1.0_224"`.
        checkpoint_path (`str`):
            Path to the original TensorFlow checkpoint.
        pytorch_dump_folder_path (`str`):
            Directory the converted model and image processor are saved to. It is created, including missing
            parents, when it does not exist.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the converted model and image processor to `google/<model_name>`.

    Raises:
        ValueError: If the logits do not have shape `(1, 1001)`, or if the first three logits differ from the
            reference values known for `model_name`.
    """
    config = get_mobilenet_v1_config(model_name)

    # Instantiate the model in eval mode and fill it from the TensorFlow checkpoint.
    model = MobileNetV1ForImageClassification(config).eval()
    load_tf_weights_in_mobilenet_v1(model, config, checkpoint_path)

    # Check the outputs on the reference image, prepared by MobileNetV1ImageProcessor.
    image_processor = MobileNetV1ImageProcessor(
        crop_size={"width": config.image_size, "height": config.image_size},
        size={"shortest_edge": config.image_size + 32},
    )
    encoding = image_processor(images=prepare_img(), return_tensors="pt")
    outputs = model(**encoding)
    logits = outputs.logits

    # Explicit raises instead of `assert`, so the checks still run under `python -O`.
    if tuple(logits.shape) != (1, 1001):
        raise ValueError(f"Unexpected logits shape {tuple(logits.shape)}, expected (1, 1001)")

    # Reference values for the first three logits of the known checkpoints.
    reference_logits = {
        "mobilenet_v1_1.0_224": [-4.1739, -1.1233, 3.1205],
        "mobilenet_v1_0.75_192": [-3.9440, -2.3141, -0.3333],
    }
    expected_values = reference_logits.get(model_name)

    # Checkpoints without reference values are converted without a numeric check.
    if expected_values is not None:
        expected_logits = torch.tensor(expected_values)
        if not torch.allclose(logits[0, :3], expected_logits, atol=1e-4):
            raise ValueError(
                f"Logits {logits[0, :3].tolist()} do not match the expected values {expected_values} for {model_name}"
            )

    # `parents=True` lets a nested output path be created in one go.
    Path(pytorch_dump_folder_path).mkdir(parents=True, exist_ok=True)
    print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)
    print(f"Saving image processor to {pytorch_dump_folder_path}")
    image_processor.save_pretrained(pytorch_dump_folder_path)

    if push_to_hub:
        print("Pushing to the hub...")
        repo_id = "google/" + model_name
        image_processor.push_to_hub(repo_id)
        model.push_to_hub(repo_id)
if __name__ == "__main__":
    # Command-line entry point of the conversion script.
    parser = argparse.ArgumentParser()

    # Checkpoint name; optional because it has a default.
    parser.add_argument(
        "--model_name",
        type=str,
        default="mobilenet_v1_1.0_224",
        help="Name of the MobileNetV1 model you'd like to convert. Should in the form 'mobilenet_v1_<depth>_<size>'.",
    )
    # Required: location of the original TensorFlow checkpoint.
    parser.add_argument(
        "--checkpoint_path",
        type=str,
        required=True,
        help="Path to the original TensorFlow checkpoint (.ckpt file).",
    )
    # Required: directory receiving the converted PyTorch model.
    parser.add_argument(
        "--pytorch_dump_folder_path",
        type=str,
        required=True,
        help="Path to the output PyTorch model directory.",
    )
    # Optional flag: also push the converted model to the hub.
    parser.add_argument(
        "--push_to_hub",
        action="store_true",
        help="Whether or not to push the converted model to the 🤗 hub.",
    )

    cli_args = parser.parse_args()

    # Run the conversion with the parsed command-line values.
    convert_movilevit_checkpoint(
        model_name=cli_args.model_name,
        checkpoint_path=cli_args.checkpoint_path,
        pytorch_dump_folder_path=cli_args.pytorch_dump_folder_path,
        push_to_hub=cli_args.push_to_hub,
    )

.\models\mobilenet_v1\feature_extraction_mobilenet_v1.py

# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Feature extractor class for MobileNetV1."""

import warnings  # 导入警告模块

from ...utils import logging  # 导入日志工具
from .image_processing_mobilenet_v1 import MobileNetV1ImageProcessor  # 导入MobileNetV1的图像处理类


logger = logging.get_logger(__name__)  # logger for this module


# Deprecated subclass of MobileNetV1ImageProcessor: it adds nothing except a
# FutureWarning emitted on construction.
class MobileNetV1FeatureExtractor(MobileNetV1ImageProcessor):
    def __init__(self, *args, **kwargs) -> None:
        deprecation_message = (
            "The class MobileNetV1FeatureExtractor is deprecated and will be removed in version 5 of Transformers."
            " Please use MobileNetV1ImageProcessor instead."
        )
        # Warn first, then hand every argument to the parent initializer unchanged.
        warnings.warn(deprecation_message, FutureWarning)
        super().__init__(*args, **kwargs)

.\models\mobilenet_v1\image_processing_mobilenet_v1.py

# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
# 代码文件的版权声明,声明此代码版权归 HuggingFace Inc. 团队所有

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 引入 Apache License 2.0,指明本代码遵循的许可协议,允许自由使用、分发及修改

"""Image processor class for MobileNetV1."""
# MobileNetV1 图像处理类的定义

from typing import Dict, List, Optional, Union

import numpy as np  # 导入 NumPy 库

# 导入所需的图像处理工具函数和类
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ...image_transforms import (
    get_resize_output_image_size,  # 导入图像尺寸调整函数
    resize,  # 导入图像缩放函数
    to_channel_dimension_format,  # 导入通道维度格式转换函数
)
from ...image_utils import (
    IMAGENET_STANDARD_MEAN,  # 导入 ImageNet 标准均值
    IMAGENET_STANDARD_STD,  # 导入 ImageNet 标准标准差
    ChannelDimension,  # 导入通道维度类
    ImageInput,  # 导入图像输入类
    PILImageResampling,  # 导入 PIL 图像重采样枚举
    infer_channel_dimension_format,  # 推断通道维度格式的函数
    is_scaled_image,  # 判断是否为缩放图像的函数
    make_list_of_images,  # 将图像列表化的函数
    to_numpy_array,  # 将图像转换为 NumPy 数组的函数
    valid_images,  # 验证图像合法性的函数
    validate_kwargs,  # 验证关键字参数的函数
    validate_preprocess_arguments,  # 验证预处理参数的函数
)
from ...utils import TensorType, logging  # 导入 Tensor 类型及日志记录工具

logger = logging.get_logger(__name__)  # logger for this module


class MobileNetV1ImageProcessor(BaseImageProcessor):
    r"""
    Constructs a MobileNetV1 image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by
            `do_resize` in the `preprocess` method.
        size (`Dict[str, int]` *optional*, defaults to `{"shortest_edge": 256}`):
            Size of the image after resizing. The shortest edge of the image is resized to size["shortest_edge"], with
            the longest edge resized to keep the input aspect ratio. Can be overridden by `size` in the `preprocess`
            method.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the
            `preprocess` method.
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the image
            is padded with 0's and then center cropped. Can be overridden by the `do_center_crop` parameter in the
            `preprocess` method.
        crop_size (`Dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
            Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to `True`.
            Can be overridden by the `crop_size` parameter in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the `do_rescale`
            parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by the `rescale_factor` parameter in the
            `preprocess` method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
    """

    # Name of the model input declared by this processor.
    model_input_names = ["pixel_values"]

    def __init__(
        self,
        do_resize: bool = True,
        size: Optional[Dict[str, int]] = None,
        resample: PILImageResampling = PILImageResampling.BILINEAR,
        do_center_crop: bool = True,
        crop_size: Optional[Dict[str, int]] = None,
        do_rescale: bool = True,
        rescale_factor: Union[int, float] = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Fall back to the documented defaults, then normalize both sizes to dict form.
        size = size if size is not None else {"shortest_edge": 256}
        size = get_size_dict(size, default_to_square=False)
        crop_size = crop_size if crop_size is not None else {"height": 224, "width": 224}
        crop_size = get_size_dict(crop_size)
        self.do_resize = do_resize
        self.size = size
        self.resample = resample
        self.do_center_crop = do_center_crop
        self.crop_size = crop_size
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        # Normalization statistics default to the ImageNet standard constants.
        self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
        self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
        # Argument names this processor recognizes (presumably checked against the
        # caller's kwargs elsewhere in the class -- not visible here).
        self._valid_processor_keys = [
            "images",
            "do_resize",
            "size",
            "resample",
            "do_center_crop",
            "crop_size",
            "do_rescale",
            "rescale_factor",
            "do_normalize",
            "image_mean",
            "image_std",
            "return_tensors",
            "data_format",
            "input_data_format",
        ]

    # Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
    def resize(
        self,
        image: np.ndarray,
        size: Dict[str, int],
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Resize an image. The shortest edge of the image is resized to size["shortest_edge"], with the longest edge
        resized to keep the input aspect ratio.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Size of the output image. Should contain either "shortest_edge" or "height" and "width".
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.

        Raises:
            ValueError: If `size` contains neither "shortest_edge" nor both "height" and "width".
        """
        default_to_square = True
        if "shortest_edge" in size:
            # A single edge length: the aspect ratio is kept, so no square default.
            size = size["shortest_edge"]
            default_to_square = False
        elif "height" in size and "width" in size:
            # An explicit (height, width) target.
            size = (size["height"], size["width"])
        else:
            raise ValueError("Size must contain either 'shortest_edge' or 'height' and 'width'.")

        # Work out the concrete output size for this particular image.
        output_size = get_resize_output_image_size(
            image,
            size=size,
            default_to_square=default_to_square,
            input_data_format=input_data_format,
        )
        # Delegate the actual resampling to the module-level `resize` helper.
        return resize(
            image,
            size=output_size,
            resample=resample,
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )

.\models\mobilenet_v1\modeling_mobilenet_v1.py

# coding=utf-8
# Copyright 2022 Apple Inc. and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch MobileNetV1 model."""

from typing import Optional, Union

import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutputWithPoolingAndNoAttention, ImageClassifierOutputWithNoAttention
from ...modeling_utils import PreTrainedModel
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from .configuration_mobilenet_v1 import MobileNetV1Config

# Module-level logger
logger = logging.get_logger(__name__)

# General docstring
# (these `_..._DOC` style constants are presumably consumed by the docstring
# decorators imported above -- their use is not visible in this part of the file)
_CONFIG_FOR_DOC = "MobileNetV1Config"

# Base docstring
_CHECKPOINT_FOR_DOC = "google/mobilenet_v1_1.0_224"
_EXPECTED_OUTPUT_SHAPE = [1, 1024, 7, 7]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "google/mobilenet_v1_1.0_224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"

# Names of the pretrained MobileNetV1 checkpoints listed by this module
MOBILENET_V1_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "google/mobilenet_v1_1.0_224",
    "google/mobilenet_v1_0.75_192",
    # See all MobileNetV1 models at https://huggingface.co/models?filter=mobilenet_v1
]

def _build_tf_to_pytorch_map(model, config, tf_weights=None):
    """
    A map of modules from TF to PyTorch.

    Args:
        model:
            Either a `MobileNetV1ForImageClassification` (its `mobilenet_v1` attribute is used as the backbone and
            its classifier is mapped too) or a bare backbone.
        config:
            Unused here; kept for the existing call signature.
        tf_weights (*optional*):
            Unused here; kept for the existing call signature.

    Returns:
        `dict`: TensorFlow variable name -> the PyTorch parameter or buffer it is loaded into.
    """
    has_classifier = isinstance(model, MobileNetV1ForImageClassification)
    backbone = model.mobilenet_v1 if has_classifier else model

    name_to_param = {}

    def register(scope, conv_key, block):
        # One conv + batch-norm unit: the convolution weight, then the four batch-norm tensors.
        name_to_param[scope + conv_key] = block.convolution.weight
        name_to_param[scope + "BatchNorm/beta"] = block.normalization.bias
        name_to_param[scope + "BatchNorm/gamma"] = block.normalization.weight
        name_to_param[scope + "BatchNorm/moving_mean"] = block.normalization.running_mean
        name_to_param[scope + "BatchNorm/moving_variance"] = block.normalization.running_var

    # Stem convolution.
    register("MobilenetV1/Conv2d_0/", "weights", backbone.conv_stem)

    # TF numbers the 13 blocks from 1; each maps to two consecutive PyTorch layers
    # (depthwise at the even index, pointwise right after it).
    for tf_index, pt_index in enumerate(range(0, 26, 2), start=1):
        register(f"MobilenetV1/Conv2d_{tf_index}_depthwise/", "depthwise_weights", backbone.layer[pt_index])
        register(f"MobilenetV1/Conv2d_{tf_index}_pointwise/", "weights", backbone.layer[pt_index + 1])

    # The classification head only exists on the image-classification model.
    if has_classifier:
        logits_scope = "MobilenetV1/Logits/Conv2d_1c_1x1/"
        name_to_param[logits_scope + "weights"] = model.classifier.weight
        name_to_param[logits_scope + "biases"] = model.classifier.bias

    return name_to_param
# Load the weights of a TensorFlow checkpoint into a PyTorch model
def load_tf_weights_in_mobilenet_v1(model, config, tf_checkpoint_path):
    """
    Copy the variables stored in a TensorFlow checkpoint into the parameters of `model`.

    Every variable in the checkpoint is read into memory, matched against the name -> parameter map produced by
    `_build_tf_to_pytorch_map`, re-ordered along its axes where the name requires it, shape-checked and then
    assigned to the parameter's `.data`.

    Args:
        model: PyTorch model whose parameters are overwritten in place.
        config: Model configuration, forwarded to `_build_tf_to_pytorch_map`.
        tf_checkpoint_path: Path of the TensorFlow checkpoint to read.

    Returns:
        The same `model` object, with the matched parameters replaced.

    Raises:
        ImportError: If NumPy or TensorFlow cannot be imported (re-raised after logging).
        ValueError: If a checkpoint array does not have the shape of its target parameter.
    """
    try:
        import numpy as np
        import tensorflow as tf
    except ImportError:
        logger.error(
            "Loading a TensorFlow models in PyTorch, requires TensorFlow to be installed. Please see "
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise

    # Read every variable of the checkpoint into memory, keyed by its TensorFlow name
    tf_weights = {}
    for var_name, var_shape in tf.train.list_variables(tf_checkpoint_path):
        logger.info(f"Loading TF weight {var_name} with shape {var_shape}")
        tf_weights[var_name] = tf.train.load_variable(tf_checkpoint_path, var_name)

    # Map TensorFlow variable names to the PyTorch parameters they should fill
    tf_to_pt_map = _build_tf_to_pytorch_map(model, config, tf_weights)

    # The variable itself plus the training-only companions that are dropped together with it
    consumed_suffixes = ("", "/RMSProp", "/RMSProp_1", "/ExponentialMovingAverage")

    for name, pointer in tf_to_pt_map.items():
        logger.info(f"Importing {name}")
        if name not in tf_weights:
            logger.info(f"{name} not in tf pre-trained weights, skipping")
            continue

        array = tf_weights[name]

        # Kernels need their axes permuted; which permutation depends on the variable name
        if "depthwise_weights" in name:
            logger.info("Transposing depthwise")
            array = np.transpose(array, (2, 3, 0, 1))
        elif "weights" in name:
            logger.info("Transposing")
            targets_linear_layer = len(pointer.shape) == 2
            if targets_linear_layer:
                # A 2D target is a linear layer: drop the singleton axes, then swap the two that remain
                array = array.squeeze().transpose()
            else:
                array = np.transpose(array, (3, 2, 0, 1))

        # Refuse to assign an array whose shape differs from the parameter it replaces
        if pointer.shape != array.shape:
            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")

        logger.info(f"Initialize PyTorch weight {name} {array.shape}")
        pointer.data = torch.from_numpy(array)

        # Forget the consumed variable so that only unused names are reported at the end
        for suffix in consumed_suffixes:
            tf_weights.pop(name + suffix, None)

    # Report the checkpoint variables that were never copied into the PyTorch model
    leftover_names = ", ".join(tf_weights.keys())
    logger.info(f"Weights not copied to PyTorch model: {leftover_names}")
    return model


# Apply TensorFlow-style "SAME" padding ahead of a convolution layer
def apply_tf_padding(features: torch.Tensor, conv_layer: nn.Conv2d) -> torch.Tensor:
    """
    Apply TensorFlow-style "SAME" padding to a convolution layer. See the notes at:
    https://www.tensorflow.org/api_docs/python/tf/nn#notes_on_padding_2

    Args:
        features: Tensor whose last two dimensions are read as height and width.
        conv_layer: Convolution whose `stride` and `kernel_size` determine the amount of padding.

    Returns:
        `features` zero-padded along its last two dimensions. When the total padding of an axis is odd, the
        extra element goes to the right / bottom side.
    """

    def _total_padding(size: int, stride: int, kernel: int) -> int:
        # Total padding along one axis; never negative
        remainder = size % stride
        consumed = stride if remainder == 0 else remainder
        return max(kernel - consumed, 0)

    in_height, in_width = features.shape[-2:]
    stride_height, stride_width = conv_layer.stride
    kernel_height, kernel_width = conv_layer.kernel_size

    pad_along_height = _total_padding(in_height, stride_height, kernel_height)
    pad_along_width = _total_padding(in_width, stride_width, kernel_width)

    # Split each total with floor division: the leading side gets the smaller half
    pad_top = pad_along_height // 2
    pad_bottom = pad_along_height - pad_top
    pad_left = pad_along_width // 2
    pad_right = pad_along_width - pad_left

    # nn.functional.pad takes the last dimension first: (left, right, top, bottom)
    return nn.functional.pad(features, (pad_left, pad_right, pad_top, pad_bottom), "constant", 0.0)
class MobileNetV1ConvLayer(nn.Module):
    """
    Convolution block: a 2D convolution, optionally followed by batch normalization and an activation.

    When `config.tf_padding` is set, the convolution itself uses no padding and the input is padded with
    `apply_tf_padding` in `forward` instead.

    Args:
        config: Model configuration; this block reads `tf_padding`, `layer_norm_eps` and `hidden_act` from it.
        in_channels: Number of input channels. Must be divisible by `groups`.
        out_channels: Number of output channels. Must be divisible by `groups`.
        kernel_size: Size of the convolution kernel.
        stride: Stride of the convolution.
        groups: Number of convolution groups.
        bias: Whether the convolution has a bias term.
        use_normalization: Whether to add a `nn.BatchNorm2d` layer after the convolution.
        use_activation: `False`/`None` for no activation, the name of an `ACT2FN` entry to force a specific
            activation, or any other truthy value to use `config.hidden_act`.

    Raises:
        ValueError: If `in_channels` or `out_channels` is not divisible by `groups`.
    """

    def __init__(
        self,
        config: MobileNetV1Config,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: Optional[int] = 1,
        groups: Optional[int] = 1,
        bias: bool = False,
        use_normalization: Optional[bool] = True,
        # `bool or str` evaluates to plain `bool`; `Union` states the accepted types correctly.
        use_activation: Optional[Union[bool, str]] = True,
    ) -> None:
        super().__init__()
        self.config = config

        # Grouped convolutions require both channel counts to be divisible by the group count
        if in_channels % groups != 0:
            raise ValueError(f"Input channels ({in_channels}) are not divisible by {groups} groups.")
        if out_channels % groups != 0:
            raise ValueError(f"Output channels ({out_channels}) are not divisible by {groups} groups.")

        # With TensorFlow-style padding the input is padded in `forward`, so the convolution pads nothing
        padding = 0 if config.tf_padding else int((kernel_size - 1) / 2)

        self.convolution = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=bias,
            padding_mode="zeros",
        )

        if use_normalization:
            # NOTE(review): PyTorch weights the *new* batch statistic by `momentum`
            # (running = (1 - momentum) * running + momentum * batch), the reverse of TensorFlow's decay
            # convention. Confirm that 0.9997 is the intended value before training with this layer.
            self.normalization = nn.BatchNorm2d(
                num_features=out_channels,
                eps=config.layer_norm_eps,
                momentum=0.9997,
                affine=True,
                track_running_stats=True,
            )
        else:
            self.normalization = None

        # Resolve the activation: an explicit name wins, otherwise fall back to the configured one
        if use_activation:
            if isinstance(use_activation, str):
                self.activation = ACT2FN[use_activation]
            elif isinstance(config.hidden_act, str):
                self.activation = ACT2FN[config.hidden_act]
            else:
                self.activation = config.hidden_act
        else:
            self.activation = None

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        """Apply optional TF-style padding, the convolution, optional normalization and optional activation."""
        if self.config.tf_padding:
            features = apply_tf_padding(features, self.convolution)

        features = self.convolution(features)

        if self.normalization is not None:
            features = self.normalization(features)

        if self.activation is not None:
            features = self.activation(features)

        return features


class MobileNetV1PreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    # Class-level settings: configuration class, TF weight loader, prefix, main input name and checkpointing flag
    config_class = MobileNetV1Config
    load_tf_weights = load_tf_weights_in_mobilenet_v1
    base_model_prefix = "mobilenet_v1"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = False

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d]) -> None:
        """Initialize the weights"""
        # Batch normalization layers: zero bias and unit weight
        if isinstance(module, nn.BatchNorm2d):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        # Every other module type except linear and 2D convolution layers is left untouched
        if not isinstance(module, (nn.Linear, nn.Conv2d)):
            return

        # Linear / convolution weights: normal distribution with mean 0 and std `config.initializer_range`
        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        # The bias, when the layer has one, starts at zero
        if module.bias is not None:
            module.bias.data.zero_()
# Raw-string docstring fragment describing the model class and its `config` parameter; it is passed to
# `add_start_docstrings` on the model classes in this file.
MOBILENET_V1_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`MobileNetV1Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

# Raw-string docstring fragment describing the `forward` inputs; it is passed to
# `add_start_docstrings_to_model_forward` on the `forward` methods in this file.
MOBILENET_V1_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`MobileNetV1ImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""

# The class docstring is assembled by `add_start_docstrings` from the summary line and the shared fragment
@add_start_docstrings(
    "The bare MobileNetV1 model outputting raw hidden-states without any specific head on top.",
    MOBILENET_V1_START_DOCSTRING,
)
class MobileNetV1Model(MobileNetV1PreTrainedModel):
    # Backbone: a stem convolution followed by 13 (depthwise, pointwise) convolution pairs and an optional
    # global average pooling layer.
    def __init__(self, config: MobileNetV1Config, add_pooling_layer: bool = True):
        super().__init__(config)
        self.config = config

        # The stem starts from a nominal depth of 32, scaled by the multiplier and clamped from below
        depth = 32
        out_channels = max(int(depth * config.depth_multiplier), config.min_depth)

        self.conv_stem = MobileNetV1ConvLayer(
            config,
            in_channels=config.num_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2,
        )

        # Stride of the depthwise convolution in each of the 13 pairs
        strides = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1]

        self.layer = nn.ModuleList()
        for pair_index, pair_stride in enumerate(strides):
            # A pair consumes what the previous pair (or the stem) produced
            in_channels = out_channels

            # The nominal depth doubles on the first pair and on every pair that downsamples
            if pair_index == 0 or pair_stride == 2:
                depth *= 2
                out_channels = max(int(depth * config.depth_multiplier), config.min_depth)

            # Depthwise 3x3 convolution: one group per channel, channel count unchanged
            depthwise = MobileNetV1ConvLayer(
                config,
                in_channels=in_channels,
                out_channels=in_channels,
                kernel_size=3,
                stride=pair_stride,
                groups=in_channels,
            )
            self.layer.append(depthwise)

            # Pointwise 1x1 convolution: changes the channel count
            pointwise = MobileNetV1ConvLayer(
                config,
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
            )
            self.layer.append(pointwise)

        # Optional global average pooling down to a 1x1 spatial map
        if add_pooling_layer:
            self.pooler = nn.AdaptiveAvgPool2d((1, 1))
        else:
            self.pooler = None

        # Initialize weights and apply final processing
        self.post_init()

    # Head pruning is not implemented for this model
    def _prune_heads(self, heads_to_prune):
        raise NotImplementedError

    @add_start_docstrings_to_model_forward(MOBILENET_V1_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndNoAttention,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, BaseModelOutputWithPoolingAndNoAttention]:
        # Arguments left as None fall back to the values stored in the configuration
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states
        if return_dict is None:
            return_dict = self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        hidden_states = self.conv_stem(pixel_values)

        # Collect the output of every layer only when asked to
        all_hidden_states = () if output_hidden_states else None

        for layer_module in self.layer:
            hidden_states = layer_module(hidden_states)

            if output_hidden_states:
                all_hidden_states += (hidden_states,)

        last_hidden_state = hidden_states

        # Pool to 1x1 and flatten everything after the batch dimension, when a pooler exists
        if self.pooler is None:
            pooled_output = None
        else:
            pooled_output = torch.flatten(self.pooler(last_hidden_state), start_dim=1)

        # Tuple output keeps only the entries that are not None
        if not return_dict:
            candidates = (last_hidden_state, pooled_output, all_hidden_states)
            return tuple(value for value in candidates if value is not None)

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=all_hidden_states,
        )
# The class docstring is assembled by `add_start_docstrings` from the summary below and the shared fragment
@add_start_docstrings(
    """
    MobileNetV1 model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """,
    MOBILENET_V1_START_DOCSTRING,
)
class MobileNetV1ForImageClassification(MobileNetV1PreTrainedModel):
    def __init__(self, config: MobileNetV1Config) -> None:
        super().__init__(config)

        self.num_labels = config.num_labels
        self.mobilenet_v1 = MobileNetV1Model(config)

        # The classifier input width is the channel count of the backbone's last convolution
        last_hidden_size = self.mobilenet_v1.layer[-1].convolution.out_channels

        # Classifier head: in-place dropout followed by a linear layer (identity when there are no labels)
        self.dropout = nn.Dropout(config.classifier_dropout_prob, inplace=True)
        if config.num_labels > 0:
            self.classifier = nn.Linear(last_hidden_size, config.num_labels)
        else:
            self.classifier = nn.Identity()

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(MOBILENET_V1_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=ImageClassifierOutputWithNoAttention,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ImageClassifierOutputWithNoAttention]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        outputs = self.mobilenet_v1(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        # The pooled features are a named field of the output object, or entry 1 of the output tuple
        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(self.dropout(pooled_output))

        loss = None
        if labels is not None:
            # Infer the problem type once, from the label count and the label dtype, and store it on the config
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    inferred_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    inferred_type = "single_label_classification"
                else:
                    inferred_type = "multi_label_classification"
                self.config.problem_type = inferred_type

            problem_type = self.config.problem_type
            if problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        # Tuple output: optional loss, then the logits, then the backbone outputs from index 2 onwards
        if not return_dict:
            output = (logits,) + outputs[2:]
            return output if loss is None else (loss,) + output

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )

.\models\mobilenet_v1\__init__.py

# 版权声明和许可信息
#
# 根据 Apache License, Version 2.0 授权使用本代码
# 除非遵循许可,否则不得使用本文件
# 可以从以下链接获取许可的副本:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# 如果法律要求或书面同意,软件会基于"原样"分发,
# 没有任何明示或暗示的担保或条件。
# 有关具体的语言权利和限制,请参阅许可证。
from typing import TYPE_CHECKING

# Helpers for optional dependencies and lazy module loading
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available

# Maps each submodule name to the public names it provides; handed to `_LazyModule` at the bottom of the file
_import_structure = {
    "configuration_mobilenet_v1": [
        "MOBILENET_V1_PRETRAINED_CONFIG_ARCHIVE_MAP",
        "MobileNetV1Config",
        "MobileNetV1OnnxConfig",
    ],
}

# Register the image-related submodules only when the vision dependencies are available
try:
    if not is_vision_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    # Feature extractor and image processor
    _import_structure["feature_extraction_mobilenet_v1"] = ["MobileNetV1FeatureExtractor"]
    _import_structure["image_processing_mobilenet_v1"] = ["MobileNetV1ImageProcessor"]

# Register the modeling submodule only when PyTorch is available
try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    # Model classes and the TensorFlow weight loader
    _import_structure["modeling_mobilenet_v1"] = [
        "MOBILENET_V1_PRETRAINED_MODEL_ARCHIVE_LIST",
        "MobileNetV1ForImageClassification",
        "MobileNetV1Model",
        "MobileNetV1PreTrainedModel",
        "load_tf_weights_in_mobilenet_v1",
    ]

# Under static type checking, import the real names eagerly, mirroring the structure declared above
if TYPE_CHECKING:
    from .configuration_mobilenet_v1 import (
        MOBILENET_V1_PRETRAINED_CONFIG_ARCHIVE_MAP,
        MobileNetV1Config,
        MobileNetV1OnnxConfig,
    )

    try:
        if not is_vision_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        # Feature extractor and image processor
        from .feature_extraction_mobilenet_v1 import MobileNetV1FeatureExtractor
        from .image_processing_mobilenet_v1 import MobileNetV1ImageProcessor

    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        # Model classes and the TensorFlow weight loader
        from .modeling_mobilenet_v1 import (
            MOBILENET_V1_PRETRAINED_MODEL_ARCHIVE_LIST,
            MobileNetV1ForImageClassification,
            MobileNetV1Model,
            MobileNetV1PreTrainedModel,
            load_tf_weights_in_mobilenet_v1,
        )

# At runtime, replace this module with a `_LazyModule` built from `_import_structure`
else:
    import sys

    # Swap the entry in `sys.modules` so later attribute access goes through the `_LazyModule` instance
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)

.\models\mobilenet_v2\configuration_mobilenet_v2.py

# Standard-library imports
from collections import OrderedDict
from typing import Mapping

# Version parsing
from packaging import version

# Base class for model configurations
from ...configuration_utils import PretrainedConfig

# Base class for ONNX export configurations
from ...onnx import OnnxConfig

# Logging utilities
from ...utils import logging

# Module-level logger
logger = logging.get_logger(__name__)

# Maps each pretrained checkpoint name to the URL of its configuration file
MOBILENET_V2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "google/mobilenet_v2_1.4_224": "https://huggingface.co/google/mobilenet_v2_1.4_224/resolve/main/config.json",
    "google/mobilenet_v2_1.0_224": "https://huggingface.co/google/mobilenet_v2_1.0_224/resolve/main/config.json",
    "google/mobilenet_v2_0.75_160": "https://huggingface.co/google/mobilenet_v2_0.75_160/resolve/main/config.json",
    "google/mobilenet_v2_0.35_96": "https://huggingface.co/google/mobilenet_v2_0.35_96/resolve/main/config.json",
    # See all MobileNetV2 models at https://huggingface.co/models?filter=mobilenet_v2
}


class MobileNetV2Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`MobileNetV2Model`]. It is used to instantiate a
    MobileNetV2 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the MobileNetV2
    [google/mobilenet_v2_1.0_224](https://huggingface.co/google/mobilenet_v2_1.0_224) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        depth_multiplier (`float`, *optional*, defaults to 1.0):
            Shrinks or expands the number of channels in each layer. Default is 1.0, which starts the network with 32
            channels. This is sometimes also called "alpha" or "width multiplier".
        depth_divisible_by (`int`, *optional*, defaults to 8):
            The number of channels in each layer will always be a multiple of this number.
        min_depth (`int`, *optional*, defaults to 8):
            All layers will have at least this many channels.
        expand_ratio (`float`, *optional*, defaults to 6.0):
            The number of output channels of the first layer in each block is the number of input channels times
            the expansion ratio.
        output_stride (`int`, *optional*, defaults to 32):
            The ratio between the spatial resolution of the input and output feature maps. By default the model
            reduces the input dimensions by a factor of 32. If `output_stride` is 8 or 16, the model uses dilated
            convolutions on the depthwise layers instead of regular convolutions, so that the feature maps never
            become more than 8x or 16x smaller than the input image.
        first_layer_is_expansion (`bool`, *optional*, defaults to `True`):
            True if the very first convolution layer is also the expansion layer for the first expansion block.
        finegrained_output (`bool`, *optional*, defaults to `True`):
            If true, the number of output channels in the final convolution layer will stay large (1280) even if
            `depth_multiplier` is less than 1.
        hidden_act (`str` or `function`, *optional*, defaults to `"relu6"`):
            The non-linear activation function (function or string) in the Transformer encoder and convolution
            layers.
        tf_padding (`bool`, *optional*, defaults to `True`):
            Whether to use TensorFlow padding rules on the convolution layers.
        classifier_dropout_prob (`float`, *optional*, defaults to 0.8):
            The dropout ratio for attached classifiers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 0.001):
            The epsilon used by the layer normalization layers.
        semantic_loss_ignore_index (`int`, *optional*, defaults to 255):
            The index that is ignored by the loss function of the semantic segmentation model.

    Example:

    ```python
    >>> from transformers import MobileNetV2Config, MobileNetV2Model

    >>> # Initializing a "mobilenet_v2_1.0_224" style configuration
    >>> configuration = MobileNetV2Config()
    ```"""

    # Identifier of this model family
    model_type = "mobilenet_v2"

    def __init__(
        self,
        num_channels=3,
        image_size=224,
        depth_multiplier=1.0,
        depth_divisible_by=8,
        min_depth=8,
        expand_ratio=6.0,
        output_stride=32,
        first_layer_is_expansion=True,
        finegrained_output=True,
        hidden_act="relu6",
        tf_padding=True,
        classifier_dropout_prob=0.8,
        initializer_range=0.02,
        layer_norm_eps=0.001,
        semantic_loss_ignore_index=255,
        **kwargs,
    ):
        # Remaining keyword arguments are handled by the base configuration class
        super().__init__(**kwargs)

        # A non-positive multiplier cannot produce a usable channel count
        if depth_multiplier <= 0:
            raise ValueError("depth_multiplier must be greater than zero.")

        # Store every architecture hyper-parameter on the configuration object
        self.num_channels = num_channels
        self.image_size = image_size
        self.depth_multiplier = depth_multiplier
        self.depth_divisible_by = depth_divisible_by
        self.min_depth = min_depth
        self.expand_ratio = expand_ratio
        self.output_stride = output_stride
        self.first_layer_is_expansion = first_layer_is_expansion
        self.finegrained_output = finegrained_output
        self.hidden_act = hidden_act
        self.tf_padding = tf_padding
        self.classifier_dropout_prob = classifier_dropout_prob
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.semantic_loss_ignore_index = semantic_loss_ignore_index


# ONNX export configuration for MobileNetV2, built on top of OnnxConfig
class MobileNetV2OnnxConfig(OnnxConfig):
    # Minimum torch version required for the ONNX export
    torch_onnx_minimum_version = version.parse("1.11")

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        # Single input whose axis 0 is named "batch"
        dynamic_axes = {0: "batch"}
        return OrderedDict([("pixel_values", dynamic_axes)])

    @property
    def outputs(self) -> Mapping[str, Mapping[int, str]]:
        # Image classification exposes the logits; every other task exposes the two backbone outputs
        if self.task == "image-classification":
            output_names = ("logits",)
        else:
            output_names = ("last_hidden_state", "pooler_output")
        return OrderedDict((output_name, {0: "batch"}) for output_name in output_names)

    @property
    def atol_for_validation(self) -> float:
        # Absolute tolerance used when validating the exported model
        return 1e-4

.\models\mobilenet_v2\convert_original_tf_checkpoint_to_pytorch.py

# Source file encoding: UTF-8
# Copyright notice and license information: Apache License 2.0
# Imports used by the conversion script
import argparse  # command-line argument parsing
import json  # JSON parsing
import re  # regular expressions
from pathlib import Path  # filesystem paths

import requests  # HTTP requests
import torch  # PyTorch
from huggingface_hub import hf_hub_download  # file download from the Hugging Face Hub
from PIL import Image  # image loading

from transformers import (  # classes and functions from the transformers library
    MobileNetV2Config,  # MobileNetV2 configuration class
    MobileNetV2ForImageClassification,  # MobileNetV2 with an image classification head
    MobileNetV2ForSemanticSegmentation,  # MobileNetV2 with a semantic segmentation head
    MobileNetV2ImageProcessor,  # image processor for MobileNetV2
    load_tf_weights_in_mobilenet_v2,  # loads TensorFlow checkpoint weights into a MobileNetV2 model
)
from transformers.utils import logging  # logging utilities of the transformers library

# Log at INFO verbosity
logging.set_verbosity_info()
# Module-level logger
logger = logging.get_logger(__name__)


def get_mobilenet_v2_config(model_name):
    """
    Build the `MobileNetV2Config` that corresponds to a checkpoint name.

    The depth multiplier and image size are parsed from names ending in `mobilenet_v2_<multiplier>_<size>`. Names
    starting with `deeplabv3_` get an output stride of 8 and 21 labels; all other names get 1001 labels, with
    index 0 reserved for "background". The label names are downloaded from the `huggingface/label-files` dataset
    repository on the Hugging Face Hub.

    Args:
        model_name: Name of the checkpoint being converted.

    Returns:
        The populated configuration, including `id2label` and `label2id`.

    Raises:
        ValueError: If `model_name` contains "quant" (quantized models are not supported).
    """
    config = MobileNetV2Config(layer_norm_eps=0.001)

    if "quant" in model_name:
        raise ValueError("Quantized models are not supported.")

    # Names such as "mobilenet_v2_1.0_224" carry the depth multiplier and the image size
    matches = re.match(r"^.*mobilenet_v2_([^_]*)_([^_]*)$", model_name)
    if matches:
        config.depth_multiplier = float(matches[1])
        config.image_size = int(matches[2])

    if model_name.startswith("deeplabv3_"):
        config.output_stride = 8
        config.num_labels = 21
        filename = "pascal-voc-id2label.json"
    else:
        # The other checkpoints predict 1001 classes: a background class plus the 1000 classes of the label file
        config.num_labels = 1001
        filename = "imagenet-1k-id2label.json"

    repo_id = "huggingface/label-files"
    label_file_path = hf_hub_download(repo_id, filename, repo_type="dataset")
    # Use a context manager so the file handle is closed, and read the JSON explicitly as UTF-8
    with open(label_file_path, "r", encoding="utf-8") as label_file:
        id2label = json.load(label_file)

    if config.num_labels == 1001:
        # Shift every label up by one to make room for the background class at index 0
        id2label = {int(k) + 1: v for k, v in id2label.items()}
        id2label[0] = "background"
    else:
        id2label = {int(k): v for k, v in id2label.items()}

    config.id2label = id2label
    config.label2id = {v: k for k, v in id2label.items()}

    return config


# Fetch the image used to sanity-check the converted model
def prepare_img():
    """Download the COCO val2017 image 000000039769.jpg and open it with PIL, returning the `Image` object."""
    response = requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True)
    return Image.open(response.raw)


# Gradients are not needed: the model is only run forward once to verify the converted weights.
@torch.no_grad()
def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_path, push_to_hub=False):
    """
    Copy/paste/tweak the original TensorFlow weights into our MobileNetV2 structure.

    Args:
        model_name (`str`):
            Name of the original checkpoint. Names starting with `"deeplabv3_"` are converted to a semantic
            segmentation model, everything else to an image classification model.
        checkpoint_path (`str`):
            Path to the original TensorFlow checkpoint.
        pytorch_dump_folder_path (`str`):
            Directory in which the converted model and image processor are saved. It is created (including
            missing parent directories) if it does not exist.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the converted model and image processor to the Hub under `google/<model_name>`.

    Raises:
        ValueError: If the name is not supported by `get_mobilenet_v2_config`, or if a `"deeplabv3_"` name has no
            reference logits to verify against.
        AssertionError: If the logits' shape or values do not match the reference values.
    """
    config = get_mobilenet_v2_config(model_name)
    is_segmentation = model_name.startswith("deeplabv3_")

    # Instantiate the 🤗 model matching the task, in eval mode.
    if is_segmentation:
        model = MobileNetV2ForSemanticSegmentation(config).eval()
    else:
        model = MobileNetV2ForImageClassification(config).eval()

    # Load the weights from the TensorFlow checkpoint into the model.
    load_tf_weights_in_mobilenet_v2(model, config, checkpoint_path)

    # Resize the shortest edge to image_size + 32, then center crop to image_size x image_size.
    image_processor = MobileNetV2ImageProcessor(
        crop_size={"width": config.image_size, "height": config.image_size},
        size={"shortest_edge": config.image_size + 32},
    )
    encoding = image_processor(images=prepare_img(), return_tensors="pt")
    outputs = model(**encoding)
    logits = outputs.logits

    # Verify the output against reference values.
    # NOTE: these checks use `assert`, so they are skipped when Python runs with -O.
    if is_segmentation:
        assert logits.shape == (1, 21, 65, 65)

        if model_name == "deeplabv3_mobilenet_v2_1.0_513":
            expected_logits = torch.tensor(
                [
                    [[17.5790, 17.7581, 18.3355], [18.3257, 18.4230, 18.8973], [18.6169, 18.8650, 19.2187]],
                    [[-2.1595, -2.0977, -2.3741], [-2.4226, -2.3028, -2.6835], [-2.7819, -2.5991, -2.7706]],
                    [[4.2058, 4.8317, 4.7638], [4.4136, 5.0361, 4.9383], [4.5028, 4.9644, 4.8734]],
                ]
            )
        else:
            raise ValueError(f"Unknown model name: {model_name}")

        # Compare the top-left 3x3 patch of the first three class maps.
        assert torch.allclose(logits[0, :3, :3, :3], expected_logits, atol=1e-4)
    else:
        assert logits.shape == (1, 1001)

        if model_name == "mobilenet_v2_1.4_224":
            expected_logits = torch.tensor([0.0181, -1.0015, 0.4688])
        elif model_name == "mobilenet_v2_1.0_224":
            expected_logits = torch.tensor([0.2445, -1.1993, 0.1905])
        elif model_name == "mobilenet_v2_0.75_160":
            expected_logits = torch.tensor([0.2482, 0.4136, 0.6669])
        elif model_name == "mobilenet_v2_0.35_96":
            expected_logits = torch.tensor([0.1451, -0.4624, 0.7192])
        else:
            # No reference values for this variant: only the shape check above applies.
            expected_logits = None

        if expected_logits is not None:
            assert torch.allclose(logits[0, :3], expected_logits, atol=1e-4)

    # `parents=True` so that a nested output path whose parent does not exist yet can be created.
    Path(pytorch_dump_folder_path).mkdir(parents=True, exist_ok=True)
    print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)
    print(f"Saving image processor to {pytorch_dump_folder_path}")
    image_processor.save_pretrained(pytorch_dump_folder_path)

    if push_to_hub:
        print("Pushing to the hub...")
        repo_id = "google/" + model_name
        image_processor.push_to_hub(repo_id)
        model.push_to_hub(repo_id)
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the TensorFlow -> PyTorch conversion.
    arg_parser = argparse.ArgumentParser()

    # Optional: which checkpoint variant to convert (has a default).
    arg_parser.add_argument(
        "--model_name",
        type=str,
        default="mobilenet_v2_1.0_224",
        help="Name of the MobileNetV2 model you'd like to convert. Should be in the form 'mobilenet_v2_<depth>_<size>'.",
    )
    # Required: location of the original TensorFlow checkpoint.
    arg_parser.add_argument(
        "--checkpoint_path", type=str, required=True, help="Path to the original TensorFlow checkpoint (.ckpt file)."
    )
    # Required: where the converted model is written.
    arg_parser.add_argument(
        "--pytorch_dump_folder_path", type=str, required=True, help="Path to the output PyTorch model directory."
    )
    # Optional flag: also upload the converted model.
    arg_parser.add_argument(
        "--push_to_hub", action="store_true", help="Whether or not to push the converted model to the 🤗 hub."
    )

    cli_args = arg_parser.parse_args()

    convert_movilevit_checkpoint(
        model_name=cli_args.model_name,
        checkpoint_path=cli_args.checkpoint_path,
        pytorch_dump_folder_path=cli_args.pytorch_dump_folder_path,
        push_to_hub=cli_args.push_to_hub,
    )

.\models\mobilenet_v2\feature_extraction_mobilenet_v2.py

# coding=utf-8
# 定义了编码格式为 UTF-8,确保支持多种字符集
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
# 版权声明,声明了代码的版权信息和保留所有权利

# Licensed under the Apache License, Version 2.0 (the "License");
# 指定本代码采用 Apache License, Version 2.0 授权许可
# you may not use this file except in compliance with the License.
# 除非符合许可证的要求,否则不得使用本文件

# You may obtain a copy of the License at
# 您可以在以下网址获取许可证的副本
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# 除非适用法律要求或书面同意,否则本软件按"原样"分发,不附带任何明示或暗示的保证或条件

# See the License for the specific language governing permissions and
# limitations under the License.
# 请参阅许可证以查看特定语言下的权限和限制

"""Feature extractor class for MobileNetV2."""
# 为 MobileNetV2 设计的特征提取器类

import warnings
# 导入警告模块,用于发出警告消息

from ...utils import logging
# Import the library's logging utility module (`logging` here is a module, not a function)
from .image_processing_mobilenet_v2 import MobileNetV2ImageProcessor
# 从当前目录下的 image_processing_mobilenet_v2 模块中导入 MobileNetV2ImageProcessor 类

logger = logging.get_logger(__name__)
# 获取当前模块的日志记录器对象

class MobileNetV2FeatureExtractor(MobileNetV2ImageProcessor):
    """Deprecated name for `MobileNetV2ImageProcessor`; emits a `FutureWarning` when instantiated."""

    def __init__(self, *args, **kwargs) -> None:
        # Tell the user about the deprecation before delegating to the parent class.
        deprecation_message = (
            "The class MobileNetV2FeatureExtractor is deprecated and will be removed in version 5 of Transformers."
            " Please use MobileNetV2ImageProcessor instead."
        )
        warnings.warn(deprecation_message, FutureWarning)

        # All arguments are forwarded unchanged to MobileNetV2ImageProcessor.
        super().__init__(*args, **kwargs)

.\models\mobilenet_v2\image_processing_mobilenet_v2.py

# coding=utf-8
# 设置文件编码为 UTF-8

# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
# 版权声明,保留所有权利

# Licensed under the Apache License, Version 2.0 (the "License");
# 根据 Apache License, Version 2.0 许可证进行许可

# you may not use this file except in compliance with the License.
# 除非符合许可证要求,否则不得使用此文件

# You may obtain a copy of the License at
# 您可以在以下链接获取许可证的副本
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# 根据许可证,软件按"原样"分发,不提供任何明示或暗示的保证或条件

# See the License for the specific language governing permissions and
# limitations under the License.
# 请参阅许可证,了解特定语言的权限和限制

"""Image processor class for MobileNetV2."""
# MobileNetV2 图像处理器类

from typing import Dict, List, Optional, Tuple, Union

import numpy as np  # 导入 NumPy 库

from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
# 导入基本图像处理工具,批处理特征,获取大小字典函数

from ...image_transforms import (
    get_resize_output_image_size,  # 导入获取调整大小后图像尺寸的函数
    resize,  # 导入调整大小的函数
    to_channel_dimension_format,  # 导入转换通道维度格式的函数
)

from ...image_utils import (
    IMAGENET_STANDARD_MEAN,  # 导入 ImageNet 标准均值
    IMAGENET_STANDARD_STD,  # 导入 ImageNet 标准标准差
    ChannelDimension,  # 导入通道维度类
    ImageInput,  # 导入图像输入类
    PILImageResampling,  # 导入 PIL 图像重采样枚举
    infer_channel_dimension_format,  # 导入推断通道维度格式的函数
    is_scaled_image,  # 导入检查是否为缩放图像的函数
    make_list_of_images,  # 导入生成图像列表的函数
    to_numpy_array,  # 导入转换为 NumPy 数组的函数
    valid_images,  # 导入验证图像函数
    validate_kwargs,  # 导入验证关键字参数的函数
    validate_preprocess_arguments,  # 导入验证预处理参数的函数
)

from ...utils import TensorType, is_torch_available, is_torch_tensor, logging
# Import TensorType, the helpers that check whether torch is available / whether an object is a torch tensor, and the logging module

if is_torch_available():  # torch is optional: import it only when it is installed
    import torch  # used by post_process_semantic_segmentation

logger = logging.get_logger(__name__)  # module-level logger


class MobileNetV2ImageProcessor(BaseImageProcessor):
    r"""
    Constructs a MobileNetV2 image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by
            `do_resize` in the `preprocess` method.
        size (`Dict[str, int]` *optional*, defaults to `{"shortest_edge": 256}`):
            Size of the image after resizing. The shortest edge of the image is resized to `size["shortest_edge"]`,
            keeping the input aspect ratio. Can be overridden by `size` in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
            Resampling filter to use when resizing the image. Can be overridden by `resample` in the `preprocess`
            method.
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the
            image is padded with 0's and then center cropped. Can be overridden by `do_center_crop` in the
            `preprocess` method.
        crop_size (`Dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
            Desired output size when applying center cropping. Only has an effect if `do_center_crop` is set to
            `True`. Can be overridden by `crop_size` in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale`
            in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by `do_normalize` in the `preprocess` method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or a list of floats with one entry per image
            channel. Can be overridden by `image_mean` in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or a list of floats with one entry
            per image channel. Can be overridden by `image_std` in the `preprocess` method.
    """

    # Names of the inputs the model expects from this processor.
    model_input_names = ["pixel_values"]

    def __init__(
        self,
        do_resize: bool = True,
        size: Optional[Dict[str, int]] = None,
        resample: PILImageResampling = PILImageResampling.BILINEAR,
        do_center_crop: bool = True,
        crop_size: Optional[Dict[str, int]] = None,
        do_rescale: bool = True,
        rescale_factor: Union[int, float] = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Fall back to the documented defaults, then normalise both sizes into size dicts.
        size = size if size is not None else {"shortest_edge": 256}
        size = get_size_dict(size, default_to_square=False)
        crop_size = crop_size if crop_size is not None else {"height": 224, "width": 224}
        crop_size = get_size_dict(crop_size, param_name="crop_size")
        self.do_resize = do_resize
        self.size = size
        self.resample = resample
        self.do_center_crop = do_center_crop
        self.crop_size = crop_size
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
        self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
        # Keyword arguments accepted by `preprocess`; used to validate unexpected kwargs.
        self._valid_processor_keys = [
            "images",
            "do_resize",
            "size",
            "resample",
            "do_center_crop",
            "crop_size",
            "do_rescale",
            "rescale_factor",
            "do_normalize",
            "image_mean",
            "image_std",
            "return_tensors",
            "data_format",
            "input_data_format",
        ]

    # Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize
    def resize(
        self,
        image: np.ndarray,
        size: Dict[str, int],
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Resize an image. The shortest edge of the image is resized to size["shortest_edge"], with the longest edge
        resized to keep the input aspect ratio.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Size of the output image. Must contain either `"shortest_edge"` or both `"height"` and `"width"`.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.

        Raises:
            ValueError: If `size` contains neither `"shortest_edge"` nor both `"height"` and `"width"`.
        """
        default_to_square = True
        if "shortest_edge" in size:
            # A single integer with default_to_square=False resizes the shortest edge and keeps the aspect ratio.
            size = size["shortest_edge"]
            default_to_square = False
        elif "height" in size and "width" in size:
            size = (size["height"], size["width"])
        else:
            raise ValueError("Size must contain either 'shortest_edge' or 'height' and 'width'.")

        output_size = get_resize_output_image_size(
            image,
            size=size,
            default_to_square=default_to_square,
            input_data_format=input_data_format,
        )
        return resize(
            image,
            size=output_size,
            resample=resample,
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )

    def preprocess(
        self,
        images: ImageInput,
        do_resize: Optional[bool] = None,
        size: Optional[Dict[str, int]] = None,
        resample: Optional[PILImageResampling] = None,
        do_center_crop: Optional[bool] = None,
        crop_size: Optional[Dict[str, int]] = None,
        do_rescale: Optional[bool] = None,
        rescale_factor: Optional[float] = None,
        do_normalize: Optional[bool] = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ):
        """
        Preprocess an image or batch of images.

        Every argument left as `None` falls back to the value stored on the processor at construction time.

        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the images.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Target size of the images after resizing.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use when resizing.
            do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
                Whether to center crop the images.
            crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`):
                Size of the center crop.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values by `rescale_factor`.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Scale factor to use when rescaling.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the images.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Mean to use when normalizing.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Standard deviation to use when normalizing.
            return_tensors (`str` or `TensorType`, *optional*):
                Type of tensors to return in the resulting `BatchFeature`.
            data_format (`str` or `ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
                Channel dimension format of the output images.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                Channel dimension format of the input images. If unset, it is inferred from the first image.

        Returns:
            `BatchFeature`: A batch whose `"pixel_values"` entry holds the preprocessed images.

        Raises:
            ValueError: If `images` contains an unsupported image type.
        """
        # Resolve per-call overrides against the values configured on the processor.
        do_resize = do_resize if do_resize is not None else self.do_resize
        size = size if size is not None else self.size
        size = get_size_dict(size, default_to_square=False)
        resample = resample if resample is not None else self.resample
        do_center_crop = do_center_crop if do_center_crop is not None else self.do_center_crop
        crop_size = crop_size if crop_size is not None else self.crop_size
        crop_size = get_size_dict(crop_size, param_name="crop_size")
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        image_mean = image_mean if image_mean is not None else self.image_mean
        image_std = image_std if image_std is not None else self.image_std

        images = make_list_of_images(images)

        # NOTE(review): helper signatures below follow the upstream library release this file was taken from;
        # confirm against the installed version.
        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)

        if not valid_images(images):
            raise ValueError(
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
        validate_preprocess_arguments(
            do_rescale=do_rescale,
            rescale_factor=rescale_factor,
            do_normalize=do_normalize,
            image_mean=image_mean,
            image_std=image_std,
            do_center_crop=do_center_crop,
            crop_size=crop_size,
            do_resize=do_resize,
            size=size,
            resample=resample,
        )

        # All transformations below operate on numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # All images are assumed to share the channel dimension format of the first one.
            input_data_format = infer_channel_dimension_format(images[0])

        if do_resize:
            images = [
                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
                for image in images
            ]

        # `center_crop`, `rescale` and `normalize` are expected to be inherited from `BaseImageProcessor`.
        if do_center_crop:
            images = [
                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
            ]

        if do_rescale:
            images = [
                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
                for image in images
            ]

        if do_normalize:
            images = [
                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
                for image in images
            ]

        images = [
            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
        ]

        data = {"pixel_values": images}
        return BatchFeature(data=data, tensor_type=return_tensors)

    def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None):
        """
        Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports
        PyTorch.

        Args:
            outputs ([`MobileNetV2ForSemanticSegmentation`]):
                Raw outputs of the model.
            target_sizes (`List[Tuple]` of length `batch_size`, *optional*):
                List of tuples corresponding to the requested final size (height, width) of each prediction. If unset,
                predictions will not be resized.

        Returns:
            semantic_segmentation: `List[torch.Tensor]` of length `batch_size`, where each item is a semantic
            segmentation map of shape (height, width) corresponding to the target_sizes entry (if `target_sizes` is
            specified). Each entry of each `torch.Tensor` correspond to a semantic class id.

        Raises:
            ValueError: If `target_sizes` is given but its length differs from the batch dimension of the logits.
        """
        # TODO: add support for other frameworks
        logits = outputs.logits

        if target_sizes is not None:
            if len(logits) != len(target_sizes):
                raise ValueError(
                    "Make sure that you pass in as many target sizes as the batch dimension of the logits"
                )

            if is_torch_tensor(target_sizes):
                target_sizes = target_sizes.numpy()

            semantic_segmentation = []
            # The lengths were checked above, so zip pairs every sample with its own target size.
            for sample_logits, target_size in zip(logits, target_sizes):
                # Upsample this sample's logits to the requested size, then take the argmax over the class axis.
                resized_logits = torch.nn.functional.interpolate(
                    sample_logits.unsqueeze(dim=0), size=target_size, mode="bilinear", align_corners=False
                )
                semantic_segmentation.append(resized_logits[0].argmax(dim=0))
        else:
            # No resizing requested: argmax over the class axis, then split the batch into a list.
            semantic_segmentation = logits.argmax(dim=1)
            semantic_segmentation = [semantic_segmentation[i] for i in range(semantic_segmentation.shape[0])]

        return semantic_segmentation
posted @ 2024-06-29 17:02  绝不原创的飞龙  阅读(4)  评论(0编辑  收藏  举报