NER (Named Entity Recognition)

Survey:


A 10,000-word survey of the past and present of Named Entity Recognition (NER): https://mp.weixin.qq.com/s/sNXG-K5aPRFFQ3F373-e0A

Annotation with the brat tool

LSTM_CRF

https://zhuanlan.zhihu.com/p/44042528

https://github.com/macanv/BERT-BiLSTM-CRF-NER

https://github.com/z814081807/DeepNER

NER series

https://www.depends-on-the-definition.com/tags/named-entity-recognition/

Large models:

  • BERT
  • FLAT (Flat-Lattice Transformer, which fuses lexicon information for Chinese NER)
  • bert-mrc (frames NER as a machine reading comprehension / span extraction task)

NER data formats (a small tagging example follows the list)

  • BIO

  • BIOES
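
As a quick illustration (the sentence and tag set here are made up, not from the original notes), the same entity spans under the two schemes:

# The same tokens labeled under BIO and BIOES (hypothetical example).
tokens = ["张", "三", "在", "北", "京", "工", "作"]
bio    = ["B-PER", "I-PER", "O", "B-LOC", "I-LOC", "O", "O"]
bioes  = ["B-PER", "E-PER", "O", "B-LOC", "E-LOC", "O", "O"]
# BIOES additionally uses E- for the last token of an entity and S- for
# single-token entities, so entity boundaries are marked explicitly.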

load data
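
The original notes leave this section empty. Below is a rough sketch (not from the original post) of what train_ds is expected to yield for the training loop further down: batches of (input_ids, attention_mask, labels), with label id 0 reserved for padding so that tf.math.count_nonzero recovers the true sequence length. The build_dataset helper and the tag2id mapping are hypothetical names.

import tensorflow as tf
from transformers import AutoTokenizer

def build_dataset(texts, tag_seqs, tag2id, model_path, batch_size, max_len=128):
    # Hypothetical helper: tokenize texts and pad BIO/BIOES tag sequences to the
    # same length (subword/label alignment is glossed over in this sketch).
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    enc = tokenizer(texts, padding="max_length", truncation=True,
                    max_length=max_len, return_tensors="np")
    labels = tf.keras.preprocessing.sequence.pad_sequences(
        [[tag2id[t] for t in tags] for tags in tag_seqs],
        maxlen=max_len, padding="post", value=0)
    ds = tf.data.Dataset.from_tensor_slices(
        (enc["input_ids"], enc["attention_mask"], labels))
    return ds.shuffle(1000).batch(batch_size)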

define model

class Config:
    def __init__(self):
        # control switches
        self.use_bert = 1      # use a pretrained BERT-style encoder as the backbone
        self.finetune = 0      # if 0, the backbone weights are frozen
        self.use_bilstm = 0    # add a BiLSTM layer on top of the token embeddings
        self.model_path = "/content/drive/MyDrive/Colab Notebooks/ner/input/model"

        # model hyperparameters
        self.hidden_dim = 256
        self.embedding_dim = 256
        self.epoch = 1
        self.dropout_rate = 0.5
        self.num_classes = 15
        self.vocab_size = 1000

        # training
        self.batch_size = 16
        self.print_per_batch = 100

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow_addons.text.crf import crf_log_likelihood
from transformers import TFAutoModel

class NerModel(tf.keras.Model):
    def __init__(self, configs):
        super(NerModel, self).__init__()
        self.use_bert = configs.use_bert
        self.finetune = configs.finetune
        self.use_bilstm = configs.use_bilstm
        self.hidden_dim = configs.hidden_dim
        self.dropout_rate = configs.dropout_rate
        self.num_classes = configs.num_classes
        self.vocab_size = configs.vocab_size

        if self.use_bert:
            self.backbone = TFAutoModel.from_pretrained(configs.model_path)
            # freeze the backbone unless fine-tuning is enabled
            self.backbone.trainable = bool(self.finetune)

        self.embedding = tf.keras.layers.Embedding(self.vocab_size, configs.embedding_dim, mask_zero=True)
        self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
        self.bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(self.hidden_dim, return_sequences=True))
        self.dense = tf.keras.layers.Dense(self.num_classes)
        # CRF transition matrix, learned jointly with the rest of the model
        self.transition_params = tf.Variable(tf.random.uniform(shape=(self.num_classes, self.num_classes)))

    @tf.function
    def call(self, inputs, inputs_length, targets, training=None):
        if self.use_bert:
            # inputs = [input_ids, attention_mask]; take the last hidden states as token embeddings
            embedding_inputs = self.backbone(inputs[0], attention_mask=inputs[1])[0]
        else:
            embedding_inputs = self.embedding(inputs)

        outputs = self.dropout(embedding_inputs, training=training)

        if self.use_bilstm:
            outputs = self.bilstm(outputs)

        # per-token emission scores, then CRF log-likelihood over the whole sequence
        logits = self.dense(outputs)
        tensor_targets = tf.convert_to_tensor(targets, dtype=tf.int32)
        log_likelihood, self.transition_params = crf_log_likelihood(
            logits, tensor_targets, inputs_length, transition_params=self.transition_params)
        return logits, log_likelihood, self.transition_params
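
How the pieces are wired together is not shown in the original notes; a minimal sketch (the optimizer and learning rate are assumptions):

config = Config()
model = NerModel(config)
# Adam with a small learning rate is an assumption; the original notes do not show the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)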

train and evaluate

# train

def save_model(obj, export_dir):
    tf.saved_model.save(obj, export_dir)

def get_acc_one_step(logits, text_lens, labels_batch):
    # Viterbi-decode each sequence with the learned transition matrix and compare
    # the decoded path against the gold labels (token-level accuracy, averaged per batch).
    paths = []
    accuracy = 0
    for logit, text_len, labels in zip(logits, text_lens, labels_batch):
        viterbi_path, _ = tfa.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)
        correct_prediction = tf.equal(
            tf.convert_to_tensor(tf.keras.preprocessing.sequence.pad_sequences([viterbi_path], padding='post'),
                                 dtype=tf.int32),
            tf.convert_to_tensor(tf.keras.preprocessing.sequence.pad_sequences([labels[:text_len]], padding='post'),
                                 dtype=tf.int32)
        )
        accuracy = accuracy + tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy = accuracy / len(paths)
    return accuracy


@tf.function
def train_one_step(text_batch, inputs_length, labels_batch):
    with tf.GradientTape() as tape:
        logits, log_likelihood, _ = model(text_batch, inputs_length, labels_batch, training=True)
        # CRF training objective: minimize the negative log-likelihood of the gold tag paths
        loss = -tf.reduce_mean(log_likelihood)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss, logits

import time

# Assumes config, model, optimizer, logger and train_ds are defined earlier in the notebook.
for i in range(config.epoch):
    logger.info('epoch:{}/{}'.format(i + 1, config.epoch))
    print('epoch:{}/{}'.format(i + 1, config.epoch))
    t0 = time.time()
    for step, (input_ids, attention_mask, labels_batch) in enumerate(train_ds):
        input_ids, attention_mask = np.array(input_ids), np.array(attention_mask)
        inputs = [input_ids, attention_mask]
        # sequence lengths: labels are padded with 0, so count the non-zero label ids
        inputs_length = tf.math.count_nonzero(labels_batch, 1)

        loss, logits = train_one_step(inputs, inputs_length, labels_batch)

        if step % config.print_per_batch == 0:
            acc = get_acc_one_step(logits, inputs_length, labels_batch)
            print('training batch: %5d, loss: %.5f, acc: %.5f' % (step, loss, acc))
            print(f"trained on {(step + 1) * config.batch_size} samples, elapsed: {time.time() - t0:.1f}s")
            logger.info('training batch: %5d, loss: %.5f, acc: %.5f' % (step, loss, acc))
    save_model(model, f"/content/drive/MyDrive/Colab Notebooks/ner/input/temp/longformer_crf_epoch_{i}")
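
The original notes stop at training. As a rough sketch (not from the original post), inference on a single sentence could look like the following; tokenizer and id2tag are hypothetical objects from the data-loading step.

def predict(text, tokenizer, id2tag, max_len=128):
    # Hypothetical inference helper: encode one sentence, run the model with
    # dummy labels (the CRF log-likelihood is ignored), and Viterbi-decode the logits.
    enc = tokenizer(text, padding="max_length", truncation=True,
                    max_length=max_len, return_tensors="np")
    seq_len = int(np.count_nonzero(enc["attention_mask"]))
    dummy_labels = np.zeros((1, max_len), dtype=np.int32)
    logits, _, trans = model([enc["input_ids"], enc["attention_mask"]],
                             tf.constant([seq_len]), dummy_labels)
    tags, _ = tfa.text.viterbi_decode(logits[0].numpy()[:seq_len], trans.numpy())
    return [id2tag[t] for t in tags]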