Dual-embedded LSTM for QA match: a dual-embedding LSTM chat-matching model

First, a look at the model architecture diagram.

The LSTM is a kind of RNN and is most commonly used to encode sequences; there are plenty of papers on LSTMs that you can Google yourself.

The model below is one I arrived at through my own experiments. It works reasonably well and can be used to train a deep-learning chatbot, as long as you have a corpus.

It uses embedding + bidirectional LSTM + fully connected + max-pooling, so it is not hard to follow; a distilled sketch of that encoder pattern is shown right below, before the full implementation.
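
Before the full code, here is a minimal sketch of just that encoder pattern, written against the same TF 0.x-era API as the post: token ids are embedded, run through a bidirectional LSTM, the forward and backward outputs are concatenated, and a max over the time axis yields one fixed-size vector. The names (encode, ids, lengths, vocab_size, embedding_dim, rnn_dim) are illustrative, not part of the original model.

import tensorflow as tf

def encode(ids, lengths, vocab_size, embedding_dim, rnn_dim):
    # embedding: [batch, time] int ids -> [batch, time, embedding_dim]
    w = tf.get_variable('w_embed_sketch', shape = [vocab_size, embedding_dim],
                        initializer = tf.random_uniform_initializer(-1.0, 1.0))
    x = tf.nn.embedding_lookup(w, ids)
    # bidirectional LSTM over the padded sequence
    cell_fw = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple = True)
    cell_bw = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple = True)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x,
                                                 sequence_length = lengths,
                                                 dtype = tf.float32)
    # concatenate forward/backward outputs and max-pool over time:
    # [batch, time, 2*rnn_dim] -> [batch, 2*rnn_dim]
    return tf.reduce_max(tf.concat(2, [outputs[0], outputs[1]]), 1)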

Finally, here is the TensorFlow code for the model implementation. ques and ans are padded token-id matrices for the question and the correct answer, ans_f is a negative (wrong) answer used by the ranking loss, and the *_len arguments carry the true sequence lengths.

import tensorflow as tf

# hparams is assumed to be defined elsewhere and to provide vocab_size, embedding_dim and rnn_dim
def model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size):
    # two embedding "characters" (channels): every token has two independent embedding tables
    n_characters = 2

    w_embed = tf.get_variable('w_embed', shape = [hparams.vocab_size, n_characters*hparams.embedding_dim], initializer = tf.random_uniform_initializer(-1.0, 1.0))
    w_embed_2 = tf.get_variable('w_embed_2', shape = [hparams.vocab_size, n_characters*hparams.embedding_dim], initializer = tf.random_uniform_initializer(-1.0, 1.0))

    # 1.2 --- rnn for question ---
    # look up the question ids in both embedding tables
    ques_1 = tf.nn.embedding_lookup(w_embed, ques, name = 'ques_1')
    ques_2 = tf.nn.embedding_lookup(w_embed_2, ques, name = 'ques_2')

    # 1.2.0 --- calculate the "character" distribution for the question ---
    # a BiLSTM over the concatenated embeddings, max-pooled over time, then
    # projected to a softmax over the n_characters embedding channels
    with tf.variable_scope('character') as vs_latent_character:
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        output, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, tf.concat(2, [ques_1, ques_2]), sequence_length = ques_len, dtype = tf.float32)
        character_information = tf.reduce_max(tf.concat(2, [output[0], output[1]]), 1)
        T = tf.get_variable('T', shape = [hparams.rnn_dim*2, n_characters])
        # soft weights over the two embedding channels, shape [batch, 1, n_characters]
        character_dist = tf.expand_dims(tf.nn.softmax(tf.matmul(character_information, T)), 1)
        # hard (one-hot) alternative:
        #character = tf.argmax(tf.matmul(character_information, T), 1)
        #character_dist = tf.expand_dims(tf.one_hot(character, n_characters, on_value = 1.0, off_value = 0.0), 1)
        print(character_dist.get_shape())




    # 1.2.1 --- encode the question with each embedding channel (the answers below reuse
    #           these RNNs, giving the different ques/ans combinations) ---
    with tf.variable_scope('rnn_ques') as vs_ques:
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        output_ques, state_ques = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ques_1, sequence_length = ques_len, dtype = tf.float32)

    with tf.variable_scope('rnn_ques2') as vs_ques2:
        cell_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        cell_r_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
        output_ques_2, state_ques = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ques_2, sequence_length = ques_len, dtype = tf.float32)

    # max-pool each BiLSTM over time, then mix the two encodings with character_dist:
    # [batch, 1, 2] x [batch, 2, rnn_dim*2] -> [batch, 1, rnn_dim*2]
    ques_output_1 = tf.reduce_max(tf.concat(2, [output_ques[0], output_ques[1]]), 1)
    ques_output_2 = tf.reduce_max(tf.concat(2, [output_ques_2[0], output_ques_2[1]]), 1)
    ques_output = tf.batch_matmul(character_dist, tf.pack([ques_output_1, ques_output_2], axis = 1))
    ques_output = tf.squeeze(ques_output, [1])

    # fully connected (bilinear) projection of the question encoding
    M = tf.get_variable('M', shape = [hparams.rnn_dim*2, hparams.rnn_dim*2], initializer = tf.random_uniform_initializer(-1.0, 1.0))
    ques_output = tf.matmul(ques_output, M)


    # 1.3 --- rnn for the answers (true answer and negative answer), reusing the question RNNs ---
    ans_1 = tf.nn.embedding_lookup(w_embed, ans, name = 'ans_1')
    ans_f_1 = tf.nn.embedding_lookup(w_embed, ans_f, name = 'ans_f_1')
    ans_2 = tf.nn.embedding_lookup(w_embed_2, ans, name = 'ans_2')
    ans_f_2 = tf.nn.embedding_lookup(w_embed_2, ans_f, name = 'ans_f_2')
    with tf.variable_scope('rnn_ques', reuse = True) as vs_ans:
        output_1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_1, sequence_length = ans_len, dtype = tf.float32)
        output_f1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_f_1, sequence_length = ans_f_len, dtype = tf.float32)
    with tf.variable_scope('rnn_ques2', reuse = True) as vs_ans_2:
        output_2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_2, sequence_length = ans_len, dtype = tf.float32)
        output_f2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_f_2, sequence_length = ans_f_len, dtype = tf.float32)

    # max-pool and mix the true-answer encodings with the same character_dist weights
    ans_output_1 = tf.reduce_max(tf.concat(2, [output_1[0], output_1[1]]), 1)
    ans_output_2 = tf.reduce_max(tf.concat(2, [output_2[0], output_2[1]]), 1)

    ans_output = tf.batch_matmul(character_dist, tf.pack([ans_output_1, ans_output_2], axis = 1))
    ans_output = tf.squeeze(ans_output, [1])

    # same pooling and mixing for the negative answer
    ans_output_f1 = tf.reduce_max(tf.concat(2, [output_f1[0], output_f1[1]]), 1)
    ans_output_f2 = tf.reduce_max(tf.concat(2, [output_f2[0], output_f2[1]]), 1)

    ans_output_f = tf.batch_matmul(character_dist, tf.pack([ans_output_f1, ans_output_f2], axis = 1))
    ans_output_f = tf.squeeze(ans_output_f, [1])


    # 1.4 --- the prediction part ---
    # L2-normalize so the dot products below are cosine similarities
    ques_output = tf.nn.l2_normalize(ques_output, 1)
    ans_output = tf.nn.l2_normalize(ans_output, 1)
    ans_output_f = tf.nn.l2_normalize(ans_output_f, 1)

    prob = [ques_output, ans_output]
    simi = tf.reduce_sum(tf.mul(ques_output, ans_output), 1)      # similarity to the true answer
    simi_f = tf.reduce_sum(tf.mul(ques_output, ans_output_f), 1)  # similarity to the negative answer

    # pairwise hinge (ranking) loss with margin 0.25
    loss = tf.maximum(0.0, 0.25 - simi + simi_f)

    loss_ = tf.reduce_mean(loss)
    return prob, loss_
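
For context, here is a minimal sketch of how this function might be wired up for training. It is not from the original post: the placeholder definitions, the Adam optimizer and its learning rate, hparams.batch_size, and the batches iterable are all assumptions standing in for a real input pipeline that yields padded token-id matrices and their true lengths.

ques = tf.placeholder(tf.int32, [None, None], name = 'ques_in')
ques_len = tf.placeholder(tf.int32, [None], name = 'ques_len_in')
ans = tf.placeholder(tf.int32, [None, None], name = 'ans_in')
ans_len = tf.placeholder(tf.int32, [None], name = 'ans_len_in')
ans_f = tf.placeholder(tf.int32, [None, None], name = 'ans_f_in')
ans_f_len = tf.placeholder(tf.int32, [None], name = 'ans_f_len_in')

prob, loss = model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, hparams.batch_size)
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch in batches:  # batches: assumed iterable of dicts with padded ids and lengths
        _, batch_loss = sess.run(
            [train_op, loss],
            feed_dict = {ques: batch['ques'], ques_len: batch['ques_len'],
                         ans: batch['ans'], ans_len: batch['ans_len'],
                         ans_f: batch['ans_f'], ans_f_len: batch['ans_f_len']})

The returned prob pair holds the normalized question and answer vectors, so at serving time candidate answers can be ranked by their dot product with the question vector.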

 

posted @ 2017-03-16 15:35  LarryGates