yangyang12138

导航

情感分析

1.网络结构

 

 

 2.数据源

本文使用IMDB电影评论数据集,其训练集和测试集各包含25000条已标注的电影评论。评论满分为10分,其中得分小于等于4的为负面评论,得分大于等于7的为正面评论。

文章参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/user_guides/nlp_case/understand_sentiment/README.cn.html

 

from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
import math

# Hyper-parameters of the sentiment classifier.
CLASS_DIM = 2     # number of sentiment classes
EMB_DIM = 128     # dimension of the word embedding vectors
HID_DIM = 512     # dimension of the hidden layers
STACKED_NUM = 3   # number of layers in the stacked bidirectional LSTM
BATCH_SIZE = 128  # number of samples per mini-batch

# Word -> integer index mapping for the IMDB vocabulary.
word_dict = paddle.dataset.imdb.word_dict()

# Training samples pass through a shuffle buffer before being batched.
_shuffled_train_samples = paddle.reader.shuffle(
    paddle.dataset.imdb.train(word_dict), buf_size=25000)
train_reader = paddle.batch(_shuffled_train_samples, batch_size=BATCH_SIZE)

# Test samples are batched in the order the dataset yields them.
_test_samples = paddle.dataset.imdb.test(word_dict)
test_reader = paddle.batch(_test_samples, batch_size=BATCH_SIZE)

word_dict是词到index的映射。paddle.dataset.imdb.train(word_dict)返回的数据集格式为[([index1, index2, ……, indexn], a), ……],其中每条样本由一条评论的词index序列和标签a组成,a的取值为0或1。

# Select the device: GPU 0 when use_cuda is True, otherwise the CPU.
use_cuda = False
if use_cuda:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()

# Executor bound to the selected device.
exe = fluid.Executor(place)

# Input layer named "word": int64 values declared with lod_level=1.
data = fluid.data(
    name="word",
    shape=[None, 1],
    dtype="int64",
    lod_level=1,
)

创建执行器(Executor),并定义输入数据层

 

 

3.构建神经网络

# Stacked LSTM sentiment network.
#
# Word ids are looked up in an embedding table whose width is EMB_DIM (the
# original hard-coded 128 here, which silently ignored the constant), then
# projected by a fully connected layer and fed to the first LSTM.
emb = fluid.layers.embedding(
    input=data, size=[len(word_dict), EMB_DIM], is_sparse=True)
fc1 = fluid.layers.fc(input=emb, size=HID_DIM)

# NOTE(review): per the Paddle docs, dynamic_lstm's `size` is 4x the number
# of hidden units, so HID_DIM here is not the LSTM hidden width -- confirm.
lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=HID_DIM)

# Each further layer consumes both outputs (fc and lstm) of the layer below.
inputs = [fc1, lstm1]

for i in range(2, STACKED_NUM + 1):
    fc = fluid.layers.fc(input=inputs, size=HID_DIM)
    # Even-numbered layers run over the sequence in reverse, so the
    # direction alternates from one layer to the next.
    lstm, cell = fluid.layers.dynamic_lstm(
        input=fc, size=HID_DIM, is_reverse=(i % 2) == 0)
    inputs = [fc, lstm]

# Max-pool the top layer's fc and lstm outputs over each sequence.
fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

# Softmax classifier over the CLASS_DIM sentiment classes.
prediction = fluid.layers.fc(
    input=[fc_last, lstm_last], size=CLASS_DIM, act='softmax')

4.构建损失函数和优化器

# Ground-truth label input, fed under the name "label".
label = fluid.data(
    name="label",
    shape=[None, 1],
    dtype="int64",
)

# Cross-entropy against the softmax prediction, averaged into one loss value.
cost = fluid.layers.cross_entropy(
    input=prediction,
    label=label,
)
avg_cost = fluid.layers.mean(cost)

# Classification accuracy of the prediction against the labels.
accuracy = fluid.layers.accuracy(
    input=prediction,
    label=label,
)

# Despite the variable name, the optimizer is Adagrad, not plain SGD.
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
sgd_optimizer.minimize(avg_cost)

5.初始化参数并定义feeder

# Run the startup program once before training.
exe.run(fluid.default_startup_program())

# The original referenced `main_program` without ever defining it, which
# raised NameError; bind it to the default main program the layers above
# were added to.
main_program = fluid.default_main_program()

# Resolve the two input variables by name and build a feeder that converts
# reader mini-batches into the feed dict expected by exe.run().
feed_var_list_loop = [
    main_program.global_block().var(var_name)
    for var_name in ['word', 'label']
]
feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)

# Copy of the main program for evaluation.
# NOTE(review): Paddle's documentation recommends calling
# clone(for_test=True) before Optimizer.minimize(); here it runs after the
# optimizer has been attached -- confirm this is acceptable for the Paddle
# version in use.
test_program = main_program.clone(for_test=True)

6.训练数据

# Training loop: run mini-batches until step 30 of the first pass, save the
# inference model, then stop.
#
# Fixes relative to the original snippet:
#   * `pass_num`, `params_dirname` and `main_program` were never defined
#     (NameError); they are defined here.
#   * the saved feed name was "words", but the input layer is named "word".
#   * the loop variable was called `data`, clobbering the input layer `data`.
pass_num = 1                 # number of passes over the training set
params_dirname = 'o.model'   # output directory; set to None to skip saving
train_program = fluid.default_main_program()

_end = False
for epoch_id in range(pass_num):
    if _end:
        break
    for step_id, batch in enumerate(train_reader()):
        # metrics holds the fetched [avg_cost, accuracy] values of this batch.
        metrics = exe.run(
            train_program,
            feed=feeder.feed(batch),
            fetch_list=[avg_cost, accuracy])

        # Stop early after step 30 to keep the demo short.
        if step_id == 30:
            if params_dirname is not None:
                fluid.io.save_inference_model(
                    params_dirname, ["word"], [prediction], exe)
            _end = True
            break

 

posted on 2020-03-29 05:57  杨杨09265  阅读(209)  评论(0)  编辑  收藏  举报