# 利用词嵌入的自然语言二分类训练 (binary text classification training using word embeddings)

# author: Roy.G
import shopping_data
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense,Embedding,Flatten

# Load the four data splits returned by the project-local shopping_data module.
x_train, y_train, x_test, y_test = shopping_data.load_data()

# Sanity check: a separator line plus the shape of the training inputs,
# followed by the first training sample and its label.
separator = '-' * 40 + '\n'
print(separator, x_train.shape)
first_sample, first_label = x_train[0], y_train[0]
print(first_sample, '\n', first_label)

# Build the vocabulary: `vocalen` is the size value returned by the helper and
# `word_index` is the lookup used below to map words to their dictionary ids.
# NOTE(review): the second argument is y_train (the labels); presumably the
# index should be built from text only (e.g. x_train and x_test) — confirm
# against shopping_data.createWordIndex before changing the call.
vocalen, word_index = shopping_data.createWordIndex(x_train, y_train)
# The label previously read 'wocalen:', a misspelling of the variable name.
print('vocalen:', vocalen, 'index', word_index)

# Convert the training and test data into sequences of integer ids; the ids
# come from word_index.
x_train_index = shopping_data.word2Index(x_train, word_index)
x_test_index = shopping_data.word2Index(x_test, word_index)

# Bring every id sequence to a uniform length of 25 so the inputs are aligned.
maxlen = 25
x_train_index, x_test_index = (
    sequence.pad_sequences(ids, maxlen=maxlen)
    for ids in (x_train_index, x_test_index)
)

# Network: Embedding -> Flatten -> three 256-unit ReLU layers -> one sigmoid unit.
# Embedding arguments:
#   trainable=True      - the embedding matrix may be modified by training
#   input_dim=vocalen   - input dimension (vocabulary size value from createWordIndex)
#                         NOTE(review): assumes vocalen already covers the largest
#                         id plus the padding id 0 — confirm against shopping_data.
#   output_dim=300      - output dimension (length of each word vector)
#   input_length=maxlen - length of each input sequence
model = Sequential(
    [
        Embedding(
            trainable=True,
            input_dim=vocalen,
            output_dim=300,
            input_length=maxlen,
        ),
        Flatten(),
        *(Dense(units=256, activation='relu') for _ in range(3)),
        Dense(units=1, activation='sigmoid'),
    ]
)

# loss='binary_crossentropy': binary cross-entropy loss.
# optimizer='adam': adaptive optimizer (the original author notes it is faster than SGD).
# metrics=['accuracy']: report accuracy during training and evaluation.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 50 epochs using mini-batches of 512 samples.
model.fit(x_train_index, y_train, epochs=50, batch_size=512)

# Evaluate on the test split; the result unpacks into the loss value
# followed by the accuracy metric.
score, accuracy = model.evaluate(x_test_index, y_test)

for label, value in (('score:', score), ('accuracy:', accuracy)):
    print(label, value)

# posted on 2022-02-20 22:57  ttm6489  阅读(36)  评论(0)  编辑  收藏  举报

# 导航