TextCNN

Three passes at a TextCNN sentence classifier in TensorFlow 1.x: a first skeleton, a reusable class, and a complete training script.


import numpy as np
import pandas as pd
import tensorflow as tf

import data_helper

n_class = 3
learning_rate = 0.05
s_limit_len = 10            # fixed sentence length, in tokens
word_embedding_size = 100
voc_size = 7000

def get_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

# VALID padding keeps the pooling arithmetic below exact:
# a width-k filter leaves s_limit_len - k + 1 positions
def conv2d(input_x, W):
    return tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID")

def maxpooling(x, ksize, strides):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding="VALID")

inputs = tf.placeholder(tf.int32, [None, s_limit_len], name="inputs")
labels = tf.placeholder(tf.float32, [None, n_class], name="label_one-hot")

embedding_w = tf.Variable(tf.truncated_normal([voc_size, word_embedding_size], stddev=0.1, dtype=tf.float32))
# conv2d needs 4-D input, so add a channel dimension: [batch, s_limit_len, embedding_size, 1]
embedding_layer = tf.expand_dims(tf.nn.embedding_lookup(embedding_w, inputs), -1)

# one branch per n-gram width; conv2d filters are 4-D: [height, width, in_channels, out_channels]
conv1_W = get_weights([1, word_embedding_size, 1, 1])
conv1 = conv2d(embedding_layer, conv1_W)

conv3_W = get_weights([3, word_embedding_size, 1, 1])
conv3 = conv2d(embedding_layer, conv3_W)

conv5_W = get_weights([5, word_embedding_size, 1, 1])
conv5 = conv2d(embedding_layer, conv5_W)

conv7_W = get_weights([7, word_embedding_size, 1, 1])
conv7 = conv2d(embedding_layer, conv7_W)

# max-pool each branch over its whole output so each filter yields one feature
feature_map_1 = maxpooling(conv1, [1, s_limit_len - 1 + 1, 1, 1], [1, 1, 1, 1])
feature_map_3 = maxpooling(conv3, [1, s_limit_len - 3 + 1, 1, 1], [1, 1, 1, 1])
feature_map_5 = maxpooling(conv5, [1, s_limit_len - 5 + 1, 1, 1], [1, 1, 1, 1])
feature_map_7 = maxpooling(conv7, [1, s_limit_len - 7 + 1, 1, 1], [1, 1, 1, 1])

pool_out = tf.concat([feature_map_1, feature_map_3, feature_map_5, feature_map_7], 3)
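
The shape bookkeeping in the skeleton is easiest to verify by tracing one branch. This sketch uses the constants above (s_limit_len = 10, word_embedding_size = 100) with a symbolic batch size B:

# Shape trace for one branch, filter width k = 3:
#
#   inputs            [B, 10]            token ids
#   embedding_layer   [B, 10, 100, 1]    lookup + expand_dims adds a channel
#   conv (VALID)      [B, 10-3+1, 1, 1]  = [B, 8, 1, 1]
#   max_pool ksize 8  [B, 1, 1, 1]       one feature per filter
#
# Concatenating the four branches on axis 3 gives [B, 1, 1, 4],
# which flattens to [B, 4] ahead of the classifier.
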
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import metrics

class text_cnn():
    def __init__(self, n_class, s_limit_len, voc_size, embedding_size, filters, filter_nums, word2vec=None):
        self.n_class = n_class
        self.s_limit_len = s_limit_len
        self.inputs = tf.placeholder(tf.int32, [None, self.s_limit_len], name="inputs")
        self.y = tf.placeholder(tf.float32, [None, self.n_class])
        self.keep_prob = tf.placeholder(tf.float32)
        if word2vec is None:
            embedding_w = tf.Variable(tf.truncated_normal([voc_size, embedding_size], stddev=0.1))
        else:
            embedding_w = word2vec

        embedding_layer = tf.nn.embedding_lookup(embedding_w, self.inputs)
        # conv2d expects 4-D input (batch, height, width, channel), so add a channel
        # dimension; the data itself is unchanged. tf.reshape would also work,
        # but mind the resulting shape.
        embedding_layer = tf.expand_dims(embedding_layer, -1)

        # conv and pooling: one branch per filter width
        pool_arr = []
        filter_sum = 0
        for filter_size, filter_num in zip(filters, filter_nums):
            conv_w = tf.Variable(tf.truncated_normal([filter_size, embedding_size, 1, filter_num], stddev=0.1))
            conv_b = tf.Variable(tf.constant(0.1, shape=[filter_num]))
            conv = tf.nn.conv2d(embedding_layer, conv_w, [1, 1, 1, 1], padding="VALID", name="conv")
            conv_out = tf.nn.relu(conv + conv_b)

            # pool each branch down to a single value per filter
            pool = tf.nn.max_pool(conv_out, [1, s_limit_len - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="pool")
            pool_arr.append(pool)
            filter_sum += filter_num

        pool_out = tf.concat(pool_arr, 3)
        print("reshape_before", pool_out)
        self.pool_flat = tf.reshape(pool_out, [-1, filter_sum], name="pool_flat")
        print("reshape_after", self.pool_flat)

if __name__ == "__main__":
    pass
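
A minimal usage sketch for the class; the argument values here are illustrative assumptions, not taken from the post:

# Hypothetical instantiation: filter widths 1/3/5 with two filters each.
model = text_cnn(n_class=2, s_limit_len=10, voc_size=7000,
                 embedding_size=100, filters=[1, 3, 5], filter_nums=[2, 2, 2])
# model.pool_flat has shape [batch, 6]; a softmax head, loss, and training
# loop still need to be attached, which the full script below does.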

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
import data_helper

n_class = 2
learning_rate = 0.001
s_limit_len = 10
word_embedding_size = 100
voc_size = 7000
filter_nums = 4  # four branches with one filter each

def get_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(input_x, W):
    return tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID")

def maxpooling(x, ksize, strides):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding="VALID")

inputs = tf.placeholder(tf.int32, [None, s_limit_len], name="inputs")
labels = tf.placeholder(tf.float32, [None, n_class], name="label_one-hot")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")

embedding_w = tf.Variable(tf.truncated_normal([voc_size, word_embedding_size], stddev=0.1, dtype=tf.float32))
# add an extra channel dimension here so conv2d gets 4-D input
embedding_layer = tf.expand_dims(tf.nn.embedding_lookup(embedding_w, inputs), -1)

# convolutional layers: n-gram widths 1, 3, 5, 7, one filter each
conv1_W = get_weights([1, word_embedding_size, 1, 1])
conv1_bias = get_bias([1])
conv1 = tf.nn.relu(conv2d(embedding_layer, conv1_W) + conv1_bias)

conv3_W = get_weights([3, word_embedding_size, 1, 1])
conv3_bias = get_bias([1])
conv3 = tf.nn.relu(conv2d(embedding_layer, conv3_W) + conv3_bias)

conv5_W = get_weights([5, word_embedding_size, 1, 1])
conv5_bias = get_bias([1])
conv5 = tf.nn.relu(conv2d(embedding_layer, conv5_W) + conv5_bias)

conv7_W = get_weights([7, word_embedding_size, 1, 1])
conv7_bias = get_bias([1])
conv7 = tf.nn.relu(conv2d(embedding_layer, conv7_W) + conv7_bias)

# max-pool layers: a VALID conv of width k leaves s_limit_len-k+1 positions,
# so pooling over exactly that many collapses each branch to one feature
feature_map_1 = maxpooling(conv1, [1, s_limit_len - 1 + 1, 1, 1], [1, 1, 1, 1])
feature_map_3 = maxpooling(conv3, [1, s_limit_len - 3 + 1, 1, 1], [1, 1, 1, 1])
feature_map_5 = maxpooling(conv5, [1, s_limit_len - 5 + 1, 1, 1], [1, 1, 1, 1])
feature_map_7 = maxpooling(conv7, [1, s_limit_len - 7 + 1, 1, 1], [1, 1, 1, 1])

print("feature_map size:", feature_map_1, feature_map_3, feature_map_5, feature_map_7)
pool_outs = tf.concat([feature_map_1, feature_map_3, feature_map_5, feature_map_7], 3)
print("pool out:", pool_outs)
pool_flat = tf.reshape(pool_outs, [-1, filter_nums])
print("pool flat:", pool_flat)

# fully connected layer with dropout
h_drop = tf.nn.dropout(pool_flat, keep_prob)

full_W = tf.Variable(tf.truncated_normal([filter_nums, n_class], stddev=0.1, dtype=tf.float32))
full_B = tf.Variable(tf.constant(0.1, shape=[n_class], dtype=tf.float32))

# keep logits and softmax separate: the cross-entropy op applies softmax itself
logits = tf.matmul(h_drop, full_W) + full_B
outputs = tf.nn.softmax(logits)
pred = tf.argmax(outputs, 1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
acc = tf.reduce_mean(tf.cast(tf.equal(pred, tf.argmax(labels, 1)), tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

train_x, train_y, words_dict, labels_dict, all_len = data_helper.load("../data/train.txt", 1000, s_limit_len)
test_x, test_y, test_len = data_helper.load_test_data("../data/test_filter_2.txt", s_limit_len, words_dict, labels_dict)

def test(sess, acc, pred, test_x, test_y):
    y_pred, acc_test = sess.run([pred, acc], feed_dict={inputs: test_x, labels: test_y, keep_prob: 1.0})
    y_true = sess.run(tf.argmax(test_y, 1))
    print(metrics.classification_report(y_true, y_pred))

for epoch in range(1000):
    iter = 0
    test(sess, acc, pred, test_x, test_y)
    batchs = data_helper.get_batch(64, train_x, train_y, all_len)
    for [batch_x, batch_y, batch_len] in batchs:
        _, loss_, acc_, pred_list = sess.run([train_op, loss, acc, pred],
                                             feed_dict={inputs: batch_x, labels: batch_y, keep_prob: 0.5})
        if iter % 50 == 0:
            print(pred_list[:15])
            print("epoch-{0} iter-{1} loss:{2} acc-{3}".format(epoch, iter, loss_, acc_))
        iter += 1
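
The script above imports the author's local data_helper module, which the post does not show. The stub below is a hypothetical reconstruction of its interface, inferred only from the call sites; the names and return shapes are assumptions:

# data_helper.py (hypothetical stub, inferred from usage above)

def load(path, max_lines, s_limit_len):
    """Expected to return padded id sequences, one-hot labels,
    a word->id dict, a label->id dict, and true sentence lengths."""
    raise NotImplementedError

def load_test_data(path, s_limit_len, words_dict, labels_dict):
    """Expected to return padded id sequences, one-hot labels, and lengths."""
    raise NotImplementedError

def get_batch(batch_size, train_x, train_y, all_len):
    """Expected to yield [batch_x, batch_y, batch_len] chunks of batch_size."""
    raise NotImplementedError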

 

