TextCNN
import numpy as np
import pandas as pd
import tensorflow as tf

import data_helper

n_class = 3
learning_rate = 0.05
s_limit_len = 10
word_embedding_size = 100
voc_size = 7000

def get_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(input_x, W):
    return tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="SAME")

def maxpooling(x, ksize, strides):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding="SAME")

inputs = tf.placeholder(tf.int32, [None, s_limit_len], name="inputs")
labels = tf.placeholder(tf.int32, [None, n_class], name="label_one-hot")

embedding_w = tf.Variable(tf.truncated_normal([voc_size, word_embedding_size], stddev=0.1, dtype=tf.float32))
# conv2d needs a 4-D input (batch, height, width, channels), so add a channel dimension
embedding_layer = tf.expand_dims(tf.nn.embedding_lookup(embedding_w, inputs), -1)

# one convolution branch per filter width (1, 3, 5, 7), one feature map each
conv1_W = get_weights([1, word_embedding_size, 1, 1])
conv1 = conv2d(embedding_layer, conv1_W)

conv3_W = get_weights([3, word_embedding_size, 1, 1])
conv3 = conv2d(embedding_layer, conv3_W)

conv5_W = get_weights([5, word_embedding_size, 1, 1])
conv5 = conv2d(embedding_layer, conv5_W)

conv7_W = get_weights([7, word_embedding_size, 1, 1])
conv7 = conv2d(embedding_layer, conv7_W)

# max-pool each branch over the sentence dimension
feature_map_1 = maxpooling(conv1, [1, s_limit_len, 1, 1], [1, 1, 1, 1])
feature_map_3 = maxpooling(conv3, [1, s_limit_len, 1, 1], [1, 1, 1, 1])
feature_map_5 = maxpooling(conv5, [1, s_limit_len, 1, 1], [1, 1, 1, 1])
feature_map_7 = maxpooling(conv7, [1, s_limit_len, 1, 1], [1, 1, 1, 1])

pool_out = tf.concat([feature_map_1, feature_map_3, feature_map_5, feature_map_7], 3)
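The draft above stops after the concat, and because it uses padding="SAME" with stride 1, every branch keeps the full s_limit_len x word_embedding_size grid instead of collapsing to one feature per branch. An optional shape check against the tensors defined above makes this visible (it refers only to names from the draft and is not part of the final model):

# Optional sanity check on the draft graph: print the statically inferred shapes.
# With padding="SAME" and stride 1 the spatial dimensions are preserved ([?, 10, 100, 1]
# per branch, [?, 10, 100, 4] after concat), which is why the final script switches to
# "VALID" so each branch pools down to a single value before concatenation.
for name, tensor in [("conv1", conv1), ("feature_map_1", feature_map_1), ("pool_out", pool_out)]:
    print(name, tensor.get_shape().as_list())

The class-based version below packages the same convolution and pooling branches behind configurable filter widths and filter counts.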
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import metrics

class text_cnn():
    def __init__(self, n_class, s_limit_len, voc_size, embedding_size, filters, filter_nums, word2vec=None):
        self.n_class = n_class
        self.s_limit_len = s_limit_len
        self.inputs = tf.placeholder(tf.int32, [None, self.s_limit_len], name="inputs")
        self.y = tf.placeholder(tf.int32, [None, self.n_class])
        self.keep_prob = tf.placeholder(tf.float32)
        if word2vec is None:
            embedding_w = tf.Variable(tf.truncated_normal([voc_size, embedding_size], stddev=0.1))
        else:
            embedding_w = word2vec

        embedding_layer = tf.nn.embedding_lookup(embedding_w, self.inputs)
        # conv2d expects a 4-D tensor (batch, height, width, channel), so add a channel
        # dimension; the actual size is unchanged. tf.reshape would also work, but then
        # you have to be careful to get the sizes right.
        embedding_layer = tf.expand_dims(embedding_layer, -1)

        # convolution and max-over-time pooling, one branch per filter width;
        # VALID padding lets each branch collapse to a single vector of filter_num features
        pool_arr = []
        filter_sum = 0
        for filter_size, filter_num in zip(filters, filter_nums):
            conv_w = tf.Variable(tf.truncated_normal([filter_size, embedding_size, 1, filter_num], stddev=0.1))
            conv_b = tf.Variable(tf.constant(0.1, shape=[filter_num]))
            conv = tf.nn.conv2d(embedding_layer, conv_w, [1, 1, 1, 1], padding="VALID", name="conv")
            conv_out = tf.nn.relu(conv + conv_b)

            pool = tf.nn.max_pool(conv_out, [1, s_limit_len - filter_size + 1, 1, 1],
                                  strides=[1, 1, 1, 1], padding="VALID", name="pool")
            pool_arr.append(pool)
            filter_sum += filter_num

        pool_out = tf.concat(pool_arr, 3)
        print("reshape_before", pool_out)
        self.pool_flat = tf.reshape(pool_out, [-1, filter_sum], name="pool_flat")
        print("reshape_after", self.pool_flat)

if __name__ == "__main__":
    pass
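A minimal usage sketch for the class; the filter widths and per-width filter counts here are illustrative values, not ones given in the original:

# Hypothetical instantiation of text_cnn (all argument values are illustrative assumptions).
# The constructor builds the graph up to pool_flat, a [batch, sum(filter_nums)] tensor
# that a classification head can be attached to.
model = text_cnn(n_class=2, s_limit_len=10, voc_size=7000, embedding_size=100,
                 filters=[1, 3, 5, 7], filter_nums=[2, 3, 4, 4])
print(model.pool_flat)   # e.g. Tensor("pool_flat:0", shape=(?, 13), dtype=float32)

The standalone script below wires the same layers to a softmax head and a training loop.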
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
import data_helper

n_class = 2
learning_rate = 0.001
s_limit_len = 10
word_embedding_size = 100
voc_size = 7000
filter_nums = 4

def get_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(input_x, W):
    return tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID")

def maxpooling(x, ksize, strides):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding="VALID")

inputs = tf.placeholder(tf.int32, [None, s_limit_len], name="inputs")
labels = tf.placeholder(tf.int32, [None, n_class], name="label_one-hot")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")

embedding_w = tf.Variable(tf.truncated_normal([voc_size, word_embedding_size], stddev=0.1, dtype=tf.float32))
# the embedded sentence needs one more dimension (the channel) before conv2d
embedding_layer = tf.expand_dims(tf.nn.embedding_lookup(embedding_w, inputs), -1)

# convolutional layers: one filter per width (1, 3, 5, 7), one feature map each
conv1_W = get_weights([1, word_embedding_size, 1, 1])
conv1_bias = get_bias([1])
conv1 = tf.nn.relu(conv2d(embedding_layer, conv1_W) + conv1_bias)

conv3_W = get_weights([3, word_embedding_size, 1, 1])
conv3_bias = get_bias([1])
conv3 = tf.nn.relu(conv2d(embedding_layer, conv3_W) + conv3_bias)

conv5_W = get_weights([5, word_embedding_size, 1, 1])
conv5_bias = get_bias([1])
conv5 = tf.nn.relu(conv2d(embedding_layer, conv5_W) + conv5_bias)

conv7_W = get_weights([7, word_embedding_size, 1, 1])
conv7_bias = get_bias([1])
conv7 = tf.nn.relu(conv2d(embedding_layer, conv7_W) + conv7_bias)

# max-pooling layers: pool each branch over its s_limit_len - filter_size + 1 positions,
# so every branch contributes exactly one feature
feature_map_1 = maxpooling(conv1, [1, s_limit_len - 1 + 1, 1, 1], [1, 1, 1, 1])
feature_map_3 = maxpooling(conv3, [1, s_limit_len - 3 + 1, 1, 1], [1, 1, 1, 1])
feature_map_5 = maxpooling(conv5, [1, s_limit_len - 5 + 1, 1, 1], [1, 1, 1, 1])
feature_map_7 = maxpooling(conv7, [1, s_limit_len - 7 + 1, 1, 1], [1, 1, 1, 1])

print("feature_map size:", feature_map_1, feature_map_3, feature_map_5, feature_map_7)
pool_outs = tf.concat([feature_map_1, feature_map_3, feature_map_5, feature_map_7], 3)
print("pool out:", pool_outs)
pool_flat = tf.reshape(pool_outs, [-1, filter_nums])
print("pool flat:", pool_flat)

# fully connected layer with dropout
h_drop = tf.nn.dropout(pool_flat, keep_prob)

full_W = tf.Variable(tf.truncated_normal([filter_nums, n_class], stddev=0.1, dtype=tf.float32))
full_B = tf.Variable(tf.constant(0.1, shape=[n_class], dtype=tf.float32))

# keep the raw logits for the loss; softmax_cross_entropy_with_logits applies softmax itself
logits = tf.matmul(h_drop, full_W) + full_B
outputs = tf.nn.softmax(logits)
pred = tf.argmax(outputs, 1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(labels, tf.float32)))
acc = tf.reduce_mean(tf.cast(tf.equal(pred, tf.argmax(labels, 1)), tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

train_x, train_y, words_dict, labels_dict, all_len = data_helper.load("../data/train.txt", 1000, s_limit_len)
test_x, test_y, test_len = data_helper.load_test_data("../data/test_filter_2.txt", s_limit_len, words_dict, labels_dict)

def test(sess, acc, pred, test_x, test_y):
    y_pred, acc_test = sess.run([pred, acc], feed_dict={inputs: test_x, labels: test_y, keep_prob: 1.0})
    y_true = np.argmax(test_y, 1)
    print(metrics.classification_report(y_true, y_pred))

for epoch in range(1000):
    iter = 0
    test(sess, acc, pred, test_x, test_y)
    batches = data_helper.get_batch(64, train_x, train_y, all_len)
    for batch_x, batch_y, batch_len in batches:
        _, loss_, acc_, pred_list = sess.run([train_op, loss, acc, pred],
                                             feed_dict={inputs: batch_x, labels: batch_y, keep_prob: 0.5})
        if iter % 50 == 0:
            print(pred_list[:15])
            print("epoch-{0} iter-{1} loss:{2} acc-{3}".format(epoch, iter, loss_, acc_))
        iter += 1
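data_helper itself is not shown in this post; the script only assumes it exposes load, load_test_data, and get_batch with the call signatures used above. The sketch below is a hypothetical stand-in to make those calls concrete: the one-example-per-line "label<TAB>text" file format, the meaning of the second argument to load (treated here as a vocabulary cap), and the padded/one-hot return layout are all assumptions, not the original module.

# Hypothetical data_helper sketch; the real module may differ in file format,
# padding, vocabulary handling, and label encoding.
import numpy as np

def _vectorize(words, words_dict, s_limit_len):
    # map words to ids and pad/truncate to s_limit_len (id 0 doubles as padding/unknown)
    ids = [words_dict.get(w, 0) for w in words][:s_limit_len]
    return ids + [0] * (s_limit_len - len(ids)), min(len(words), s_limit_len)

def load(path, voc_limit, s_limit_len):
    # returns padded word-id matrix, one-hot labels, word dict, label dict, true lengths
    samples = [line.strip().split("\t", 1) for line in open(path, encoding="utf-8") if line.strip()]
    words_dict, labels_dict = {}, {}
    for label, text in samples:
        labels_dict.setdefault(label, len(labels_dict))
        for w in text.split():
            if w not in words_dict and len(words_dict) < voc_limit:
                words_dict[w] = len(words_dict) + 1
    x, y, lens = [], [], []
    for label, text in samples:
        ids, length = _vectorize(text.split(), words_dict, s_limit_len)
        one_hot = [0] * len(labels_dict)
        one_hot[labels_dict[label]] = 1
        x.append(ids)
        y.append(one_hot)
        lens.append(length)
    return np.array(x), np.array(y), words_dict, labels_dict, np.array(lens)

def load_test_data(path, s_limit_len, words_dict, labels_dict):
    # same layout as load(), but reusing the training vocabulary and label ids
    x, y, lens = [], [], []
    for line in open(path, encoding="utf-8"):
        if not line.strip():
            continue
        label, text = line.strip().split("\t", 1)
        ids, length = _vectorize(text.split(), words_dict, s_limit_len)
        one_hot = [0] * len(labels_dict)
        one_hot[labels_dict[label]] = 1
        x.append(ids)
        y.append(one_hot)
        lens.append(length)
    return np.array(x), np.array(y), np.array(lens)

def get_batch(batch_size, train_x, train_y, all_len):
    # shuffle once per epoch and yield [x, y, length] batches
    idx = np.random.permutation(len(train_x))
    for start in range(0, len(idx), batch_size):
        chosen = idx[start:start + batch_size]
        yield [train_x[chosen], train_y[chosen], all_len[chosen]]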