Bridging the Gap between Deep Learning and Sparse Matrix Format Selection
The network is a typical CNN; both the training procedure and the code style follow the standard pattern:
```python
import os
import sys

ROOTDIR = os.path.abspath(os.path.join(sys.path[0], '../../..'))
sys.path.append(ROOTDIR)

import tensorflow as tf
import numpy as np

from dnnspmv.model.dataset import DataSet
from dnnspmv.model.lib.sample_wrapper import DlSample as Sampler


# read data
def load_data(filename):
    try:
        data = np.load(filename)
        ds = DataSet(data['img'], data['code'])
    except:
        print("Can not find data file")
        ds = None
    finally:
        return ds


# helper functions to build the graph
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W, strides=[1, 1, 1, 1]):
    return tf.nn.conv2d(x, W, strides=strides, padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def single_net(RES):
    """One CNN branch: three conv/pool layers plus a 512-wide dense layer."""
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, RES, RES], name='x')
        y_ = tf.placeholder(tf.float32, shape=[None, 4], name='y')
        x_image = tf.reshape(x, [-1, RES, RES, 1], name='x-reshape')

    # first layer
    with tf.name_scope('layer1'):
        W_conv1 = weight_variable([3, 3, 1, 16])
        b_conv1 = bias_variable([16])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)              # [-1, 64, 64, 16]

    # second layer
    with tf.name_scope('layer2'):
        W_conv2 = weight_variable([3, 3, 16, 32])
        b_conv2 = bias_variable([32])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, strides=[1, 2, 2, 1]) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)              # [-1, 16, 16, 32]

    # third layer
    with tf.name_scope('layer3'):
        W_conv3 = weight_variable([3, 3, 32, 64])
        b_conv3 = bias_variable([64])
        h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3, strides=[1, 2, 2, 1]) + b_conv3)
        h_pool3 = max_pool_2x2(h_conv3)              # [-1, 4, 4, 64] = [-1, 1024]

    # dense layer
    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([4 * 4 * 64, 512])
        b_fc1 = bias_variable([512])
        h_pool3_flat = tf.reshape(h_pool3, [-1, 4 * 4 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)   # [-1, 512]

    return x, y_, h_fc1


class DLSpMVModel(object):
    def __init__(self, train_data, test_data):
        self.RES = 0
        self.mean = 0
        self.std = 1

        self.train = load_data(train_data)
        if self.train:
            print(self.train.images.shape, self.train.labels.shape)
            self.RES = self.train.images.shape[-1]                       # 128
            self.mean = np.mean(self.train.images[:, 0, :, :], axis=0)
            self.std = np.std(self.train.images[:, 0, :, :], axis=0)

        self.test = load_data(test_data)
        if self.test and self.RES == 0:
            print(self.test.images.shape, self.test.labels.shape)
            self.RES = self.test.images.shape[-1]                        # 128

        self.STEPS = 10000

    def build_graph(self):
        pass

    def training(self):
        print("Model is in training mode")
        assert self.train is not None and self.test is not None, "data not loaded"

        # two branches with identical structure, merged before the output layer
        with tf.name_scope('upper'):
            x, y_, h_fc1_upper = single_net(self.RES)
        with tf.name_scope('lower'):
            x2, y2_, h_fc1_lower = single_net(self.RES)

        h_fc1 = tf.concat([h_fc1_upper, h_fc1_lower], axis=1)   # [-1, 512 * 2]

        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32, name='keep_prob')
            h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        with tf.name_scope('out'):
            W_fc2 = weight_variable([512 * 2, 4])
            b_fc2 = bias_variable([4])
            # this is the network output (logits over the four formats)
            y_conv = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2, name='y_conv_restore')

        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=y_, logits=y_conv))   # takes unnormalized output

        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                      name='acc_to_restore')
            tf.summary.scalar('accuracy', accuracy)

        merged = tf.summary.merge_all()   # no FileWriter is created, so summaries stay unused
        saver = tf.train.Saver()          # traditional saving api

        # train the model
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.STEPS):
                batch = self.train.next_batch(50)
                if i % 100 == 0:
                    train_accuracy = sess.run(accuracy, feed_dict={
                        x: batch[0][:, 0, :, :], y_: batch[1],
                        x2: batch[0][:, 1, :, :], y2_: batch[1],
                        keep_prob: 1.0})
                    print('step %d, training accuracy %g' % (i, train_accuracy))
                else:
                    _ = sess.run(train_step, feed_dict={
                        x: batch[0][:, 0, :, :], y_: batch[1],
                        x2: batch[0][:, 1, :, :], y2_: batch[1],
                        keep_prob: 0.5})

            # test
            print('test accuracy %g' % accuracy.eval(feed_dict={
                x: self.test.images[:, 0, :, :], y_: self.test.labels,
                x2: self.test.images[:, 1, :, :], y2_: self.test.labels,
                keep_prob: 1.0}))

            # save model and checkpoint
            save_path = saver.save(sess, os.path.join(
                ROOTDIR, "dnnspmv/model/spmv/model-{}.ckpt".format(self.STEPS)))
            print("Model saved in file %s" % save_path)

    def testing(self):
        """Restore the model from its checkpoint and report test accuracy."""
        print("Model is in testing mode")
        assert self.test is not None, "data not loaded"

        tf.reset_default_graph()
        # the graph is empty now, must rebuild the graph before restoring values
        with tf.Session() as sess:
            # restore the graph structure
            saver = tf.train.import_meta_graph(os.path.join(
                ROOTDIR, 'dnnspmv/model/spmv/model-{}.ckpt.meta'.format(self.STEPS)))
            # the current graph can be explored through
            graph = tf.get_default_graph()
            # restore the variable values
            saver.restore(sess, tf.train.latest_checkpoint(
                os.path.join(ROOTDIR, 'dnnspmv/model/spmv')))
            print("Model restored")

            x = graph.get_tensor_by_name("upper/input/x:0")
            y = graph.get_tensor_by_name("upper/input/y:0")
            x2 = graph.get_tensor_by_name("lower/input/x:0")
            y2_ = graph.get_tensor_by_name("lower/input/y:0")
            keep_prob = graph.get_tensor_by_name("dropout/keep_prob:0")
            # for a tensor, use get_tensor_by_name()
            # for an operation, use get_operation_by_name()
            # NOTE: tensor names must be of the form "<op_name>:<output_index>"
            acc = graph.get_tensor_by_name('train/acc_to_restore:0')

            # test
            print("-------------------------------------------------------")
            print('Test accuracy %g' % sess.run(acc, feed_dict={
                x: self.test.images[:, 0, :, :], y: self.test.labels,
                x2: self.test.images[:, 1, :, :], y2_: self.test.labels,
                keep_prob: 1.0}))
            print("-------------------------------------------------------")

    # for prediction
    def _img_norm(self, img):
        return (img - self.mean) / self.std

    def predict(self, matrix_mtx):
        print("Model is in prediction mode")
        assert self.train is not None, "train data required"

        format_dict = ['COO', 'CSR', 'DIA', 'ELL']

        # sample the matrix into the two fixed-size images and normalize them
        sl = Sampler()
        img, img_ = sl.sample(matrix_mtx, self.RES)
        img = self._img_norm(img)
        img_ = self._img_norm(img_)
        imgs = img.reshape(1, self.RES, self.RES)
        imgs_ = img_.reshape(1, self.RES, self.RES)

        tf.reset_default_graph()
        # the graph is empty now, must rebuild the graph before restoring values
        with tf.Session() as sess:
            # restore the graph structure
            saver = tf.train.import_meta_graph(os.path.join(
                ROOTDIR, 'dnnspmv/model/spmv/model-10000.ckpt.meta'))
            # the current graph can be explored through
            graph = tf.get_default_graph()
            # restore the variable values
            saver.restore(sess, tf.train.latest_checkpoint(
                os.path.join(ROOTDIR, 'dnnspmv/model/spmv')))
            print("Model restored")

            x = graph.get_tensor_by_name("upper/input/x:0")
            x2 = graph.get_tensor_by_name("lower/input/x:0")
            keep_prob = graph.get_tensor_by_name("dropout/keep_prob:0")
            y_conv = graph.get_tensor_by_name('out/y_conv_restore:0')

            y_pred = sess.run(y_conv, feed_dict={x: imgs, x2: imgs_, keep_prob: 1.0})
            fmts = [*map(lambda i: format_dict[i], np.argmax(y_pred, axis=1))]

            print("-------------------------------------------------------")
            print('The predicted best format for matrix {} is {}'.format(
                os.path.basename(matrix_mtx), fmts))
            print("-------------------------------------------------------")
        return


def main():
    if len(sys.argv) < 2:
        print("Usage: {} FLAG{{train, test, predict}}".format(sys.argv[0]))
        exit()

    FLAG = sys.argv[1].lower()
    model = DLSpMVModel(os.path.join(ROOTDIR, 'dnnspmv/data/train-data.npz'),
                        os.path.join(ROOTDIR, 'dnnspmv/data/test-data.npz'))
    print(type(model))

    if FLAG == 'train':
        model.training()
    elif FLAG == 'test':
        model.testing()
    elif FLAG == 'predict':
        if len(sys.argv) < 3:
            print("Predict mode: {} predict <mtxfile>".format(sys.argv[0]))
            exit()
        mtxfile = sys.argv[2]
        model.predict(mtxfile.encode())


if __name__ == '__main__':
    main()
```
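For reference, the class can also be driven directly instead of through main(); a minimal usage sketch, reusing the same hard-coded paths as above (the .mtx path is only a placeholder):

```python
model = DLSpMVModel(os.path.join(ROOTDIR, 'dnnspmv/data/train-data.npz'),
                    os.path.join(ROOTDIR, 'dnnspmv/data/test-data.npz'))
model.training()                    # train for STEPS iterations and save the checkpoint
model.testing()                     # restore the checkpoint and report test accuracy
model.predict(b'some_matrix.mtx')   # predict() is given a bytes path, as in main()
```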
The network architecture really is quite simple.
Some preliminary thoughts on this paper:
The overall pipeline should work like this: first, a large number of matrices are selected offline as the training set. Each training matrix is run with all four sparse storage formats (COO, CSR, DIA, ELL), and the format with the best measured performance becomes that matrix's label.
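To make the labeling step concrete, here is a minimal sketch of how such labels could be produced, with scipy kernels standing in for the real SpMV implementations (my own illustration, not the paper's benchmarking code; scipy has no ELL type, so only three of the four formats are timed here, and DIA conversion can be very memory-hungry for irregular matrices):

```python
import time
import numpy as np
from scipy.io import mmread
from scipy.sparse import coo_matrix, csr_matrix, dia_matrix

def label_matrix(mtx_path, repeats=50):
    """Return the name of the fastest SpMV format for one matrix."""
    A = coo_matrix(mmread(mtx_path))
    x = np.ones(A.shape[1])
    candidates = {'COO': A, 'CSR': csr_matrix(A), 'DIA': dia_matrix(A)}
    timings = {}
    for name, M in candidates.items():
        t0 = time.perf_counter()
        for _ in range(repeats):
            M.dot(x)                                   # y = A * x
        timings[name] = (time.perf_counter() - t0) / repeats
    return min(timings, key=timings.get)               # this becomes the label
```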
Then comes the CNN training stage. Matrices of varying sizes are converted into fixed-size inputs using the DistanceHistogramRepresentation and fed into the constructed CNN.
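As a rough stand-in for this step (my own simplification: a plain 2D density histogram of the nonzero positions; the paper's histogram-based representations record more structure than this, and the real sampling is done by the compiled DlSample library used in the code above), a fixed-size input could be built like this:

```python
import numpy as np
from scipy.io import mmread
from scipy.sparse import coo_matrix

def to_fixed_size_image(mtx_path, res=128):
    """Map a sparse matrix of any size onto a res x res 'image'."""
    A = coo_matrix(mmread(mtx_path))
    rows = A.row.astype(np.int64) * res // A.shape[0]   # bucket row indices
    cols = A.col.astype(np.int64) * res // A.shape[1]   # bucket column indices
    img = np.zeros((res, res), dtype=np.float32)
    np.add.at(img, (rows, cols), 1.0)                   # count nonzeros per cell
    return img                                          # normalized later, like _img_norm
```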
The result is a trained CNN that can be used to predict the best storage format for a new matrix.
Takeaways from reading this paper:
1. A CNN is used to match each matrix to its best-fitting storage format. The forked, two-branch structure of the CNN is fairly novel.
2. The DistanceHistogramRepresentation turns matrices of arbitrary size into fixed-size inputs. I previously did not know how to normalize matrices to a common size, so this method offers a new idea.
3. Transfer learning is used to port the trained CNN to other platforms for reuse (a rough sketch of what that could look like follows below).
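A minimal transfer-learning sketch, assuming `new_train` is a DataSet of samples labeled on the new platform (same layout as the training data above); it restores the saved checkpoint and retrains only the final 'out' layer. This is my own illustration of the idea, not the paper's exact procedure, and it reuses ROOTDIR and the tensor names from the script above:

```python
import os
import tensorflow as tf

def finetune_on_new_platform(new_train, steps=2000):
    tf.reset_default_graph()
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(os.path.join(
            ROOTDIR, 'dnnspmv/model/spmv/model-10000.ckpt.meta'))
        saver.restore(sess, tf.train.latest_checkpoint(
            os.path.join(ROOTDIR, 'dnnspmv/model/spmv')))
        graph = tf.get_default_graph()

        x = graph.get_tensor_by_name('upper/input/x:0')
        y_ = graph.get_tensor_by_name('upper/input/y:0')
        x2 = graph.get_tensor_by_name('lower/input/x:0')
        keep_prob = graph.get_tensor_by_name('dropout/keep_prob:0')
        y_conv = graph.get_tensor_by_name('out/y_conv_restore:0')

        # update only the final fully connected layer; the convolutional
        # feature extractor keeps the weights learned on the original platform
        out_vars = [v for v in tf.trainable_variables() if v.name.startswith('out/')]
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=out_vars)

        # initialize only the variables the new optimizer just created
        uninit = set(sess.run(tf.report_uninitialized_variables()))
        sess.run(tf.variables_initializer(
            [v for v in tf.global_variables()
             if v.name.split(':')[0].encode() in uninit]))

        for i in range(steps):
            batch = new_train.next_batch(50)
            sess.run(train_step, feed_dict={
                x: batch[0][:, 0, :, :], y_: batch[1],
                x2: batch[0][:, 1, :, :], keep_prob: 0.5})
```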
Points I am still unsure about:
1. The model feels too simple compared with the SMAT paper. SMAT appears to use many thresholds and extracts a rich set of features from both the matrix and the hardware platform, whereas this work only looks at the matrix's shape and applies a CNN image-recognition style of method to pick the best storage format. I should reread SMAT carefully: it extracts many parameters and then learns with a decision tree (a toy sketch of that style is below).
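For contrast, an SMAT-style pipeline would hand-craft features and train a classical learner on them. A toy sketch with made-up features (not SMAT's actual feature set or thresholds):

```python
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.tree import DecisionTreeClassifier

def simple_features(A):
    """A few hand-crafted matrix features (illustrative only)."""
    A = csr_matrix(A)
    nnz_per_row = np.diff(A.indptr)
    return [A.shape[0], A.shape[1], A.nnz,
            nnz_per_row.mean(), nnz_per_row.std(), nnz_per_row.max()]

def train_format_classifier(matrices, best_format_ids):
    """matrices: list of sparse matrices; best_format_ids: measured labels 0..3."""
    X = np.array([simple_features(A) for A in matrices])
    return DecisionTreeClassifier(max_depth=5).fit(X, best_format_ids)
```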
After talking with a senior labmate I finally understood: the hard part is not the machine learning method itself, but correctly generating the training data and handling the low-level issues (compiler optimization, cache behavior, and so on) that make the code fit the machine.