Torch 常用的神经网络例子解释
http://blog.csdn.net/u010946556/article/details/51339385
内容大致来自该博客。本人虽然知道 BP(反向传播)的功能,却一直纠结于不知道如何实现;即使知道用 torch 几行代码就能够架设复杂的神经网络跑数据,但是不会自己写复杂网络的人不算真正的研究者。
FP(前向传播)是用来计算 loss 的,BP(反向传播)是用链式法则逐层求导来得到梯度的,这很简单。
代码测试可用,在load文件时可能会报错,所以选择绝对路径。这是数据文件的网盘http://pan.baidu.com/s/1c2035BE
讲一下这个网络的具体设计
卷积操作,池化(压缩)操作,卷积,池化,卷积,池化……全连接层。
全连接层能够考虑到全部的信息,但参数多、计算慢,所以一般只用在最后几层;最后一个全连接层的输出就是各类别的得分,loss 实际上是基于它计算的。其后的 LogSoftMax 层(原文称“高斯层”)是单调变换,不会改变各类别得分的高低顺序。
下面第一段代码用到了 itorch.image 来显示图片,所以要在 itorch notebook 上运行,mark 一下。
-- CIFAR-10 example on the CPU: load the data, normalize it per channel,
-- build a small LeNet-style network, train it with plain SGD, and inspect
-- the prediction for one test sample.
require 'paths';
require 'nn';

--- Load the training and test sets (absolute paths, because relative paths
--- may fail to resolve depending on where the interpreter is started).
trainset = torch.load('/home/hu/cifar10-train.t7');
-- Fix: the test set must come from the test file; loading the training file
-- here would make every accuracy figure below a training-set accuracy.
testset = torch.load('/home/hu/cifar10-test.t7');
classes = {'airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck'};

--- nn.StochasticGradient expects dataset[i] to return {input, target} and
--- dataset:size() to return the number of samples, so add both.
setmetatable(trainset,
    {__index = function(t, i)
                   return {t.data[i], t.label[i]}
               end}
);
-- Convert to DoubleTensor so the arithmetic below is done in floating point.
trainset.data = trainset.data:double()

function trainset:size()
    return self.data:size(1)
end

--- Normalize each of the 3 channels to zero mean and unit standard deviation.
--- The statistics are kept so the test set can be normalized identically.
mean = {}
stdv = {}
for i = 1, 3 do
    mean[i] = trainset.data[{ {}, {i}, {}, {} }]:mean()
    print('Channel ' .. i .. ', Mean: ' .. mean[i])
    trainset.data[{ {}, {i}, {}, {} }]:add(-mean[i])

    stdv[i] = trainset.data[{ {}, {i}, {}, {} }]:std()
    print('Channel ' .. i .. ', Standard Deviation:' .. stdv[i])
    trainset.data[{ {}, {i}, {}, {} }]:div(stdv[i])
end

--- Network definition.
net = nn.Sequential()
-- The first convolution takes 3 input channels (colour images); the
-- single-channel variant is kept for reference:
-- net:add(nn.SpatialConvolution(1, 6, 5, 5))
net:add(nn.SpatialConvolution(3, 6, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(2,2,2,2))
net:add(nn.SpatialConvolution(6, 16, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(2,2,2,2))
net:add(nn.View(16*5*5))          -- flatten 16x5x5 feature maps to a vector
net:add(nn.Linear(16*5*5, 120))
net:add(nn.ReLU())
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())
net:add(nn.Linear(84, 10))        -- one output per class
net:add(nn.LogSoftMax())          -- log-probabilities, as ClassNLLCriterion expects

--- Loss and training.
criterion = nn.ClassNLLCriterion();
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 5
trainer:train(trainset)

--- Normalize the test data with the statistics of the TRAINING set.
testset.data = testset.data:double();
for i = 1, 3 do
    testset.data[{ {}, {i}, {}, {} }]:add(-mean[i])
    testset.data[{ {}, {i}, {}, {} }]:div(stdv[i])
end

--- Predict one test sample and print the confidences.
print(classes[testset.label[400]])
-- itorch only exists inside an iTorch notebook; skip the image display when
-- the script is run with plain `th` instead of failing on a nil global.
if itorch then
    itorch.image(testset.data[400])
end
predicted = net:forward(testset.data[400])
-- exp() works in place: it turns the log-probabilities into probabilities.
print(predicted:exp())

--- Sort the confidences in descending order and print the winning class.
confidences, indices = torch.sort(predicted, true)
print(confidences[1])
print(indices[1])
print(classes[indices[1]])
-- Overall and per-class accuracy on the test set.
-- Both figures are gathered in a single pass (the network used to be run over
-- the whole test set twice), and the sample counts are taken from the data
-- instead of assuming 10000 samples with exactly 1000 per class.
local numSamples = testset.data:size(1)

correct = 0
class_performance = {}
local class_total = {}
for c = 1, #classes do
    class_performance[c] = 0
    class_total[c] = 0
end

for i = 1, numSamples do
    local groundtruth = testset.label[i]
    local prediction = net:forward(testset.data[i])
    -- Descending sort: indices[1] is the class with the highest score.
    local _, indices = torch.sort(prediction, true)

    class_total[groundtruth] = (class_total[groundtruth] or 0) + 1
    if groundtruth == indices[1] then
        correct = correct + 1
        class_performance[groundtruth] = (class_performance[groundtruth] or 0) + 1
    end
end

-- correct rate in total
print(correct, 100*correct/numSamples .. '%')

-- correct rate for every class
for i = 1, #classes do
    local total = class_total[i]
    -- Guard against a class that does not occur in the evaluated data.
    local percent = 0
    if total > 0 then
        percent = 100*class_performance[i]/total
    end
    print(classes[i], percent .. '%')
end
虽然被告知只要把 dataset、net 和 criterion 都转到 GPU 上(调用 :cuda())就能用 CUDA 跑,但是自己实在没有成功;幸运的是,在网上找到了用 cuda 跑的代码:
运行命令(注意:下面的脚本本身并没有解析 -gpu / -backend 参数,用的是 cunn 而不是 cudnn):
th cuda.lua -gpu 0 -backend cudnn
-- CIFAR-10 example on the GPU (cutorch / cunn): load the data, move it to the
-- GPU, normalize it per channel, train a small LeNet-style network with plain
-- SGD and report the per-class accuracy on the test set.
-- Note: this script does not read any command-line options.
require 'nn'
require 'cunn'
require 'torch'
require 'cutorch'

-- load data
trainset = torch.load('/home/hu/cifar10-train.t7')
testset = torch.load('/home/hu/cifar10-test.t7')

-- nn.StochasticGradient expects dataset[i] to return {input, target} and
-- dataset:size() to return the number of samples.
setmetatable(trainset, {
    __index = function(t, i)
        return {t.data[i], t.label[i]}
    end
});

function trainset:size()
    return self.data:size(1)
end

-- Transfer the data to the GPU. This also converts it to a floating-point
-- CudaTensor, which is required before any normalization arithmetic.
trainset.data = trainset.data:cuda()
trainset.label = trainset.label:cuda()

-- Normalize each of the 3 channels to zero mean / unit standard deviation and
-- keep the statistics so the test set can be normalized identically.
meanv = {}
stdv = {}
for i = 1, 3 do
    meanv[i] = trainset.data[{ {}, {i}, {}, {} }]:mean()
    trainset.data[{ {}, {i}, {}, {} }]:add(-meanv[i])
    print('mean'..i..'_'..meanv[i])

    stdv[i] = trainset.data[{ {}, {i}, {}, {} }]:std()
    trainset.data[{ {}, {i}, {}, {} }]:div(stdv[i])
    -- Fix: this line used to print meanv[i] under the 'std' label.
    print('std'..i..'_'..stdv[i])
end

------------------ Define Network
net = nn.Sequential()
net:add(nn.SpatialConvolution(3, 6, 5, 5)) -- 3 input image channels, 6 output channels, 5x5 convolution kernel
net:add(nn.ReLU())                         -- non-linearity
net:add(nn.SpatialMaxPooling(2,2,2,2))     -- max-pooling over 2x2 windows with stride 2
net:add(nn.SpatialConvolution(6, 16, 5, 5))
net:add(nn.ReLU())                         -- non-linearity
net:add(nn.SpatialMaxPooling(2,2,2,2))
net:add(nn.View(16*5*5))                   -- reshapes from a 3D tensor of 16x5x5 into a 1D tensor of 16*5*5
net:add(nn.Linear(16*5*5, 120))            -- fully connected layer (matrix multiplication between input and weights)
net:add(nn.ReLU())                         -- non-linearity
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())                         -- non-linearity
net:add(nn.Linear(84, 10))                 -- 10 is the number of outputs of the network (the 10 CIFAR-10 classes)
net:add(nn.LogSoftMax())                   -- log-probabilities, as ClassNLLCriterion expects
print(net)

---------------- Define criterion -----------
criterion = nn.ClassNLLCriterion()

--------------- Transfer net and criterion to GPU -------
net = net:cuda()
criterion = criterion:cuda()

----------------- StochasticGradient with Criterion --
-- The timer is started here, so the reported time covers training and testing.
timer = torch.Timer()
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 5
trainer:train(trainset)

------------------- test -------------
-- Fix: move the test set to the GPU BEFORE normalizing it. Normalizing the
-- raw loaded tensor in place (presumably a ByteTensor, as the CPU version of
-- this script converts it with :double() first) would do the subtraction and
-- division in integer arithmetic and corrupt the data.
testset.data = testset.data:cuda()
testset.label = testset.label:cuda()
for i = 1, 3 do
    testset.data[{ {}, {i}, {}, {} }]:add(-meanv[i])
    testset.data[{ {}, {i}, {}, {} }]:div(stdv[i])
end

-- Calculate per-class accuracies.
-- Fix: evaluate on testset (the loop used to read trainset), and derive the
-- sample counts from the data instead of assuming 10000 samples with exactly
-- 1000 per class.
local numClasses = 10
local numTest = testset.data:size(1)
accuracies = {}
local classTotal = {}
for c = 1, numClasses do
    accuracies[c] = 0
    classTotal[c] = 0
end

for i = 1, numTest do
    local predict = net:forward(testset.data[i])
    local sampleLabel = testset.label[i]
    -- Descending sort: indice[1] is the class with the highest score.
    local _, indice = torch.sort(predict, true)

    classTotal[sampleLabel] = (classTotal[sampleLabel] or 0) + 1
    if indice[1] == sampleLabel then
        accuracies[sampleLabel] = (accuracies[sampleLabel] or 0) + 1
    end
end

-- Lua numeric for is (start, end, step), unlike MATLAB's start:step:end.
for i = 1, numClasses, 1 do
    -- Guard against a class that does not occur in the evaluated data.
    local percent = 0
    if classTotal[i] > 0 then
        percent = 100*accuracies[i]/classTotal[i]
    end
    print('accuracies: '..i..'_'..percent ..'%')
end
print('Elapsed time:'..timer:time().real..'seconds')