Face Alignment at 3000 FPS via Regressing Local Binary Features: Notes
This paper was published by Ren Shaoqing at CVPR 2014. The method is built on the CPR (Cascaded Pose Regression) framework; if you are not familiar with CPR, see my earlier blog post. An implementation that partially reproduces LBF is available online at https://github.com/jwyang/face-alignment. The rest of this post walks through the whole algorithm.
In short, the algorithm uses random forests together with shape-indexed features to obtain local binary features, and then learns the regressor by linear regression. At detection time, the learned feature mapping and linear projection produce the shape update ΔS, which is added to the shape of the previous stage to obtain the shape of the current stage; this is repeated until the last stage. Below I describe the details by following the code.
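To make the pipeline concrete before diving into the code, here is a minimal MATLAB-style sketch of the cascade. It is not code from the repository: extract_lbf, forests, Ws and T are hypothetical names standing in for the learned feature mapping, the stage regressors and the number of stages, and the normalization between the mean-shape frame and the image frame is omitted.

% Minimal sketch of the LBF cascade (hypothetical names, not repository code).
% S is the current shape estimate (L x 2 matrix of landmark coordinates),
% initialized by placing the mean shape inside the detected face box.
S = resetshape(bbox_facedet, meanshape);      % initial shape S^0
for t = 1:T
    % feature mapping: traverse the stage-t random forests to obtain a
    % sparse binary vector (the local binary features)
    phi = extract_lbf(img_gray, S, forests{t});
    % global linear projection learned by linear regression
    deltaS = reshape(phi * Ws{t}, [], 2);     % predicted shape update
    S = S + deltaS;                           % S^t = S^(t-1) + deltaS
end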
The algorithm consists of a training part and a testing part. Let us start with train.m; the detailed explanations are given as comments inside the code.
%TRAIN_MODEL Summary of this function goes here
%   Function: train face alignment model
%   Detailed explanation goes here
%   Input:
%       dbnames: the names of database

% Configure the parameters for training model
global params;
global Tr_Data;
config_tr;

% COFW is the database used in the RCPR paper; unlike LFPW, Helen, etc.,
% its shape annotations also record whether each landmark is occluded.
if size(dbnames) > 1 & sum(strcmp(dbnames, 'COFW')) > 0
    disp('Sorry, COFW cannot be combined with others');
    return;
end

% load the mean shape
if sum(strcmp(dbnames, 'COFW')) > 0
    load('../initial_shape/InitialShape_29.mat');
    params.meanshape = S0;
else
    load('../initial_shape/InitialShape_68.mat');
    params.meanshape = S0;
end

% whether to use parallel computation
if params.isparallel
    disp('Attention, if error occurs, please ensure you used the correct version of parallel initialization.');
    if isempty(gcp('nocreate'))
        parpool(4);
    else
        disp('Already initialized');
    end
    %{
    if matlabpool('size') <= 0
        matlabpool('open','local',4);
    else
        disp('Already initialized');
    end
    %}
end

% load trainning data from hardware
Tr_Data = [];
% Tr_Bboxes = [];
for i = 1:length(dbnames)
    % load training samples (including training images, and groundtruth shapes)
    imgpathlistfile = strcat('..\datasets\', dbnames{i}, '\Path_Images.txt');
    tr_data = loadsamples(imgpathlistfile, 2); % read the data
    Tr_Data = [Tr_Data; tr_data];
end

% Augmentate data for traing: assign multiple initial shapes to each image
Data = Tr_Data; % (1:10:end);
Param = params;

% flip the images to enlarge the training data
if Param.flipflag % if conduct flipping
    Data_flip = cell(size(Data, 1), 1);
    for i = 1:length(Data_flip)
        Data_flip{i}.img_gray = fliplr(Data{i}.img_gray);
        Data_flip{i}.width_orig = Data{i}.width_orig;
        Data_flip{i}.height_orig = Data{i}.height_orig;
        Data_flip{i}.width = Data{i}.width;
        Data_flip{i}.height = Data{i}.height;

        Data_flip{i}.shape_gt = flipshape(Data{i}.shape_gt);
        Data_flip{i}.shape_gt(:, 1) = Data{i}.width - Data_flip{i}.shape_gt(:, 1);

        Data_flip{i}.bbox_gt = Data{i}.bbox_gt;
        Data_flip{i}.bbox_gt(1) = Data_flip{i}.width - Data_flip{i}.bbox_gt(1) - Data_flip{i}.bbox_gt(3);

        Data_flip{i}.bbox_facedet = Data{i}.bbox_facedet;
        Data_flip{i}.bbox_facedet(1) = Data_flip{i}.width - Data_flip{i}.bbox_facedet(1) - Data_flip{i}.bbox_facedet(3);
    end
    Data = [Data; Data_flip];
end

% choose corresponding points for training
for i = 1:length(Data)
    Data{i}.shape_gt = Data{i}.shape_gt(Param.ind_usedpts, :); % select the landmarks actually used
    Data{i}.bbox_gt = getbbox(Data{i}.shape_gt); % derive the bounding box from the ground-truth shape

    % modify detection boxes
    shape_facedet = resetshape(Data{i}.bbox_facedet, Param.meanshape); % reproject the mean shape onto the face-detection bbox
    shape_facedet = shape_facedet(Param.ind_usedpts, :);
    Data{i}.bbox_facedet = getbbox(shape_facedet);
end

Param.meanshape = S0(Param.ind_usedpts, :); % select the landmarks actually used

dbsize = length(Data);

% load('Ts_bbox.mat');

augnumber = Param.augnumber;

for i = 1:dbsize
    % initializ the shape of current face image by randomly selecting multiple shapes from other face images
    % indice = ceil(dbsize*rand(1, augnumber));
    indice_rotate = ceil(dbsize*rand(1, augnumber));
    indice_shift = ceil(dbsize*rand(1, augnumber));
    scales = 1 + 0.2*(rand([1 augnumber]) - 0.5);

    Data{i}.intermediate_shapes = cell(1, Param.max_numstage);
    Data{i}.intermediate_bboxes = cell(1, Param.max_numstage);

    Data{i}.intermediate_shapes{1} = zeros([size(Param.meanshape), augnumber]);
    Data{i}.intermediate_bboxes{1} = zeros([augnumber, size(Data{i}.bbox_gt, 2)]);

    Data{i}.shapes_residual = zeros([size(Param.meanshape), augnumber]);
    Data{i}.tf2meanshape = cell(augnumber, 1);
    Data{i}.meanshape2tf = cell(augnumber, 1);

    % if Data{i}.isdet == 1
    %     Data{i}.bbox_facedet = Data{i}.bbox_facedet*ts_bbox;
    % end

    for sr = 1:params.augnumber
        if sr == 1
            % estimate the similarity transformation from initial shape to mean shape
            % Data{i}.intermediate_shapes{1}(:,:, sr) = resetshape(Data{i}.bbox_gt, Param.meanshape);
            % Data{i}.intermediate_bboxes{1}(sr, :) = Data{i}.bbox_gt;
            Data{i}.intermediate_shapes{1}(:,:, sr) = resetshape(Data{i}.bbox_facedet, Param.meanshape);
            Data{i}.intermediate_bboxes{1}(sr, :) = Data{i}.bbox_facedet;

            meanshape_resize = resetshape(Data{i}.intermediate_bboxes{1}(sr, :), Param.meanshape); % reproject the mean shape onto the face-detection bbox

            % estimate the similarity transformation between the current shape and the mean shape
            Data{i}.tf2meanshape{1} = fitgeotrans(bsxfun(@minus, Data{i}.intermediate_shapes{1}(1:end,:, 1), mean(Data{i}.intermediate_shapes{1}(1:end,:, 1))), ...
                (bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :)))), 'NonreflectiveSimilarity');
            Data{i}.meanshape2tf{1} = fitgeotrans((bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :)))), ...
                bsxfun(@minus, Data{i}.intermediate_shapes{1}(1:end,:, 1), mean(Data{i}.intermediate_shapes{1}(1:end,:, 1))), 'NonreflectiveSimilarity');

            % calculate the residual shape from initial shape to groundtruth shape under normalization scale
            shape_residual = bsxfun(@rdivide, Data{i}.shape_gt - Data{i}.intermediate_shapes{1}(:,:, 1), ...
                [Data{i}.intermediate_bboxes{1}(1, 3) Data{i}.intermediate_bboxes{1}(1, 4)]);

            % transform the shape residual in the image coordinate to the mean shape coordinate
            [u, v] = transformPointsForward(Data{i}.tf2meanshape{1}, shape_residual(:, 1)', shape_residual(:, 2)');
            Data{i}.shapes_residual(:, 1, 1) = u';
            Data{i}.shapes_residual(:, 2, 1) = v';
        else
            % randomly rotate the shape
            % shape = resetshape(Data{i}.bbox_gt, Param.meanshape); % Data{indice_rotate(sr)}.shape_gt
            shape = resetshape(Data{i}.bbox_facedet, Param.meanshape); % Data{indice_rotate(sr)}.shape_gt

            % build a new initial shape from a randomly chosen scale, rotation and translation, and project it onto the bbox
            if params.augnumber_scale ~= 0
                shape = scaleshape(shape, scales(sr));
            end

            if params.augnumber_rotate ~= 0
                shape = rotateshape(shape);
            end

            if params.augnumber_shift ~= 0
                shape = translateshape(shape, Data{indice_shift(sr)}.shape_gt);
            end

            Data{i}.intermediate_shapes{1}(:, :, sr) = shape;
            Data{i}.intermediate_bboxes{1}(sr, :) = getbbox(shape);

            meanshape_resize = resetshape(Data{i}.intermediate_bboxes{1}(sr, :), Param.meanshape);

            Data{i}.tf2meanshape{sr} = fitgeotrans(bsxfun(@minus, Data{i}.intermediate_shapes{1}(1:end,:, sr), mean(Data{i}.intermediate_shapes{1}(1:end,:, sr))), ...
                bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :))), 'NonreflectiveSimilarity');
            Data{i}.meanshape2tf{sr} = fitgeotrans(bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :))), ...
                bsxfun(@minus, Data{i}.intermediate_shapes{1}(1:end,:, sr), mean(Data{i}.intermediate_shapes{1}(1:end,:, sr))), 'NonreflectiveSimilarity');

            shape_residual = bsxfun(@rdivide, Data{i}.shape_gt - Data{i}.intermediate_shapes{1}(:,:, sr), ...
                [Data{i}.intermediate_bboxes{1}(sr, 3) Data{i}.intermediate_bboxes{1}(sr, 4)]);

            [u, v] = transformPointsForward(Data{i}.tf2meanshape{1}, shape_residual(:, 1)', shape_residual(:, 2)');
            Data{i}.shapes_residual(:, 1, sr) = u';
            Data{i}.shapes_residual(:, 2, sr) = v';
            % Data{i}.shapes_residual(:, :, sr) = tformfwd(Data{i}.tf2meanshape{sr}, shape_residual(:, 1), shape_residual(:, 2));
        end
    end
end

% train random forests for each landmark
randf = cell(Param.max_numstage, 1);
Ws = cell(Param.max_numstage, 1);

%{
if nargin > 2
    n = size(LBFRegModel_initial.ranf, 1);
    for i = 1:n
        randf(1:n, :) = LBFRegModel_initial.ranf;
    end
end
%}

% start training
for s = 1:Param.max_numstage
    % learn random forest for s-th stage
    disp('train random forests for landmarks...');
    %{
    if isempty(randf{s})
        if exist(strcat('randfs\randf', num2str(s), '.mat'))
            load(strcat('randfs\randf', num2str(s), '.mat'));
        else
            tic;
            randf{s} = train_randomfs(Data, Param, s);
            toc;
            save(strcat('randfs\randf', num2str(s), '.mat'), 'randf', '-v7.3');
        end
    end
    %}

    % learn the feature mapping with random forests
    if isempty(randf{s})
        tic;
        randf{s} = train_randomfs(Data, Param, s);
        toc;
    end

    % derive binary codes given learned random forest in current stage
    disp('extract local binary features...');
    tic;
    binfeatures = derivebinaryfeat(randf{s}, Data, Param, s);
    % save(strcat('LBFeats\LBFeats', num2str(s), '.mat'), 'binfeatures', '-v7.3');
    toc;

    % learn global linear regression given binary feature
    disp('learn global regressors...');
    tic;
    [W, Data] = globalregression(binfeatures, Data, Param, s);
    Ws{s} = W;
    % save(strcat('Ws\W', num2str(s), '.mat'), 'W', '-v7.3');
    toc;
end

% save the regression model
LBFRegModel.ranf = randf;
LBFRegModel.Ws = Ws;
Next comes train_randomfs.m, which learns the feature mapping with random forests. Assuming the shape has 68 landmarks, one random forest is trained for each landmark, and each forest contains several decision trees; see the code for the details. A small worked example of how the training data is split across the trees follows.
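Each tree is trained on a contiguous slice of the training data, and consecutive slices overlap by roughly params.bagging_overlap. The numbers below are assumptions chosen for illustration, not the repository's configuration; they just evaluate the same index formulas that appear in the code.

% Worked example of the bagging split in train_randomfs (assumed values).
dbsize = 2000; overlap_ratio = 0.4; max_numtrees = 10;
Q  = floor(dbsize/((1 - overlap_ratio)*max_numtrees));   % Q = 333 samples per tree
t  = 2;
is = max(floor((t-1)*Q - (t-1)*Q*overlap_ratio + 1), 1); % is = 200
ie = min(is + Q, dbsize);                                % ie = 533
% Tree 1 gets samples 1..334 and tree 2 gets 200..533, so neighbouring trees
% share roughly 40% of their training samples.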
function rfs = train_randomfs(Tr_Data, params, stage)
%TRAIN_RANDOMFS Summary of this function goes here
%   Function: train random forest for each landmark
%   Detailed explanation goes here
%   Input:
%       lmarkID: ID of landmark
%       stage:   the stage of training process
%   Output:
%       randf:   learned random forest

dbsize = length(Tr_Data);
% rf = cell(1, params.max_numtrees);

% Split the training data into partially overlapping subsets; each subset is
% used to train one decision tree (bagging).
overlap_ratio = params.bagging_overlap;

Q = floor(double(dbsize)/((1-params.bagging_overlap)*(params.max_numtrees)));

Data = cell(1, params.max_numtrees);
for t = 1:params.max_numtrees
    % calculate the number of samples for each random tree
    % train t-th random tree
    is = max(floor((t-1)*Q - (t-1)*Q*overlap_ratio + 1), 1);
    ie = min(is + Q, dbsize);
    Data{t} = Tr_Data(is:ie);
end

% divide local region into grid
params.radius = ([0:1/30:1]');
params.angles = 2*pi*[0:1/36:1]';

rfs = cell(length(params.meanshape), params.max_numtrees);

% initialization
parfor i = 1:length(params.meanshape)
    rf = cell(1, params.max_numtrees);
    % disp(strcat(num2str(i), 'th landmark is processing...'));
    for t = 1:params.max_numtrees
        % disp(strcat('training', {''}, num2str(t), '-th tree for', {''}, num2str(lmarkID), '-th landmark'));

        % calculate the number of samples for each random tree
        % train t-th random tree
        is = max(floor((t-1)*Q - (t-1)*Q*overlap_ratio + 1), 1);
        ie = min(is + Q, dbsize);

        max_numnodes = 2^params.max_depth - 1;

        rf{t}.ind_samples = cell(max_numnodes, 1); % indices of the samples at each node
        rf{t}.issplit     = zeros(max_numnodes, 1); % whether the node has been split
        rf{t}.pnode       = zeros(max_numnodes, 1); % parent node
        rf{t}.depth       = zeros(max_numnodes, 1); % depth of the node
        rf{t}.cnodes      = zeros(max_numnodes, 2); % child nodes
        rf{t}.isleafnode  = zeros(max_numnodes, 1); % whether the node is a leaf
        rf{t}.feat        = zeros(max_numnodes, 4); % feature (angle/radius pair)
        rf{t}.thresh      = zeros(max_numnodes, 1); % split threshold

        rf{t}.ind_samples{1} = 1:(ie - is + 1)*(params.augnumber);
        rf{t}.issplit(1)     = 0;
        rf{t}.pnode(1)       = 0;
        rf{t}.depth(1)       = 1;
        rf{t}.cnodes(1, 1:2) = [0 0];
        rf{t}.isleafnode(1)  = 1;
        rf{t}.feat(1, :)     = zeros(1, 4);
        rf{t}.thresh(1)      = 0;

        num_nodes = 1;
        num_leafnodes = 1;
        stop = 0;
        while(~stop)
            num_nodes_iter = num_nodes;
            num_split = 0;
            for n = 1:num_nodes_iter
                if ~rf{t}.issplit(n)
                    if rf{t}.depth(n) == params.max_depth % || length(rf{t}.ind_samples{n}) < 20
                        if rf{t}.depth(n) == 1
                            rf{t}.depth(n) = 1;
                        end
                        rf{t}.issplit(n) = 1;
                    else
                        % separate the samples into left and right path
                        [thresh, feat, lcind, rcind, isvalid] = splitnode(i, rf{t}.ind_samples{n}, Data{t}, params, stage); % split the node into a left and a right child
                        %{
                        if ~isvalid
                            rf{t}.feat(n, :) = [0 0 0 0];
                            rf{t}.thresh(n) = 0;
                            rf{t}.issplit(n) = 1;
                            rf{t}.cnodes(n, :) = [0 0];
                            rf{t}.isleafnode(n) = 1;
                            continue;
                        end
                        %}

                        % set the threshold and featture for current node
                        rf{t}.feat(n, :) = feat;
                        rf{t}.thresh(n) = thresh;
                        rf{t}.issplit(n) = 1;
                        rf{t}.cnodes(n, :) = [num_nodes+1 num_nodes+2];
                        rf{t}.isleafnode(n) = 0;

                        % add left and right child nodes into the random tree
                        rf{t}.ind_samples{num_nodes+1} = lcind;
                        rf{t}.issplit(num_nodes+1) = 0;
                        rf{t}.pnode(num_nodes+1) = n;
                        rf{t}.depth(num_nodes+1) = rf{t}.depth(n) + 1;
                        rf{t}.cnodes(num_nodes+1, :) = [0 0];
                        rf{t}.isleafnode(num_nodes+1) = 1;

                        rf{t}.ind_samples{num_nodes+2} = rcind;
                        rf{t}.issplit(num_nodes+2) = 0;
                        rf{t}.pnode(num_nodes+2) = n;
                        rf{t}.depth(num_nodes+2) = rf{t}.depth(n) + 1;
                        rf{t}.cnodes(num_nodes+2, :) = [0 0];
                        rf{t}.isleafnode(num_nodes+2) = 1;

                        num_split = num_split + 1;
                        num_leafnodes = num_leafnodes + 1;
                        num_nodes = num_nodes + 2;
                    end
                end
            end

            if num_split == 0
                stop = 1;
            else
                rf{t}.num_leafnodes = num_leafnodes;
                rf{t}.num_nodes = num_nodes;
                rf{t}.id_leafnodes = find(rf{t}.isleafnode == 1);
            end
        end
    end
    % disp(strcat(num2str(i), 'th landmark is over'));
    rfs(i, :) = rf;
end
end

function [thresh, feat, lcind, rcind, isvalid] = splitnode(lmarkID, ind_samples, Tr_Data, params, stage)

if isempty(ind_samples)
    thresh = 0;
    feat = [0 0 0 0];
    rcind = [];
    lcind = [];
    isvalid = 1;
    return;
end

% generate params.max_rand cndidate feature
% anglepairs = samplerandfeat(params.max_numfeat);
% radiuspairs = [rand([params.max_numfeat, 1]) rand([params.max_numfeat, 1])];
[radiuspairs, anglepairs] = getproposals(params.max_numfeats(stage), params.radius, params.angles); % randomly sample candidate pixel locations as (radius, angle) pairs

angles_cos = cos(anglepairs);
angles_sin = sin(anglepairs);

% Shape-indexed features: two offsets are randomly sampled around the current
% landmark, the two corresponding pixel intensities are read, and their
% difference is used as the feature.
% extract pixel difference features from pairs
pdfeats = zeros(params.max_numfeats(stage), length(ind_samples));
shapes_residual = zeros(length(ind_samples), 2);

for i = 1:length(ind_samples)
    s = floor((ind_samples(i)-1)/(params.augnumber)) + 1;
    k = mod(ind_samples(i)-1, (params.augnumber)) + 1;

    % calculate the relative location under the coordinate of meanshape
    pixel_a_x_imgcoord = (angles_cos(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, 3);
    pixel_a_y_imgcoord = (angles_sin(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, 4);

    pixel_b_x_imgcoord = (angles_cos(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, 3);
    pixel_b_y_imgcoord = (angles_sin(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, 4);

    % no transformation
    %{
    pixel_a_x_lmcoord = pixel_a_x_imgcoord;
    pixel_a_y_lmcoord = pixel_a_y_imgcoord;

    pixel_b_x_lmcoord = pixel_b_x_imgcoord;
    pixel_b_y_lmcoord = pixel_b_y_imgcoord;
    %}

    % transform the pixels from image coordinate (meanshape) to coordinate of current shape
    [pixel_a_x_lmcoord, pixel_a_y_lmcoord] = transformPointsForward(Tr_Data{s}.meanshape2tf{k}, pixel_a_x_imgcoord', pixel_a_y_imgcoord');
    pixel_a_x_lmcoord = pixel_a_x_lmcoord';
    pixel_a_y_lmcoord = pixel_a_y_lmcoord';

    [pixel_b_x_lmcoord, pixel_b_y_lmcoord] = transformPointsForward(Tr_Data{s}.meanshape2tf{k}, pixel_b_x_imgcoord', pixel_b_y_imgcoord');
    pixel_b_x_lmcoord = pixel_b_x_lmcoord';
    pixel_b_y_lmcoord = pixel_b_y_lmcoord';

    pixel_a_x = int32(bsxfun(@plus, pixel_a_x_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, 1, k)));
    pixel_a_y = int32(bsxfun(@plus, pixel_a_y_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, 2, k)));

    pixel_b_x = int32(bsxfun(@plus, pixel_b_x_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, 1, k)));
    pixel_b_y = int32(bsxfun(@plus, pixel_b_y_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, 2, k)));

    width = (Tr_Data{s}.width);
    height = (Tr_Data{s}.height);

    pixel_a_x = max(1, min(pixel_a_x, width));
    pixel_a_y = max(1, min(pixel_a_y, height));

    pixel_b_x = max(1, min(pixel_b_x, width));
    pixel_b_y = max(1, min(pixel_b_y, height));

    pdfeats(:, i) = double(Tr_Data{s}.img_gray(pixel_a_y + (pixel_a_x-1)*height)) - double(Tr_Data{s}.img_gray(pixel_b_y + (pixel_b_x-1)*height)); % compute the shape-indexed (pixel-difference) features
    % ./ double(Tr_Data{s}.img_gray(pixel_a_y + (pixel_a_x-1)*height)) + double(Tr_Data{s}.img_gray(pixel_b_y + (pixel_b_x-1)*height));

    % drawshapes(Tr_Data{s}.img_gray, [pixel_a_x pixel_a_y pixel_b_x pixel_b_y]);
    % hold off;

    shapes_residual(i, :) = Tr_Data{s}.shapes_residual(lmarkID, :, k);
end

E_x_2 = mean(shapes_residual(:, 1).^2);
E_x = mean(shapes_residual(:, 1));

E_y_2 = mean(shapes_residual(:, 2).^2);
E_y = mean(shapes_residual(:, 2));

var_overall = length(ind_samples)*((E_x_2 - E_x^2) + (E_y_2 - E_y^2)); % overall variance of the residuals at this node
% var_overall = length(ind_samples)*(var(shapes_residual(:, 1)) + var(shapes_residual(:, 2)));

% max_step = min(length(ind_samples), params.max_numthreshs);
% step = floor(length(ind_samples)/max_step);
max_step = 1;

var_reductions = zeros(params.max_numfeats(stage), max_step);
thresholds = zeros(params.max_numfeats(stage), max_step);

[pdfeats_sorted] = sort(pdfeats, 2); % sort the features

% shapes_residual = shapes_residual(ind, :);

for i = 1:params.max_numfeats(stage)
    % for t = 1:max_step
    t = 1;
    ind = ceil(length(ind_samples)*(0.5 + 0.9*(rand(1) - 0.5)));
    threshold = pdfeats_sorted(i, ind); % pdfeats_sorted(i, t*step); % pick the threshold at random
    thresholds(i, t) = threshold;
    ind_lc = (pdfeats(i, :) < threshold);  % samples below the threshold go to the left child
    ind_rc = (pdfeats(i, :) >= threshold); % samples at or above the threshold go to the right child

    % figure, hold on, plot(shapes_residual(ind_lc, 1), shapes_residual(ind_lc, 2), 'r.')
    % plot(shapes_residual(ind_rc, 1), shapes_residual(ind_rc, 2), 'g.')
    % close;
    % compute
    E_x_2_lc = mean(shapes_residual(ind_lc, 1).^2);
    E_x_lc = mean(shapes_residual(ind_lc, 1));

    E_y_2_lc = mean(shapes_residual(ind_lc, 2).^2);
    E_y_lc = mean(shapes_residual(ind_lc, 2));

    var_lc = (E_x_2_lc + E_y_2_lc) - (E_x_lc^2 + E_y_lc^2); % variance of the left child

    E_x_2_rc = (E_x_2*length(ind_samples) - E_x_2_lc*sum(ind_lc))/sum(ind_rc);
    E_x_rc = (E_x*length(ind_samples) - E_x_lc*sum(ind_lc))/sum(ind_rc);

    E_y_2_rc = (E_y_2*length(ind_samples) - E_y_2_lc*sum(ind_lc))/sum(ind_rc);
    E_y_rc = (E_y*length(ind_samples) - E_y_lc*sum(ind_lc))/sum(ind_rc);

    var_rc = (E_x_2_rc + E_y_2_rc) - (E_x_rc^2 + E_y_rc^2); % variance of the right child

    var_reduce = var_overall - sum(ind_lc)*var_lc - sum(ind_rc)*var_rc; % variance reduction achieved by this split
    % var_reduce = var_overall - sum(ind_lc)*(var(shapes_residual(ind_lc, 1)) + var(shapes_residual(ind_lc, 2))) - sum(ind_rc)*(var(shapes_residual(ind_rc, 1)) + var(shapes_residual(ind_rc, 2)));
    var_reductions(i, t) = var_reduce;
    % end
    % plot(var_reductions(i, :));
end

[~, ind_colmax] = max(var_reductions); % index of the candidate with the largest variance reduction
% Why pick the largest variance reduction: the larger the reduction, the smaller
% the remaining variance, which is the sum of the variances of the left and right
% children. Small child variances mean the residuals inside each child are more
% concentrated, i.e. the split separates the samples better.
ind_max = 1;

%{
if var_max <= 0
    isvalid = 0;
else
    isvalid = 1;
end
%}
isvalid = 1;

% store the selected feature and threshold; these parameters define the learned feature mapping
thresh = thresholds(ind_colmax(ind_max), ind_max);

feat = [anglepairs(ind_colmax(ind_max), :) radiuspairs(ind_colmax(ind_max), :)];

lcind = ind_samples(find(pdfeats(ind_colmax(ind_max), :) < thresh));
rcind = ind_samples(find(pdfeats(ind_colmax(ind_max), :) >= thresh));

end
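The split criterion used in splitnode can be written compactly. For the N samples reaching a node, with residuals (x_j, y_j), a candidate (feature, threshold) pair that sends N_l samples to the left child and N_r to the right is scored by the variance reduction

$$\Delta = N\bigl(\operatorname{Var}(x)+\operatorname{Var}(y)\bigr) - N_l\bigl(\operatorname{Var}_l(x)+\operatorname{Var}_l(y)\bigr) - N_r\bigl(\operatorname{Var}_r(x)+\operatorname{Var}_r(y)\bigr),$$

where the code evaluates each variance as $E[x^2]-E[x]^2$ and reuses the parent's sums to obtain the right child's statistics cheaply. The candidate with the largest $\Delta$ is kept as the node's feature and threshold.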
The training data is then passed through the feature mapping just learned to compute the binary features: in each tree, the leaf node that a training sample reaches is set to 1 and all other leaves to 0, and the binary codes of all trees (and all landmarks) are concatenated. See derivebinaryfeat.m for the details.
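As an illustration of the resulting dimensionality (the numbers below are assumptions; the real counts come from config_tr and the trained trees, whose leaf counts can differ per tree):

% Illustration of the local binary feature layout (assumed numbers: 68
% landmarks, 5 trees per landmark, at most 2^4 = 16 leaves per tree for
% max_depth = 5).
num_landmarks = 68; num_trees = 5; leaves_per_tree = 16;
dims_binfeat = num_landmarks * num_trees * leaves_per_tree; % up to 5440 dimensions

% For one tree, a sample reaches exactly one leaf, so its per-tree code is
% one-hot; the codes of all trees of all landmarks are concatenated.
tree_code = zeros(1, leaves_per_tree);
tree_code(7) = 1; % e.g. the sample fell into the 7th leaf of this tree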
function binfeatures = derivebinaryfeat(randf, Tr_Data, params, stage)
%DERIVEBINARYFEAT Summary of this function goes here
%   Function: Derive binary features for each sample given learned random forest
%   Detailed explanation goes here
%   Input:
%        lmarkID: the ID of landmark
%        randf:   learned random forest in current stage
%        Tr_Data: training data
%        params:  parameters for curent stage

% calculate the overall dimension of binary feature, concatenate the
% features for all landmarks, and the feature of one landmark is sum
% leafnodes of all random trees;

dims_binfeat = 0;

ind_bincode = zeros(size(randf));

% compute the dimensionality of the binary features
for l = 1:size(randf, 1)
    for t = 1:size(randf, 2)
        ind_bincode(l, t) = randf{l, t}.num_leafnodes;
        dims_binfeat = dims_binfeat + randf{l, t}.num_leafnodes;
    end
end

% initilaize the memory for binfeatures
dbsize = length(Tr_Data);

% faster implementation
%{
tic;
binfeatures = zeros(dbsize*(params.augnumber), dims_binfeat);
img_sizes = zeros(2, dbsize*(params.augnumber));
shapes = zeros([size(params.meanshape), dbsize*(params.augnumber)]);

tfs2meanshape = cell(1, dbsize*params.augnumber);

img_areas = zeros(1, dbsize);
parfor i = 1:dbsize
    img_areas(i) = Tr_Data{i}.width*Tr_Data{i}.height;
end

imgs_gray = zeros(dbsize, max(img_areas));

for i = 1:dbsize
    area = img_areas(i);
    imgs_gray(i, 1:area) = Tr_Data{i}.img_gray(:)';
    shapes(:, :, (i-1)*params.augnumber+1:i*params.augnumber) = Tr_Data{i}.intermediate_shapes{stage};

    for k = 1:params.augnumber
        img_sizes(:, (i-1)*params.augnumber+k) = [Tr_Data{i}.width; Tr_Data{i}.height];
        tfs2meanshape{(i-1)*params.augnumber+k} = Tr_Data{i}.tf2meanshape{k};
    end
end

binfeature_lmarks = cell(1, size(params.meanshape, 1));
num_leafnodes = zeros(1, size(params.meanshape, 1));
for l = 1:size(params.meanshape, 1)
    [binfeature_lmarks{l}, num_leafnodes(l)] = lbf_faster(randf{l}, imgs_gray, img_sizes, shapes(l, :, :), tfs2meanshape, params, stage);
end

cumnum_leafnodes = [0 cumsum(num_leafnodes)];

binfeatures_faster = binfeatures;
for l = 1:size(params.meanshape, 1)
    binfeatures_faster(:, cumnum_leafnodes(l)+1:cumnum_leafnodes(l+1)) = binfeature_lmarks{l};
end
toc;
%}

feats = cell(size(params.meanshape, 1), 1);
isleaf = cell(size(params.meanshape, 1), 1);
threshs = cell(size(params.meanshape, 1), 1);
cnodes = cell(size(params.meanshape, 1), 1);

% prepare for the derivation of local binary codes
for l = 1:size(params.meanshape, 1)
    % concatenate all nodes of all random trees
    rf = randf(l, :);
    num_rfnodes = 0;
    for t = 1:params.max_numtrees
        num_rfnodes = num_rfnodes + rf{t}.num_nodes;
    end

    % fast implementation
    feats{l} = zeros(num_rfnodes, 4);
    isleaf{l} = zeros(num_rfnodes, 1);
    threshs{l} = zeros(num_rfnodes, 1);
    cnodes{l} = zeros(num_rfnodes, 2);

    id_rfnode = 1;

    for t = 1:params.max_numtrees
        feats{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.feat(1:rf{t}.num_nodes, :);
        isleaf{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.isleafnode(1:rf{t}.num_nodes, :);
        threshs{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.thresh(1:rf{t}.num_nodes, :);
        cnodes{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.cnodes(1:rf{t}.num_nodes, :);

        id_rfnode = id_rfnode + rf{t}.num_nodes;
    end
end

% extract feature for each samples
parfor i = 1:dbsize*(params.augnumber)
    k = floor((i-1)/(params.augnumber)) + 1;
    s = mod(i-1, (params.augnumber)) + 1;

    img_gray = Tr_Data{k}.img_gray;
    bbox = Tr_Data{k}.intermediate_bboxes{stage}(s, :);

    shapes = Tr_Data{k}.intermediate_shapes{stage};
    shape = shapes(:, :, s);

    % tf2meanshape = Tr_Data{k}.tf2meanshape{s};
    meanshape2tf = Tr_Data{k}.meanshape2tf{s};

    %{
    feats_cell = cell(size(params.meanshape, 1), params.max_numtrees);

    for l = 1:size(params.meanshape, 1)
        % extract feature for each landmark from the random forest
        for t = 1:params.max_numtrees
            % extract feature for each ladnmark from a random tree
            bincode = traversetree(randf{l}{t}, img_gray, bbox, shape(l, :), tf2meanshape, params, stage);
            feats_cell{l, t} = bincode;
        end
    end

    binfeature = zeros(1, dims_binfeat);

    for l = 1:size(params.meanshape, 1)
        % extract feature for each landmark from the random forest
        offset = sum(ind_bincodes(1:l-1, end));
        for t = 1:params.max_numtrees
            ind_s = ind_bincodes(l, t) + 1 + offset;
            ind_e = ind_bincodes(l, t+1) + offset;
            % extract feature for each ladnmark from a random tree
            binfeature(ind_s:ind_e) = feats_cell{l, t};
        end
    end
    %}

    % fast implementation
    binfeature_lmarks = cell(1, size(params.meanshape, 1));
    num_leafnodes = zeros(1, size(params.meanshape, 1));
    for l = 1:size(params.meanshape, 1)
        % concatenate all nodes of all random trees
        [binfeature_lmarks{l}, num_leafnodes(l)] = lbf_fast(randf(l, :), feats{l}, isleaf{l}, threshs{l}, cnodes{l}, img_gray, bbox, shape(l, :), meanshape2tf, params, stage);
    end

    cumnum_leafnodes = [0 cumsum(num_leafnodes)];

    binfeature_alllmarks = zeros(1, cumnum_leafnodes(end));
    for l = 1:size(params.meanshape, 1)
        binfeature_alllmarks(cumnum_leafnodes(l)+1:cumnum_leafnodes(l+1)) = binfeature_lmarks{l};
    end

    %{
    % faster implementation (thinking...)
    binfeature_lmarks = cell(1, size(params.meanshape, 1));
    num_leafnodes = zeros(1, size(params.meanshape, 1));
    for l = 1:size(params.meanshape, 1)
        % concatenate all nodes of all random trees
        [binfeature_lmarks{l}, num_leafnodes(l)] = lbf_faster(randf{l}, img_gray, bbox, shape(l, :), tf2meanshape, params, stage);
    end

    cumnum_leafnodes = [0 cumsum(num_leafnodes)];

    binfeature_alllmarks = zeros(1, cumnum_leafnodes(end));
    for l = 1:size(params.meanshape, 1)
        binfeature_alllmarks(cumnum_leafnodes(l)+1:cumnum_leafnodes(l+1)) = binfeature_lmarks{l};
    end
    %}

    binfeatures(i, :) = binfeature_alllmarks;
end

end

function bincode = traversetree(tree, img_gray, bbox, shape, tf2meanshape, params, stage)

currnode = tree.rtnodes{1};

bincode = zeros(1, tree.num_leafnodes);

width = size(img_gray, 2);
height = size(img_gray, 1);

while(1)
    anglepairs = currnode.feat(1:2);
    radiuspairs = currnode.feat(3:4);

    % calculate the relative location under the coordinate of meanshape
    pixel_a_x_imgcoord = cos(anglepairs(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*bbox(3);
    pixel_a_y_imgcoord = sin(anglepairs(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*bbox(4);

    pixel_b_x_imgcoord = cos(anglepairs(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*bbox(3);
    pixel_b_y_imgcoord = sin(anglepairs(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*bbox(4);

    % transform the pixels from image coordinate (meanshape) to coordinate of current shape
    [pixel_a_x_lmcoord, pixel_a_y_lmcoord] = tforminv(tf2meanshape, pixel_a_x_imgcoord, pixel_a_y_imgcoord);

    [pixel_b_x_lmcoord, pixel_b_y_lmcoord] = tforminv(tf2meanshape, pixel_b_x_imgcoord, pixel_b_y_imgcoord);

    pixel_a_x = ceil(pixel_a_x_lmcoord + shape(1));
    pixel_a_y = ceil(pixel_a_y_lmcoord + shape(2));

    pixel_b_x = ceil(pixel_b_x_lmcoord + shape(1));
    pixel_b_y = ceil(pixel_b_y_lmcoord + shape(2));

    pixel_a_x = max(1, min(pixel_a_x, width));
    pixel_a_y = max(1, min(pixel_a_y, height));

    pixel_b_x = max(1, min(pixel_b_x, width));
    pixel_b_y = max(1, min(pixel_b_y, height));

    f = int16(img_gray(pixel_a_y + (pixel_a_x-1)*height)) - int16(img_gray(pixel_b_y + (pixel_b_x-1)*height));

    if f < (currnode.thresh)
        id_chilenode = currnode.cnodes(1);
        currnode = tree.rtnodes{id_chilenode};
    else
        id_chilenode = currnode.cnodes(2);
        currnode = tree.rtnodes{id_chilenode};
    end

    if isempty(currnode.cnodes)
        % if the current node is a leaf node, then stop traversing
        bincode(tree.id_leafnodes == id_chilenode) = 1;
        break;
    end
end

end

function [binfeature, num_leafnodes] = lbf_fast(rf, feats, isleaf, threshs, cnodes, img_gray, bbox, shape, meanshape2tf, params, stage)

%{
num_rfnodes = 0;
for t = 1:params.max_numtrees
    num_rfnodes = num_rfnodes + rf{t}.num_nodes;
end

% fast implementation
feats = zeros(num_rfnodes, 4);
isleaf = zeros(num_rfnodes, 1);
threshs = zeros(num_rfnodes, 1);
cnodes = zeros(num_rfnodes, 2);

id_rfnode = 1;

for t = 1:params.max_numtrees
    feats(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.feat(1:rf{t}.num_nodes, :);
    isleaf(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.isleafnode(1:rf{t}.num_nodes, :);
    threshs(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.thresh(1:rf{t}.num_nodes, :);
    cnodes(id_rfnode:(id_rfnode + rf{t}.num_nodes - 1), :) = rf{t}.cnodes(1:rf{t}.num_nodes, :);

    id_rfnode = id_rfnode + rf{t}.num_nodes;
end
%}

width = size(img_gray, 2);
height = size(img_gray, 1);

anglepairs = feats(:, 1:2);
radiuspairs = feats(:, 3:4);

% calculate the relative location under the coordinate of meanshape
pixel_a_x_imgcoord = cos(anglepairs(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*bbox(3);
pixel_a_y_imgcoord = sin(anglepairs(:, 1)).*radiuspairs(:, 1)*params.max_raio_radius(stage)*bbox(4);

pixel_b_x_imgcoord = cos(anglepairs(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*bbox(3);
pixel_b_y_imgcoord = sin(anglepairs(:, 2)).*radiuspairs(:, 2)*params.max_raio_radius(stage)*bbox(4);

% no transformaiton
%{
pixel_a_x_lmcoord = pixel_a_x_imgcoord;
pixel_a_y_lmcoord = pixel_a_y_imgcoord;

pixel_b_x_lmcoord = pixel_b_x_imgcoord;
pixel_b_y_lmcoord = pixel_b_y_imgcoord;
%}

% transform the pixels from image coordinate (meanshape) to coordinate of current shape
[pixel_a_x_lmcoord, pixel_a_y_lmcoord] = transformPointsForward(meanshape2tf, pixel_a_x_imgcoord', pixel_a_y_imgcoord');
pixel_a_x_lmcoord = pixel_a_x_lmcoord';
pixel_a_y_lmcoord = pixel_a_y_lmcoord';

[pixel_b_x_lmcoord, pixel_b_y_lmcoord] = transformPointsForward(meanshape2tf, pixel_b_x_imgcoord', pixel_b_y_imgcoord');
pixel_b_x_lmcoord = pixel_b_x_lmcoord';
pixel_b_y_lmcoord = pixel_b_y_lmcoord';

pixel_a_x = ceil(pixel_a_x_lmcoord + shape(1));
pixel_a_y = ceil(pixel_a_y_lmcoord + shape(2));

pixel_b_x = ceil(pixel_b_x_lmcoord + shape(1));
pixel_b_y = ceil(pixel_b_y_lmcoord + shape(2));

pixel_a_x = max(1, min(pixel_a_x, width));
pixel_a_y = max(1, min(pixel_a_y, height));

pixel_b_x = max(1, min(pixel_b_x, width));
pixel_b_y = max(1, min(pixel_b_y, height));

pdfeats = double(img_gray(pixel_a_y + (pixel_a_x-1)*height)) - double(img_gray(pixel_b_y + (pixel_b_x-1)*height));
% ./ double(img_gray(pixel_a_y + (pixel_a_x-1)*height)) + double(img_gray(pixel_b_y + (pixel_b_x-1)*height));

cind = (pdfeats >= threshs) + 1;

% obtain the indice of child nodes for all nodes, if the current node is
% leaf node, then the indice of its child node is 0
ind_cnodes = cnodes; % diag(cnodes(:, cind));

binfeature = zeros(1, sum(isleaf));

cumnum_nodes = 0;
cumnum_leafnodes = 0;
for t = 1:params.max_numtrees
    num_nodes = (rf{t}.num_nodes);
    id_cnode = 1;
    while(1)
        if isleaf(id_cnode + cumnum_nodes)
            binfeature(cumnum_leafnodes + find(rf{t}.id_leafnodes == id_cnode)) = 1;
            cumnum_nodes = cumnum_nodes + num_nodes;
            cumnum_leafnodes = cumnum_leafnodes + rf{t}.num_leafnodes;
            break;
        end
        id_cnode = ind_cnodes(cumnum_nodes + id_cnode, cind(cumnum_nodes + id_cnode));
    end
end

num_leafnodes = sum(isleaf);

end
Finally, Equation (4) of the paper is solved by linear regression.
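Restating that objective in the notation used here (my reconstruction of Equation (4) from the paper): with $\Phi^t$ the binary features of stage $t$ and $\Delta\hat{S}_i^{\,t}$ the normalized shape residual of sample $i$, the global regression solves

$$\min_{W^t} \sum_i \left\| \Delta\hat{S}_i^{\,t} - W^t\,\Phi^t\!\left(I_i, S_i^{\,t-1}\right) \right\|_2^2 + \lambda \left\| W^t \right\|_2^2 .$$

In the code this is handled by liblinear: the option -s 12 selects L2-regularized L2-loss support vector regression, and with -p 0 (a zero-width epsilon tube) it behaves essentially like the ridge regression above, solved independently for each target dimension.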
function [W, Tr_Data] = globalregression(binaryfeatures, Tr_Data, params, stage)
%GLOBALREGRESSION Summary of this function goes here
%   Function: implement global regression given binary features and
%   groundtruth shape
%   Detailed explanation goes here
%   Input:
%       binaryfeatures: extracted binary features from all samples (N X d )
%       Tr_Data: training data
%       params: parameters for model
%   Output:
%       W: regression matrix

% organize the groundtruth shape
dbsize = length(Tr_Data);
deltashapes = zeros(dbsize*(params.augnumber), 2*size(params.meanshape, 1)); % concatenate 2-D coordinates into a vector (N X (2*L))
dist_pupils = zeros(dbsize*(params.augnumber), 1);
gtshapes = zeros([size(params.meanshape) dbsize*(params.augnumber)]); % concatenate 2-D coordinates into a vector (N X (2*L))

% data initialization
% Simply put, the linear regression solves A*W = Y, where A is binaryfeatures,
% Y is deltashapes, and W is the regression matrix we are looking for.
for i = 1:dbsize*(params.augnumber)
    k = floor((i-1)/(params.augnumber)) + 1;
    s = mod(i-1, (params.augnumber)) + 1;

    shape_gt = Tr_Data{k}.shape_gt;
    if size(shape_gt, 1) == 68
        dist_pupils(i) = norm((mean(shape_gt(37:42, :)) - mean(shape_gt(43:48, :))));
    elseif size(shape_gt, 1) == 51
        dist_pupils(i) = norm((mean(shape_gt(20, :)) - mean(shape_gt(29, :))));
    elseif size(shape_gt, 1) == 29
        dist_pupils(i) = norm((mean(shape_gt(9:2:17, :)) - mean(shape_gt(10:2:18, :))));
    else
        dist_pupils(i) = norm((mean(shape_gt(1:2, :)) - mean(shape_gt(end - 1:end, :))));
    end
    gtshapes(:, :, i) = shape_gt;
    delta_shape = Tr_Data{k}.shapes_residual(:, :, s);
    deltashapes(i, :) = delta_shape(:)';
end

% conduct regression using liblinear
% X : binaryfeatures
% Y : gtshapes
% use the liblinear toolbox
param = sprintf('-s 12 -p 0 -c %f -q heart_scale', 1/(size(binaryfeatures, 1))); % liblinear options
W_liblinear = zeros(size(binaryfeatures, 2), size(deltashapes, 2));
tic;
parfor o = 1:size(deltashapes, 2)
    model = train(deltashapes(:, o), sparse(binaryfeatures), param);
    W_liblinear(:, o) = model.w';
end
toc;
W = W_liblinear;

% Predict the location of landmarks using current regression matrix
deltashapes_bar = binaryfeatures*W;
predshapes = zeros([size(params.meanshape) size(binaryfeatures, 1)]); % concatenate 2-D coordinates into a vector (N X (2*L))

for i = 1:dbsize*(params.augnumber)
    k = floor((i-1)/(params.augnumber)) + 1;
    s = mod(i-1, (params.augnumber)) + 1;

    shapes_stage = Tr_Data{k}.intermediate_shapes{stage};
    shape_stage = shapes_stage(:, :, s);

    deltashapes_bar_xy = reshape(deltashapes_bar(i, :), [uint8(size(deltashapes_bar, 2)/2) 2]);

    % transform above delta shape into the coordinate of current intermediate shape
    % delta_shape_interm_coord = [deltashapes_bar_x(i, :)', deltashapes_bar_y(i, :)'];
    [u, v] = transformPointsForward(Tr_Data{k}.meanshape2tf{s}, deltashapes_bar_xy(:, 1)', deltashapes_bar_xy(:, 2)');
    delta_shape_interm_coord = [u', v'];

    % derive the delta shape in the coordinate system of meanshape
    delta_shape_interm_coord = bsxfun(@times, delta_shape_interm_coord, Tr_Data{k}.intermediate_bboxes{stage}(s, 3:4));

    shape_newstage = shape_stage + delta_shape_interm_coord;
    predshapes(:, :, i) = shape_newstage; % shape at the current stage

    Tr_Data{k}.intermediate_shapes{stage+1}(:, :, s) = shape_newstage;

    % update transformation of current intermediate shape to meanshape, needed for training the later stages
    Tr_Data{k}.intermediate_bboxes{stage+1}(s, :) = getbbox(Tr_Data{k}.intermediate_shapes{stage+1}(:, :, s));

    meanshape_resize = (resetshape(Tr_Data{k}.intermediate_bboxes{stage+1}(s, :), params.meanshape));

    shape_residual = bsxfun(@rdivide, Tr_Data{k}.shape_gt - shape_newstage, Tr_Data{k}.intermediate_bboxes{stage+1}(s, 3:4));

    Tr_Data{k}.tf2meanshape{s} = fitgeotrans(bsxfun(@minus, Tr_Data{k}.intermediate_shapes{stage+1}(1:end,:, s), mean(Tr_Data{k}.intermediate_shapes{stage+1}(1:end,:, s))), ...
        bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :))), 'NonreflectiveSimilarity');
    Tr_Data{k}.meanshape2tf{s} = fitgeotrans(bsxfun(@minus, meanshape_resize(1:end, :), mean(meanshape_resize(1:end, :))), ...
        bsxfun(@minus, Tr_Data{k}.intermediate_shapes{stage+1}(1:end,:, s), mean(Tr_Data{k}.intermediate_shapes{stage+1}(1:end,:, s))), 'NonreflectiveSimilarity');

    [u, v] = transformPointsForward(Tr_Data{k}.tf2meanshape{s}, shape_residual(:, 1), shape_residual(:, 2));
    Tr_Data{k}.shapes_residual(:, 1, s) = u';
    Tr_Data{k}.shapes_residual(:, 2, s) = v';

    %{
    drawshapes(Tr_Data{k}.img_gray, [Tr_Data{k}.shape_gt shape_stage shape_newstage]);
    hold off;
    drawnow;
    w = waitforbuttonpress;
    %}
end

% compute the alignment error
error_per_image = compute_error(gtshapes, predshapes);

MRSE = 100*mean(error_per_image);
MRSE_display = sprintf('Mean Root Square Error for %d Test Samples: %f', length(error_per_image), MRSE);
disp(MRSE_display);

end

function [ error_per_image ] = compute_error( ground_truth_all, detected_points_all )
%compute_error
%   compute the average point-to-point Euclidean error normalized by the
%   inter-ocular distance (measured as the Euclidean distance between the
%   outer corners of the eyes)
%
%   Inputs:
%          ground_truth_all,    size: num_of_points x 2 x num_of_images
%          detected_points_all, size: num_of_points x 2 x num_of_images
%   Output:
%          error_per_image, size: num_of_images x 1

num_of_images = size(ground_truth_all, 3);
num_of_points = size(ground_truth_all, 1);

error_per_image = zeros(num_of_images, 1);

for i = 1:num_of_images
    detected_points = detected_points_all(:,:, i);
    ground_truth_points = ground_truth_all(:,:, i);
    if num_of_points == 68
        interocular_distance = norm(mean(ground_truth_points(37:42,:)) - mean(ground_truth_points(43:48,:)));
        % norm((mean(shape_gt(37:42, :)) - mean(shape_gt(43:48, :))));
    elseif num_of_points == 51
        interocular_distance = norm(ground_truth_points(20,:) - ground_truth_points(29,:));
    elseif num_of_points == 29
        interocular_distance = norm(mean(ground_truth_points(9:2:17,:)) - mean(ground_truth_points(10:2:18,:)));
    else
        interocular_distance = norm(mean(ground_truth_points(1:2,:)) - mean(ground_truth_points(end - 1:end,:)));
    end
    sum = 0;
    for j = 1:num_of_points
        sum = sum + norm(detected_points(j,:) - ground_truth_points(j,:));
    end
    error_per_image(i) = sum/(num_of_points*interocular_distance);
end

end
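In compute_error, the per-image error is the mean point-to-point Euclidean distance normalized by the inter-ocular distance $d_{io}$, and the reported number is its mean over images scaled by 100:

$$\text{err}_i = \frac{1}{N_{pts}\, d_{io}} \sum_{j=1}^{N_{pts}} \left\| \hat{p}_{ij} - p_{ij} \right\|_2, \qquad \text{MRSE} = \frac{100}{N}\sum_{i=1}^{N} \text{err}_i,$$

where $\hat{p}_{ij}$ and $p_{ij}$ are the predicted and ground-truth positions of landmark $j$ in image $i$.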
The test pipeline mirrors the training pipeline; see test_model.m for the full code.
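As a rough sketch of what that file does with the saved model (a simplification, assuming the test samples Te_Data have been prepared with the same fields as the training samples; the real code also handles multiple initializations and the mean-shape coordinate transforms):

% Minimal sketch of the test-time cascade (simplified from test_model.m).
for s = 1:length(LBFRegModel.ranf)
    % feature mapping learned at stage s: traverse the forests to obtain the
    % sparse local binary features of every test sample
    binfeatures = derivebinaryfeat(LBFRegModel.ranf{s}, Te_Data, params, s);
    % global linear projection: predicted shape residuals in the mean-shape frame
    deltashapes_bar = binfeatures * LBFRegModel.Ws{s};
    % the residuals are then de-normalized, transformed back to image
    % coordinates and added to each sample's current intermediate shape
end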