clear

%% generate data
prettySpiral = 0;

if ~prettySpiral
    % generate some random Gaussian-like data
    rand('state', 0);
    randn('state', 0);
    N = 50;
    D = 2;

    X1 = mgd(N, D, [4 3],  [2 -1; -1 2]);
    X2 = mgd(N, D, [1 1],  [2 1; 1 1]);
    X3 = mgd(N, D, [3 -3], [1 0; 0 4]);

    X = [X1; X2; X3];
    % center and scale each feature (note: scales by the variance)
    X = bsxfun(@rdivide, bsxfun(@minus, X, mean(X)), var(X));
    y = [ones(N,1); ones(N,1)*2; ones(N,1)*3];

    scatter(X(:,1), X(:,2), 20, y)
else
    % generate twirl (spiral) data!
    N = 50;
    t = linspace(0.5, 2*pi, N);
    x = t.*cos(t);
    y = t.*sin(t);

    t = linspace(0.5, 2*pi, N);
    x2 = t.*cos(t+2);
    y2 = t.*sin(t+2);

    t = linspace(0.5, 2*pi, N);
    x3 = t.*cos(t+4);
    y3 = t.*sin(t+4);

    X = [[x' y']; [x2' y2']; [x3' y3']];
    % center and scale each feature (note: scales by the variance)
    X = bsxfun(@rdivide, bsxfun(@minus, X, mean(X)), var(X));
    y = [ones(N,1); ones(N,1)*2; ones(N,1)*3];

    scatter(X(:,1), X(:,2), 20, y)
end

%% classify
rho = 1;
c1 = 10;
c2 = 10;
epsilon = 0.2;
threshold = 0;
result = [];

% ker = 'linear';
ker = 'rbf';
sigma = 1/10;  % sigma = 1/200;

par = NonLinearDualBoundSVORIM(X, y, c1, c2, epsilon, rho, ker, sigma);
% f = TestPrecisionNonLinear(par, X, y, X, y, ker, epsilon, sigma);

%% plot the figure
contour_level = [-epsilon, 0, epsilon];
xrange = [-1.5 1.5];
yrange = [-1.5 1.5];

% step size for how finely you want to visualize the decision boundary
inc = 0.005;

% generate grid coordinates; this will be the basis of the decision
% boundary visualization
[x1, x2] = meshgrid(xrange(1):inc:xrange(2), yrange(1):inc:yrange(2));

% size of the (x, y) image, which will also be the size of the decision
% boundary image used as the plot background
image_size = size(x1);

xy = [x1(:) x2(:)];  % make (x,y) pairs as a bunch of row vectors

% set up the domain over which you want to visualize the decision boundary
% d = [];
% for k = 1:max(y)
%     par.normw{k} = 1;
%     d(:,k) = decisionfun(xy, par, X, y, k, epsilon, ker, sigma)';
% end
% [~, idx] = min(abs(d)/par.normw{k}, [], 2);
% nd = max(y);

nd = max(y)*(max(y)-1)/2;  % number of pairwise (one-vs-one) machines
d = [];
pred = zeros(size(xy,1), nd);
for k = 1:nd
    par.normw{k} = 1;
    d(:,k) = decisionfun(xy, par, X, y, k, epsilon, ker, sigma)';
end
pred(d < -threshold) = -1;
pred(d >  threshold) =  1;

% decode the pairwise predictions against the coding matrix
nclass = max(y);
expLosses = zeros(size(pred,1), nclass);
for i = 1:nclass
    expLosses(:,i) = sum(pred == repmat(par.Code(i,:), size(pred,1), 1), 2);
end
[~, finalOutput] = max(expLosses, [], 2);
idx = finalOutput;
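% Decoding example for p = 3 classes: the pairwise machines are ordered
% by nchoosek(1:3,2) = [1 2; 1 3; 2 3], so the coding matrix built in
% NonLinearDualBoundSVORIM is
%
%                (1,2) (1,3) (2,3)
%     par.Code = [ -1    -1     0      % class 1
%                   1     0    -1      % class 2
%                   0     1     1 ];   % class 3
%
% A grid point with pred = [-1 -1 +1] agrees with the class-1 row in
% columns 1 and 2 and with the class-3 row in column 3, so
% expLosses = [2 0 1] and the point is assigned to class 1.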
% plt = 1: show the decision regions in different colors;
% plt = 2: show the decision hyperplane between class 1 and class 3
plt = 2;

switch plt
    case 1
        % reshape the idx (which contains the class labels) into an image
        decisionmap = reshape(idx, image_size);
        imagesc(xrange, yrange, decisionmap);
        hold on;
        set(gca, 'ydir', 'normal');
        cmap = [1 0.8 0.8; 0.95 1 0.95; 0.9 0.9 1];
        colormap(cmap);

        % plot the class training data
        plot(X(y==1,1), X(y==1,2), 'o', 'MarkerFaceColor', [.9 .3 .3], 'MarkerEdgeColor', 'k');
        plot(X(y==2,1), X(y==2,2), 'o', 'MarkerFaceColor', [.3 .9 .3], 'MarkerEdgeColor', 'k');
        plot(X(y==3,1), X(y==3,2), 'o', 'MarkerFaceColor', [.3 .3 .9], 'MarkerEdgeColor', 'k');
        hold on;
        % title(sprintf('%d trees, Train time: %.2fs, Test time: %.2fs\n', opts.numTrees, timetrain, timetest));

    case 2
        %% show SVs
        color = {[.9 .3 .3], [.3 .9 .3], [.3 .3 .9]};
        SVs = (par.SVs{2} > 1e-4);  % machine 2 separates class 1 from class 3
        for i = 1:max(y)
            % plot the support vectors with a bigger black-edged marker
            plot(X(y==i & SVs==1,1), X(y==i & SVs==1,2), 'o', ...
                 'MarkerFaceColor', color{i}, 'MarkerEdgeColor', 'k');
            hold on
            % plot the non-support vectors
            plot(X(y==i & SVs~=1,1), X(y==i & SVs~=1,2), 'o', ...
                 'MarkerFaceColor', color{i}, 'MarkerEdgeColor', color{i});
        end
        hold on;
        title(sprintf('Ratio of SVs is %.2f\n', mean(SVs)));

        color  = {'r-', 'g-',  'b*', 'r.', 'go', 'b*'};
        color1 = {'r-', 'g--', 'b*', 'r.', 'go', 'b*'};
        contour_level1 = [-epsilon, 0, epsilon];
        contour_level2 = [-epsilon, 0, epsilon];
        contour_level0 = [-1, 0, 1];

        % for k = 1:nd
        for k = 2  % the (class 1, class 3) machine
            decisionmapk = reshape(d(:,k), image_size);
            contour(x1, x2, decisionmapk, [-1 1], color1{k}, 'LineWidth', 0.5);
            contour(x1, x2, decisionmapk, [contour_level(1) contour_level(1)], color{k});
            contour(x1, x2, decisionmapk, [contour_level(2) contour_level(2)], color{k}, 'LineWidth', 2);
            contour(x1, x2, decisionmapk, [contour_level(3) contour_level(3)], color{k});
            contour(x1, x2, decisionmapk, [contour_level0(3) contour_level0(3)], color1{k}, 'LineWidth', 0.5);
        end
end
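% ---- data helper ------------------------------------------------------
% mgd is called above but not defined in this file. The sketch below is
% a minimal stand-in, assuming mgd draws N samples from a D-dimensional
% Gaussian with mean mu and covariance C; the helper originally shipped
% with this demo may differ.
function X = mgd(N, D, mu, C)
% MGD  Draw N samples from the Gaussian N(mu, C) in R^D.
X = bsxfun(@plus, randn(N, D)*chol(C), mu(:)');  % chol(C)'*chol(C) = C
end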
function par = NonLinearDualBoundSVORIM(traindata, targets, c1, c2, epsilon, rho, ker, sigma)
% 'traindata' is a training data matrix; each row is a sample vector.
% 'targets' is a label vector whose labels run from 1 to p.
% rho is the augmented Lagrangian parameter (used by the ADMM solver).
model = 'EX';

% data preprocessing: sort the samples by rank
[n, m] = size(traindata);
Lab = sort(unique(targets));
p = length(Lab);  % the number of ranks
l = zeros(1, p);
id = {};
X = []; Y = [];
Id = [];
i = 1;
while i <= p
    id{i} = find(targets == Lab(i));
    l(i) = length(id{i});
    X = [X; traindata(id{i},:)];
    Y = [Y; targets(id{i})];
    Id = [Id, id{i}];
    i = i + 1;
end
[~, Id0] = sort(Id);
lc = cumsum(l);

w = [];
b = [];
s = cell(1, p);
r = cell(1, p);

K = Kernel(ker, X', X', sigma);
K = (K + K')/2;  % symmetrize against round-off

% one machine per pair of ranks; Code is the one-vs-one coding matrix
nch = nchoosek(1:p, 2);
Code = zeros(p, size(nch,1));
for k = 1:size(nch,1)
    Code(nch(k,:), k) = [-1, 1];
    i = nch(k,1);
    j = nch(k,2);
    s{k} = lc(i)-l(i)+1 : lc(i);  % samples of the lower rank i
    r{k} = lc(j)-l(j)+1 : lc(j);  % samples of the upper rank j
    c{k} = 1:n;
    c{k}([lc(i)-l(i)+1:lc(i), lc(j)-l(j)+1:lc(j)]) = [];  % the remaining samples

    Ak = X(c{k},:);
    Lk = X(s{k},:);
    Rk = X(r{k},:);
    row = [c{k} c{k} s{k} r{k}];
    Hk = K(row, row);

    % model = subSVOR(Ak, Hk, Lk, Rk, c1, c2, epsilon, rho);    % ADMM solver
    model = subSVOR_quadprog(Ak, Hk, Lk, Rk, c1, c2, epsilon);  % quadprog solver

    P{k} = model.P;
    p0{k} = model.p;
    alpha{k} = model.alpha;
    alphax{k} = model.alphax;
    normw{k} = model.normw;
    time(k) = model.time;
    b(k,1) = model.b;
    Id1 = [s{k} c{k} r{k}];
    SVs{k} = model.SVs(Id1);
end

par.l = s;
par.r = r;
par.c = c;
par.P = P;
par.p = p0;
par.alpha = alpha;
par.alphax = alphax;
par.normw = normw;
par.time = time;
par.b = b;
par.X = X;
par.maxtime = max(par.time);
par.SVs = SVs;
par.Y = Y;
par.Code = Code;
% par.w = w;
% par.b = b;
end

function par = subSVOR_quadprog(Ak, H, Lk, Rk, c1, c2, epsilon)
% Solve one pairwise subproblem with quadprog.
t_start = tic;

% global constants and defaults
QUIET = 0;

m   = size(Ak,2);
lk  = size(Ak,1);
rk1 = size(Lk,1);
rk2 = size(Rk,1);
rk  = rk1 + rk2;

mP = 2*lk + rk;    % dimension of Phi
mG = 4*lk + 2*rk;  % dimension of Gamma
mU = mG + 1;       % dimension of U
mp1 = 1 : lk;
mp2 = lk+1 : 2*lk;
mp3 = 2*lk+1 : mP;

% upper bounds (c has a leading placeholder entry, hence the +1 offsets)
c = zeros(mU,1);
c([mp1 mp2]+1) = c1;
c(mp3+1) = c2;

% linear term of the dual objective
q = ones(mP,1);
q(mp1) = epsilon;
q(mp2) = epsilon;
q(mp3) = -1;

% sign pattern of the dual variables in the expansion of w
p = ones(mP,1);
p(mp2) = -1;
p(mP-rk2+1:mP) = -1;

% H = [Ak; -Ak; Bk{1}; -Bk{2}]*[Ak; -Ak; Bk{1}; -Bk{2}]';  % linear kernel
% Qk = [Ak; Ak; Lk; Rk];
% H = Kernel(ker, Qk', Qk', sigma);
% H = (H' + H)/2 + 1;
H0 = (H + 1).*(p*p');  % the +1 augments the kernel to absorb the bias term
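% The problem passed to quadprog below is the box-constrained dual QP
%
%     min_P  (1/2)*P'*H0*P + q'*P    s.t.  0 <= P <= ub,
%
% where P stacks the epsilon-insensitive multiplier pairs (alpha, alpha*)
% of the lk remaining samples (each bounded by c1) and the multipliers of
% the two ranks being separated (bounded by c2). The separating function
% is recovered from the signed expansion w = sum_i p(i)*P(i)*phi(x_i),
% and because the kernel was augmented by +1 the bias is simply b = p'*P.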
% options = optimoptions('quadprog', ...
%     'Algorithm', 'interior-point-convex', 'Display', 'off');
options = optimoptions('quadprog', ...
    'Algorithm', 'interior-point-convex', 'MaxIter', 200, 'Display', 'off');

% x = quadprog(H, f, A, b, Aeq, beq, lb, ub, x0, options)
A = [];
b = [];
f = q;
Aeq = [];
beq = [];
lb = zeros(mP,1);
ub = c(2:mP+1);
x0 = [];
P = quadprog(H0, f, A, b, Aeq, beq, lb, ub, x0, options);

% diagnostics and reporting
par.P = P;
par.p = p;
par.alpha  = P(mp1);
par.alphax = P(mp2);
P3 = P(mp3);
par.SVs = [P3(1:rk1); abs(P(mp1) - P(mp2)); P3(rk1+1:rk1+rk2)];
% par.normw = sqrt(P'*(H0.*(p'*p))*P);
par.normw = 1;
bk = p'*P;
par.b = bk;
% switch ker
%     case 'linear'
%         par.w = [Ak; -Ak; Bk{1}; -Bk{2}]'*P;
%         b1 = Ak(P(mp1)~=0,:)*par.w - epsilon;
%         b2 = Ak(P(mp2)~=0,:)*par.w + epsilon;
%         par.b = mean([b1; b2]);
% end

if ~QUIET
    par.time = toc(t_start);
end
end

function [par, history] = subSVOR(Ak, H, Lk, Rk, c1, c2, epsilon, rho)
% ADMM solver for the same pairwise subproblem.
% rho is the augmented Lagrangian parameter.
% history is a structure that contains the objective value, the primal
% and dual residual norms, and the tolerances for the primal and dual
% residual norms at each iteration.
t_start = tic;

% global constants and defaults
QUIET = 0;
MAX_ITER = 200;
% ABSTOL = 1e-4;
% RELTOL = 1e-2;
ABSTOL = 1e-6;
RELTOL = 1e-3;

lk  = size(Ak,1);
rk1 = size(Lk,1);
rk2 = size(Rk,1);
rk  = rk1 + rk2;

mP = 2*lk + rk;    % dimension of Phi
mG = 4*lk + 2*rk;  % dimension of Gamma
mU = mG;           % dimension of U
P = zeros(mP,1);   % Phi   = {w, b, xi, xi*}
G = zeros(mG,1);   % Gamma = {eta, eta*, delta, phi, phi*}
U = zeros(mU,1);   % scaled dual variable

mp1 = 1 : lk;
mp2 = lk+1 : 2*lk;
mp3 = 2*lk+1 : mP;

c = zeros(mU,1);
c([mp1 mp2]) = c1;
c(mp3) = c2;

q = ones(mP,1);
q(mp1) = epsilon;
q(mp2) = epsilon;
q(mp3) = -1;

p = ones(mP,1);
p(mp2) = -1;
p(mP-rk2+1:mP) = -1;

% H = [Ak; -Ak; Bk{1}; -Bk{2}]*[Ak; -Ak; Bk{1}; -Bk{2}]';  % linear kernel
H = (H + 1).*(p*p');  % the +1 augments the kernel to absorb the bias term

k = 1;
while k <= MAX_ITER
    % Phi-update: solve (H + 2*rho*I)*P = br by conjugate gradients
    V = U + B(mP,G) - c;
    br = -q - rho*AtX(mP,V);
    [P, niters] = cgsolve(H, br, rho);

    % Gamma-update: project onto the nonnegative orthant
    Gold = G;
    G = pos(Bt(mP, c - AX(P) - U));

    % U-update
    r = AX(P) + B(mP,G) - c;
    U = U + r;

    % history.objval(k) = objective(H,P,q);
    s = rho*AtX(mP, B(mP, G - Gold));
    history.r_norm(k) = norm(r);
    history.s_norm(k) = norm(s);
    history.eps_pri(k)  = sqrt(mU)*ABSTOL + RELTOL*max([norm(AX(P)), norm(B(mP,G)), norm(c)]);
    history.eps_dual(k) = sqrt(mP)*ABSTOL + RELTOL*norm(rho*AtX(mP,U));

    if history.r_norm(k) < history.eps_pri(k) && ...
       history.s_norm(k) < history.eps_dual(k)
        break
    end
    k = k + 1;
end

par.P = P;
par.p = p;
par.alpha  = P(mp1);
par.alphax = P(mp2);
% par.normw = sqrt(P'*(H0.*(p'*p))*P);
par.normw = 1;
bk = p'*P;
par.b = bk;

if ~QUIET
    par.time = toc(t_start);
end
end
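% Note on the splitting used by subSVOR: the box-constrained QP
%
%     min_P  (1/2)*P'*H*P + q'*P    s.t.  0 <= P <= c(1:mP)
%
% is rewritten with a slack block G >= 0 as  A*P + B*G = c, where
% A = [I; I] and B = blkdiag(I, -I), i.e. P + G1 = c (the upper bound)
% and P - G2 = 0 (nonnegativity). The helpers AX, AtX, B and Bt below
% apply these operators without forming any matrices; the Phi-update
% solves (H + 2*rho*I)*P = br with cgsolve (since A'*A = 2I), and the
% Gamma-update reduces to the projection pos(.).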
% function obj = objective(H, P, q)
%     obj = 1/2*vHv(H,P) + q'*P;
% end

function [x, niters] = cgsolve(H, b, rho, tol, maxiters)
% cgsolve : solve (H + 2*rho*I)*x = b by conjugate gradients.
%
% The symmetric positive definite system matrix is applied through AtAX
% rather than formed explicitly. Iterates until the residual norm is
% reduced by a factor of 1e-6, or for at most max(100, sqrt(n))
% iterations.
n = length(b);
if nargin < 4
    tol = 1e-6;
    maxiters = max(100, sqrt(n));
elseif nargin < 5
    maxiters = max(100, sqrt(n));
end

normb = norm(b);
x = zeros(n,1);
r = b;
rtr = r'*r;
d = r;
niters = 0;
while sqrt(rtr)/normb > tol && niters < maxiters
    niters = niters + 1;
    Ad = AtAX(H, d, rho);  % Ad = (H + 2*rho*I)*d
    alpha = rtr/(d'*Ad);
    x = x + alpha*d;
    r = r - alpha*Ad;
    rtrold = rtr;
    rtr = r'*r;
    beta = rtr/rtrold;
    d = r + beta*d;
end
end

% apply the Phi-update system matrix: Ad = (H + 2*rho*I)*d
function Ad = AtAX(H, d, rho)
Ad = H*d + 2*rho*d;
end

% A'*V for A = [I; I]
function F = AtX(mP, V)
F = V(1:mP) + V(mP+1:end);
end

% A*P for A = [I; I]
function h = AX(P)
h = [P; P];
end

function h = vHv(H, d)
h = d'*(H*d);
end

% B*v for B = blkdiag(I, -I)
function Bv = B(mP, v)
Bv(:,1) = [v(1:mP); -v(mP+1:end)];
end

% B'*d for B = blkdiag(I, -I)
function Btd = Bt(mP, d)
Btd = [d(1:mP); -d(mP+1:end)];
end

% projection onto the nonnegative orthant
function A = pos(A)
A(A < 0) = 0;
end
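% ---- kernel helpers ---------------------------------------------------
% Kernel and decisionfun are called above but are not defined in this
% section. The sketches below are minimal, assumed implementations that
% are only consistent with how the two functions are called here; the
% versions shipped with the package may differ, in particular the RBF
% parameterization, taken here as exp(-sigma*||u - v||^2).

function K = Kernel(ker, X1, X2, sigma)
% KERNEL  Gram matrix between the columns of X1 (m x n1) and X2 (m x n2).
switch ker
    case 'linear'
        K = X1'*X2;
    case 'rbf'
        % pairwise squared distances via ||u-v||^2 = ||u||^2 + ||v||^2 - 2*u'*v
        D2 = bsxfun(@plus, sum(X1.^2,1)', sum(X2.^2,1)) - 2*(X1'*X2);
        K = exp(-sigma*max(D2, 0));
end
end

function f = decisionfun(xy, par, X, y, k, epsilon, ker, sigma)
% DECISIONFUN  Decision values of the k-th pairwise machine on the rows
% of xy, reconstructed from the stored dual solution as
%     f(x) = sum_i p(i)*P(i)*K(x_i, x) + b_k,
% with the training rows ordered as in the solver. The arguments X, y and
% epsilon are unused in this sketch; they are kept to match the call
% sites above.
row = [par.c{k} par.c{k} par.l{k} par.r{k}];
Kx = Kernel(ker, par.X(row,:)', xy', sigma);  % mP x (number of points)
f = (par.p{k}.*par.P{k})'*Kx + par.b(k);      % 1 x (number of points)
end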