逻辑回归前向选择的R代码
逻辑回归见前述随笔
Code
1#计算pi
2pi_fun<-function(x,Beta){
3 Beta<-matrix(as.vector(Beta),ncol=1)
4 x<-matrix(as.vector(x),ncol=nrow(Beta))
5 g_fun<-x%*%Beta
6 exp(g_fun)/(1+exp(g_fun))
7}
8#计算信息函数
9funs<-function(x,y,Beta){
10 x<-matrix(as.vector(x),nrow=nrow(y))
11 Beta<-matrix(as.vector(Beta),ncol=1)
12
13 pi_value<- pi_fun(x,Beta)
14 U<-t(x)%*%(y-pi_value);
15 uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16 H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17 list(U=U,H=H)
18}
19#牛顿迭代计算Beta
20Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
21 x<-matrix(as.vector(x),nrow=nrow(y))
22 Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23 Index<-0;
24 k<-1
25 while(k<=it_max){
26 x1<-Beta;obj<-fun(x,y,Beta);
27 Beta<-Beta+solve(obj$H,obj$U);
28 norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
29 if(norm<ep){
30 index<-1;break
31 }
32 k<-k+1
33 }
34 obj<-fun(x,y,Beta);
35
36 list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
37}
38#拟合检验
39ModelFitStat<-function(x,y,Beta){
40 x<-matrix(as.vector(x),nrow=nrow(y))
41 Beta<-matrix(as.vector(Beta),ncol=1)
42
43 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
44 LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
45 AIC<-LOGL+2*(ncol(x))
46 SC<-LOGL+(ncol(x))*log(nrow(x))
47 list(LOGL=LOGL,AIC=AIC,SC=SC)
48}
49
50#回归方程显著性检验
51GlobalNullTest<-function(x,y,Beta,BetaIntercept){
52 y<-matrix(as.vector(y),ncol=1)
53 x<-matrix(as.vector(x),nrow=nrow(y))
54 Beta<-matrix(as.vector(Beta),ncol=1)
55
56 pi_value<- pi_fun(x,Beta)
57 df<-nrow(Beta)-1
58 ##compute Likelihood ratio
59
60 MF<-ModelFitStat(x,y,Beta)
61 n1<-sum(y[y>0])
62 n<-nrow(y)
63 LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
64 LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
65 LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
66
67 ##compute Score
68
69 BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
70 obj<-funs(x,y,BetaIntercept)
71 Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
72 Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
73 Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
74
75 ##compute Wald test
76 obj<-funs(x,y,Beta)
77 I_Diag<-diag((solve(obj$H)))
78
79 Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
80 Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
81
82 Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
83 Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
84
85 list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
86}
87#配合的小函数
88WhichEqual1<-function(x){
89 a<-NULL
90 for(i in 1:length(x)){
91 if(x[i]==1){
92 a<-c(a,i)
93 }
94 }
95 a
96}
97
98CheckOut<-function(source,check){
99 for(j in 1:length(source)){
100 for(k in 1:length(check)){
101 if(source[j]==check[k])
102 source[j]<-0
103 }
104 }
105 source[source>0]
106}
107
108#前向选择
109##forword selection
110ForwardSel<-function(x,y,check_pvalue=0.05){
111 ##as matrix
112 x<-matrix(as.vector(x),nrow=nrow(y))
113 ##indication of variable
114 indict<-rep(0,ncol(x)) ##which column enter
115 ##intercept entered
116 indict[1]<-1
117 Beta<-NULL
118 print("intercept entered")
119 Result<-Newtons(funs,x[,1],y)
120 Beta<-Result$Beta
121 BetaIntercept<-Result$Beta
122 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
123 LOGL<--2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))
124 print(paste("-2Log=",LOGL))
125 indexVector<-WhichEqual1(indict)
126 ##check other variable
127 repeat{
128 pvalue<-rep(1,ncol(x))
129
130 k<-2:length(indict)
131 k<-CheckOut(k,indexVector)
132 for(i in k){
133 obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0))
134 Score<-t(obj$U)%*%solve(obj$H,obj$U)
135 Score_pvalue<-pchisq(Score,1,lower.tail=FALSE)
136 pvalue[i]<-Score_pvalue
137 }
138 print(pvalue)
139 pvalue_min<-min(pvalue)
140 if(pvalue_min<check_pvalue){
141 j<-which.min(pvalue)
142 print(paste(x_name[j]," entered:"))
143 ##set indication of variable
144 indict[j]<-1
145 indexVector<-WhichEqual1(indict)
146 Result<-Newtons(funs,x[,indexVector],y)
147 Beta<-NULL
148 Beta<-Result$Beta
149 #print("Beta")
150 #print(Beta)
151
152 ##compute model fit statistics
153 print("Model Fit Statistics")
154 MFStat<-ModelFitStat(x[,indexVector],y,Beta)
155 print(MFStat)
156 ##test globel null hypothesis:Beta=0
157 print("Testing Global Null Hypothese:Beta=0")
158 GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)
159 print(GNTest)
160 }
161 if(pvalue_min>=check_pvalue||all(indict)>0){
162 print("No effects met the 0.05 significance level for entry into the model")
163 break
164 }
165 }
166
167 ##Analysis of Maximum Likelihood Estimates
168 indexVector<-WhichEqual1(indict)
169 print("Analysis of Maximum Likelihood Estimates")
170 print(Beta)
171
172 ##compute variable Wald chi-square
173 obj<-funs(x[,indexVector],y,Beta)
174 H_Diag<-sqrt(diag(solve(obj$H)))
175 WaldChisq<-(as.vector(Beta)/H_Diag)^2
176 WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
177 WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
178 print(WaldChisqTest)
179 }
1#计算pi
2pi_fun<-function(x,Beta){
3 Beta<-matrix(as.vector(Beta),ncol=1)
4 x<-matrix(as.vector(x),ncol=nrow(Beta))
5 g_fun<-x%*%Beta
6 exp(g_fun)/(1+exp(g_fun))
7}
8#计算信息函数
9funs<-function(x,y,Beta){
10 x<-matrix(as.vector(x),nrow=nrow(y))
11 Beta<-matrix(as.vector(Beta),ncol=1)
12
13 pi_value<- pi_fun(x,Beta)
14 U<-t(x)%*%(y-pi_value);
15 uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16 H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17 list(U=U,H=H)
18}
19#牛顿迭代计算Beta
20Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
21 x<-matrix(as.vector(x),nrow=nrow(y))
22 Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23 Index<-0;
24 k<-1
25 while(k<=it_max){
26 x1<-Beta;obj<-fun(x,y,Beta);
27 Beta<-Beta+solve(obj$H,obj$U);
28 norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
29 if(norm<ep){
30 index<-1;break
31 }
32 k<-k+1
33 }
34 obj<-fun(x,y,Beta);
35
36 list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
37}
38#拟合检验
39ModelFitStat<-function(x,y,Beta){
40 x<-matrix(as.vector(x),nrow=nrow(y))
41 Beta<-matrix(as.vector(Beta),ncol=1)
42
43 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
44 LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
45 AIC<-LOGL+2*(ncol(x))
46 SC<-LOGL+(ncol(x))*log(nrow(x))
47 list(LOGL=LOGL,AIC=AIC,SC=SC)
48}
49
50#回归方程显著性检验
51GlobalNullTest<-function(x,y,Beta,BetaIntercept){
52 y<-matrix(as.vector(y),ncol=1)
53 x<-matrix(as.vector(x),nrow=nrow(y))
54 Beta<-matrix(as.vector(Beta),ncol=1)
55
56 pi_value<- pi_fun(x,Beta)
57 df<-nrow(Beta)-1
58 ##compute Likelihood ratio
59
60 MF<-ModelFitStat(x,y,Beta)
61 n1<-sum(y[y>0])
62 n<-nrow(y)
63 LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
64 LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
65 LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
66
67 ##compute Score
68
69 BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
70 obj<-funs(x,y,BetaIntercept)
71 Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
72 Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
73 Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
74
75 ##compute Wald test
76 obj<-funs(x,y,Beta)
77 I_Diag<-diag((solve(obj$H)))
78
79 Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
80 Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
81
82 Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
83 Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
84
85 list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
86}
87#配合的小函数
88WhichEqual1<-function(x){
89 a<-NULL
90 for(i in 1:length(x)){
91 if(x[i]==1){
92 a<-c(a,i)
93 }
94 }
95 a
96}
97
98CheckOut<-function(source,check){
99 for(j in 1:length(source)){
100 for(k in 1:length(check)){
101 if(source[j]==check[k])
102 source[j]<-0
103 }
104 }
105 source[source>0]
106}
107
108#前向选择
109##forword selection
110ForwardSel<-function(x,y,check_pvalue=0.05){
111 ##as matrix
112 x<-matrix(as.vector(x),nrow=nrow(y))
113 ##indication of variable
114 indict<-rep(0,ncol(x)) ##which column enter
115 ##intercept entered
116 indict[1]<-1
117 Beta<-NULL
118 print("intercept entered")
119 Result<-Newtons(funs,x[,1],y)
120 Beta<-Result$Beta
121 BetaIntercept<-Result$Beta
122 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
123 LOGL<--2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))
124 print(paste("-2Log=",LOGL))
125 indexVector<-WhichEqual1(indict)
126 ##check other variable
127 repeat{
128 pvalue<-rep(1,ncol(x))
129
130 k<-2:length(indict)
131 k<-CheckOut(k,indexVector)
132 for(i in k){
133 obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0))
134 Score<-t(obj$U)%*%solve(obj$H,obj$U)
135 Score_pvalue<-pchisq(Score,1,lower.tail=FALSE)
136 pvalue[i]<-Score_pvalue
137 }
138 print(pvalue)
139 pvalue_min<-min(pvalue)
140 if(pvalue_min<check_pvalue){
141 j<-which.min(pvalue)
142 print(paste(x_name[j]," entered:"))
143 ##set indication of variable
144 indict[j]<-1
145 indexVector<-WhichEqual1(indict)
146 Result<-Newtons(funs,x[,indexVector],y)
147 Beta<-NULL
148 Beta<-Result$Beta
149 #print("Beta")
150 #print(Beta)
151
152 ##compute model fit statistics
153 print("Model Fit Statistics")
154 MFStat<-ModelFitStat(x[,indexVector],y,Beta)
155 print(MFStat)
156 ##test globel null hypothesis:Beta=0
157 print("Testing Global Null Hypothese:Beta=0")
158 GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)
159 print(GNTest)
160 }
161 if(pvalue_min>=check_pvalue||all(indict)>0){
162 print("No effects met the 0.05 significance level for entry into the model")
163 break
164 }
165 }
166
167 ##Analysis of Maximum Likelihood Estimates
168 indexVector<-WhichEqual1(indict)
169 print("Analysis of Maximum Likelihood Estimates")
170 print(Beta)
171
172 ##compute variable Wald chi-square
173 obj<-funs(x[,indexVector],y,Beta)
174 H_Diag<-sqrt(diag(solve(obj$H)))
175 WaldChisq<-(as.vector(Beta)/H_Diag)^2
176 WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
177 WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
178 print(WaldChisqTest)
179 }
使用例子
Code
1#example3
2rs<-read.csv("example2R.csv",head=TRUE)
3x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)
4y<-matrix(rs[,2],nrow=189)
5x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6ForwardSel(x,y)
1#example3
2rs<-read.csv("example2R.csv",head=TRUE)
3x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)
4y<-matrix(rs[,2],nrow=189)
5x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6ForwardSel(x,y)