逻辑回归后向选择的R代码
逻辑回归见前述文章。
Code
1#计算pi
2pi_fun<-function(x,Beta){
3 Beta<-matrix(as.vector(Beta),ncol=1)
4 x<-matrix(as.vector(x),ncol=nrow(Beta))
5 g_fun<-x%*%Beta
6 exp(g_fun)/(1+exp(g_fun))
7}
8#计算信息矩阵
9funs<-function(x,y,Beta){
10 x<-matrix(as.vector(x),nrow=nrow(y))
11 Beta<-matrix(as.vector(Beta),ncol=1)
12
13 pi_value<- pi_fun(x,Beta)
14 U<-t(x)%*%(y-pi_value);
15 uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16 H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17 list(U=U,H=H)
18}
19#牛顿迭代法计算Beta
20Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
21 x<-matrix(as.vector(x),nrow=nrow(y))
22 Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23
24 Index<-0;
25 k<-1
26 while(k<=it_max){
27 x1<-Beta;obj<-fun(x,y,Beta);
28 Beta<-Beta+solve(obj$H,obj$U);
29 norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
30 if(norm<ep){
31 index<-1;break
32 }
33 k<-k+1
34 }
35 obj<-fun(x,y,Beta);
36
37 list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
38}
39#拟合检验
40ModelFitStat<-function(x,y,Beta){
41 x<-matrix(as.vector(x),nrow=nrow(y))
42 Beta<-matrix(as.vector(Beta),ncol=1)
43
44 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
45 LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
46 AIC<-LOGL+2*(ncol(x))
47 SC<-LOGL+(ncol(x))*log(nrow(x))
48 list(LOGL=LOGL,AIC=AIC,SC=SC)
49}
50
51#回归方程显著性检验
52GlobalNullTest<-function(x,y,Beta,BetaIntercept){
53 y<-matrix(as.vector(y),ncol=1)
54 x<-matrix(as.vector(x),nrow=nrow(y))
55 Beta<-matrix(as.vector(Beta),ncol=1)
56
57 pi_value<- pi_fun(x,Beta)
58 df<-nrow(Beta)-1
59 ##compute Likelihood ratio
60
61 MF<-ModelFitStat(x,y,Beta)
62 n1<-sum(y[y>0])
63 n<-nrow(y)
64 LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
65 LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
66 LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
67
68 ##compute Score
69
70 BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
71 obj<-funs(x,y,BetaIntercept)
72 Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
73 Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
74 Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
75
76 ##compute Wald test
77 obj<-funs(x,y,Beta)
78 I_Diag<-diag((solve(obj$H)))
79
80 Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
81 Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
82
83 Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
84 Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
85
86 list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
87}
88#小函数
89WhichEqual1<-function(x){
90 a<-NULL
91 for(i in 1:length(x)){
92 if(x[i]==1){
93 a<-c(a,i)
94 }
95 }
96 a
97}
98
99CheckOut<-function(source,check){
100 for(j in 1:length(source)){
101 for(k in 1:length(check)){
102 if(source[j]==check[k])
103 source[j]<-0
104 }
105 }
106 source[source>0]
107}
108
109#后向删除
110BackwardSel<-function(x,y,check_pvalue=0.05){
111 ##as matrix
112 y<-matrix(as.vector(y),ncol=1)
113 x<-matrix(as.vector(x),nrow=nrow(y))
114 ##indication of variable
115 indict<-rep(1,ncol(x)) ##which column remove
116 ## 1:keep
117 ## 0:remove
118
119 repeat{
120
121 indexVector<-WhichEqual1(indict)
122
123 ##compute variable Wald chi-square
124 Result<-Newtons(funs,x[,indexVector],y)
125 obj<-funs(x[,indexVector],y,Result$Beta)
126 H_Diag<-sqrt(diag(solve(obj$H)))
127 WaldChisq<-(as.vector(Result$Beta)/H_Diag)^2
128 WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
129 WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
130 print(WaldChisqTest)
131
132 ##Model Fit Statistics
133 MFStat<-ModelFitStat(x[,indexVector],y,Result$Beta)
134 print("Model Fit Statistics")
135 print(MFStat)
136
137 ##Testing Global Null Hypothesis: BETA=0
138 ResultIntercept<-Newtons(funs,x[,1],y)
139 GNTest<-GlobalNullTest(x[,indexVector],y,Result$Beta,ResultIntercept$Beta)
140 print("Testing Global Null Hypothesis: BETA=0")
141 print(GNTest)
142
143 ##check variable
144 pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
145 if(pvalue_max>check_pvalue){
146 j<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
147 print(paste(x_name[indexVector[j+1]]," is removed:"))
148 ##set indication of variable
149 indict[indexVector[j+1]]<-0
150 }
151 if(pvalue_max<=check_pvalue){
152 print("No (additional) effects met the 0.05 significance level for removal from the model.")
153 print("Analysis of Maximum Likelihood Estimates")
154 print(Result$Beta)
155 print(WaldChisqTest)
156 break
157 }
158 }
159
160}
1#计算pi
2pi_fun<-function(x,Beta){
3 Beta<-matrix(as.vector(Beta),ncol=1)
4 x<-matrix(as.vector(x),ncol=nrow(Beta))
5 g_fun<-x%*%Beta
6 exp(g_fun)/(1+exp(g_fun))
7}
8#计算信息矩阵
9funs<-function(x,y,Beta){
10 x<-matrix(as.vector(x),nrow=nrow(y))
11 Beta<-matrix(as.vector(Beta),ncol=1)
12
13 pi_value<- pi_fun(x,Beta)
14 U<-t(x)%*%(y-pi_value);
15 uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16 H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17 list(U=U,H=H)
18}
19#牛顿迭代法计算Beta
20Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
21 x<-matrix(as.vector(x),nrow=nrow(y))
22 Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23
24 Index<-0;
25 k<-1
26 while(k<=it_max){
27 x1<-Beta;obj<-fun(x,y,Beta);
28 Beta<-Beta+solve(obj$H,obj$U);
29 norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
30 if(norm<ep){
31 index<-1;break
32 }
33 k<-k+1
34 }
35 obj<-fun(x,y,Beta);
36
37 list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
38}
39#拟合检验
40ModelFitStat<-function(x,y,Beta){
41 x<-matrix(as.vector(x),nrow=nrow(y))
42 Beta<-matrix(as.vector(Beta),ncol=1)
43
44 uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
45 LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
46 AIC<-LOGL+2*(ncol(x))
47 SC<-LOGL+(ncol(x))*log(nrow(x))
48 list(LOGL=LOGL,AIC=AIC,SC=SC)
49}
50
51#回归方程显著性检验
52GlobalNullTest<-function(x,y,Beta,BetaIntercept){
53 y<-matrix(as.vector(y),ncol=1)
54 x<-matrix(as.vector(x),nrow=nrow(y))
55 Beta<-matrix(as.vector(Beta),ncol=1)
56
57 pi_value<- pi_fun(x,Beta)
58 df<-nrow(Beta)-1
59 ##compute Likelihood ratio
60
61 MF<-ModelFitStat(x,y,Beta)
62 n1<-sum(y[y>0])
63 n<-nrow(y)
64 LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
65 LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
66 LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
67
68 ##compute Score
69
70 BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
71 obj<-funs(x,y,BetaIntercept)
72 Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
73 Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
74 Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
75
76 ##compute Wald test
77 obj<-funs(x,y,Beta)
78 I_Diag<-diag((solve(obj$H)))
79
80 Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
81 Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
82
83 Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
84 Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
85
86 list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
87}
88#小函数
89WhichEqual1<-function(x){
90 a<-NULL
91 for(i in 1:length(x)){
92 if(x[i]==1){
93 a<-c(a,i)
94 }
95 }
96 a
97}
98
99CheckOut<-function(source,check){
100 for(j in 1:length(source)){
101 for(k in 1:length(check)){
102 if(source[j]==check[k])
103 source[j]<-0
104 }
105 }
106 source[source>0]
107}
108
109#后向删除
110BackwardSel<-function(x,y,check_pvalue=0.05){
111 ##as matrix
112 y<-matrix(as.vector(y),ncol=1)
113 x<-matrix(as.vector(x),nrow=nrow(y))
114 ##indication of variable
115 indict<-rep(1,ncol(x)) ##which column remove
116 ## 1:keep
117 ## 0:remove
118
119 repeat{
120
121 indexVector<-WhichEqual1(indict)
122
123 ##compute variable Wald chi-square
124 Result<-Newtons(funs,x[,indexVector],y)
125 obj<-funs(x[,indexVector],y,Result$Beta)
126 H_Diag<-sqrt(diag(solve(obj$H)))
127 WaldChisq<-(as.vector(Result$Beta)/H_Diag)^2
128 WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
129 WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
130 print(WaldChisqTest)
131
132 ##Model Fit Statistics
133 MFStat<-ModelFitStat(x[,indexVector],y,Result$Beta)
134 print("Model Fit Statistics")
135 print(MFStat)
136
137 ##Testing Global Null Hypothesis: BETA=0
138 ResultIntercept<-Newtons(funs,x[,1],y)
139 GNTest<-GlobalNullTest(x[,indexVector],y,Result$Beta,ResultIntercept$Beta)
140 print("Testing Global Null Hypothesis: BETA=0")
141 print(GNTest)
142
143 ##check variable
144 pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
145 if(pvalue_max>check_pvalue){
146 j<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
147 print(paste(x_name[indexVector[j+1]]," is removed:"))
148 ##set indication of variable
149 indict[indexVector[j+1]]<-0
150 }
151 if(pvalue_max<=check_pvalue){
152 print("No (additional) effects met the 0.05 significance level for removal from the model.")
153 print("Analysis of Maximum Likelihood Estimates")
154 print(Result$Beta)
155 print(WaldChisqTest)
156 break
157 }
158 }
159
160}
使用方法如下
Code
1#example3--backward
2rs<-read.csv("example2R.csv",head=TRUE)
3x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)#选择列以充作自变量
4y<-matrix(rs[,2],nrow=189)#选择因变量
5x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6#自变量名字
7BackwardSel(x,y)
1#example3--backward
2rs<-read.csv("example2R.csv",head=TRUE)
3x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)#选择列以充作自变量
4y<-matrix(rs[,2],nrow=189)#选择因变量
5x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6#自变量名字
7BackwardSel(x,y)