逻辑回归见前述文章。

Code
1
#计算pi
2
pi_fun<-function(x,Beta)
{
3
Beta<-matrix(as.vector(Beta),ncol=1)
4
x<-matrix(as.vector(x),ncol=nrow(Beta))
5
g_fun<-x%*%Beta
6
exp(g_fun)/(1+exp(g_fun))
7
}
8
#计算信息矩阵
9
funs<-function(x,y,Beta)
{
10
x<-matrix(as.vector(x),nrow=nrow(y))
11
Beta<-matrix(as.vector(Beta),ncol=1)
12
13
pi_value<- pi_fun(x,Beta)
14
U<-t(x)%*%(y-pi_value);
15
uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16
H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17
list(U=U,H=H)
18
}
19
#牛顿迭代法计算Beta
20
Newtons<-function(fun,x,y,ep=1e-8,it_max=100)
{
21
x<-matrix(as.vector(x),nrow=nrow(y))
22
Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23
24
Index<-0;
25
k<-1
26
while(k<=it_max)
{
27
x1<-Beta;obj<-fun(x,y,Beta);
28
Beta<-Beta+solve(obj$H,obj$U);
29
norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
30
if(norm<ep)
{
31
index<-1;break
32
}
33
k<-k+1
34
}
35
obj<-fun(x,y,Beta);
36
37
list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
38
}
39
#拟合检验
40
ModelFitStat<-function(x,y,Beta)
{
41
x<-matrix(as.vector(x),nrow=nrow(y))
42
Beta<-matrix(as.vector(Beta),ncol=1)
43
44
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
45
LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
46
AIC<-LOGL+2*(ncol(x))
47
SC<-LOGL+(ncol(x))*log(nrow(x))
48
list(LOGL=LOGL,AIC=AIC,SC=SC)
49
}
50
51
#回归方程显著性检验
52
GlobalNullTest<-function(x,y,Beta,BetaIntercept)
{
53
y<-matrix(as.vector(y),ncol=1)
54
x<-matrix(as.vector(x),nrow=nrow(y))
55
Beta<-matrix(as.vector(Beta),ncol=1)
56
57
pi_value<- pi_fun(x,Beta)
58
df<-nrow(Beta)-1
59
##compute Likelihood ratio
60
61
MF<-ModelFitStat(x,y,Beta)
62
n1<-sum(y[y>0])
63
n<-nrow(y)
64
LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
65
LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
66
LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
67
68
##compute Score
69
70
BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
71
obj<-funs(x,y,BetaIntercept)
72
Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
73
Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
74
Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
75
76
##compute Wald test
77
obj<-funs(x,y,Beta)
78
I_Diag<-diag((solve(obj$H)))
79
80
Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
81
Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
82
83
Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
84
Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
85
86
list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
87
}
88
#小函数
89
WhichEqual1<-function(x)
{
90
a<-NULL
91
for(i in 1:length(x))
{
92
if(x[i]==1)
{
93
a<-c(a,i)
94
}
95
}
96
a
97
}
98
99
CheckOut<-function(source,check)
{
100
for(j in 1:length(source))
{
101
for(k in 1:length(check))
{
102
if(source[j]==check[k])
103
source[j]<-0
104
}
105
}
106
source[source>0]
107
}
108
109
#后向删除
110
BackwardSel<-function(x,y,check_pvalue=0.05)
{
111
##as matrix
112
y<-matrix(as.vector(y),ncol=1)
113
x<-matrix(as.vector(x),nrow=nrow(y))
114
##indication of variable
115
indict<-rep(1,ncol(x)) ##which column remove
116
## 1:keep
117
## 0:remove
118
119
repeat
{
120
121
indexVector<-WhichEqual1(indict)
122
123
##compute variable Wald chi-square
124
Result<-Newtons(funs,x[,indexVector],y)
125
obj<-funs(x[,indexVector],y,Result$Beta)
126
H_Diag<-sqrt(diag(solve(obj$H)))
127
WaldChisq<-(as.vector(Result$Beta)/H_Diag)^2
128
WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
129
WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
130
print(WaldChisqTest)
131
132
##Model Fit Statistics
133
MFStat<-ModelFitStat(x[,indexVector],y,Result$Beta)
134
print("Model Fit Statistics")
135
print(MFStat)
136
137
##Testing Global Null Hypothesis: BETA=0
138
ResultIntercept<-Newtons(funs,x[,1],y)
139
GNTest<-GlobalNullTest(x[,indexVector],y,Result$Beta,ResultIntercept$Beta)
140
print("Testing Global Null Hypothesis: BETA=0")
141
print(GNTest)
142
143
##check variable
144
pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
145
if(pvalue_max>check_pvalue)
{
146
j<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
147
print(paste(x_name[indexVector[j+1]]," is removed:"))
148
##set indication of variable
149
indict[indexVector[j+1]]<-0
150
}
151
if(pvalue_max<=check_pvalue)
{
152
print("No (additional) effects met the 0.05 significance level for removal from the model.")
153
print("Analysis of Maximum Likelihood Estimates")
154
print(Result$Beta)
155
print(WaldChisqTest)
156
break
157
}
158
}
159
160
}
使用方法如下

Code
1
#example3--backward
2
rs<-read.csv("example2R.csv",head=TRUE)
3
x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)#选择列以充作自变量
4
y<-matrix(rs[,2],nrow=189)#选择因变量
5
x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6
#自变量名字
7
BackwardSel(x,y)