逻辑回归见前述随笔

Code
1
#计算pi
2
pi_fun<-function(x,Beta)
{
3
Beta<-matrix(as.vector(Beta),ncol=1)
4
x<-matrix(as.vector(x),ncol=nrow(Beta))
5
g_fun<-x%*%Beta
6
exp(g_fun)/(1+exp(g_fun))
7
}
8
#计算信息函数
9
funs<-function(x,y,Beta)
{
10
x<-matrix(as.vector(x),nrow=nrow(y))
11
Beta<-matrix(as.vector(Beta),ncol=1)
12
13
pi_value<- pi_fun(x,Beta)
14
U<-t(x)%*%(y-pi_value);
15
uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
16
H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
17
list(U=U,H=H)
18
}
19
#牛顿迭代计算Beta
20
Newtons<-function(fun,x,y,ep=1e-8,it_max=100)
{
21
x<-matrix(as.vector(x),nrow=nrow(y))
22
Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
23
Index<-0;
24
k<-1
25
while(k<=it_max)
{
26
x1<-Beta;obj<-fun(x,y,Beta);
27
Beta<-Beta+solve(obj$H,obj$U);
28
norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
29
if(norm<ep)
{
30
index<-1;break
31
}
32
k<-k+1
33
}
34
obj<-fun(x,y,Beta);
35
36
list(Beta=Beta,it=k,index=index,U=obj$U,H=obj$H)
37
}
38
#拟合检验
39
ModelFitStat<-function(x,y,Beta)
{
40
x<-matrix(as.vector(x),nrow=nrow(y))
41
Beta<-matrix(as.vector(Beta),ncol=1)
42
43
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
44
LOGL<--2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x,Beta)))
45
AIC<-LOGL+2*(ncol(x))
46
SC<-LOGL+(ncol(x))*log(nrow(x))
47
list(LOGL=LOGL,AIC=AIC,SC=SC)
48
}
49
50
#回归方程显著性检验
51
GlobalNullTest<-function(x,y,Beta,BetaIntercept)
{
52
y<-matrix(as.vector(y),ncol=1)
53
x<-matrix(as.vector(x),nrow=nrow(y))
54
Beta<-matrix(as.vector(Beta),ncol=1)
55
56
pi_value<- pi_fun(x,Beta)
57
df<-nrow(Beta)-1
58
##compute Likelihood ratio
59
60
MF<-ModelFitStat(x,y,Beta)
61
n1<-sum(y[y>0])
62
n<-nrow(y)
63
LR<--2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
64
LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
65
LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
66
67
##compute Score
68
69
BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
70
obj<-funs(x,y,BetaIntercept)
71
Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
72
Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
73
Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
74
75
##compute Wald test
76
obj<-funs(x,y,Beta)
77
I_Diag<-diag((solve(obj$H)))
78
79
Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
80
Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
81
82
Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
83
Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
84
85
list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
86
}
87
#配合的小函数
88
WhichEqual1<-function(x)
{
89
a<-NULL
90
for(i in 1:length(x))
{
91
if(x[i]==1)
{
92
a<-c(a,i)
93
}
94
}
95
a
96
}
97
98
CheckOut<-function(source,check)
{
99
for(j in 1:length(source))
{
100
for(k in 1:length(check))
{
101
if(source[j]==check[k])
102
source[j]<-0
103
}
104
}
105
source[source>0]
106
}
107
108
#前向选择
109
##forword selection
110
ForwardSel<-function(x,y,check_pvalue=0.05)
{
111
##as matrix
112
x<-matrix(as.vector(x),nrow=nrow(y))
113
##indication of variable
114
indict<-rep(0,ncol(x)) ##which column enter
115
##intercept entered
116
indict[1]<-1
117
Beta<-NULL
118
print("intercept entered
")
119
Result<-Newtons(funs,x[,1],y)
120
Beta<-Result$Beta
121
BetaIntercept<-Result$Beta
122
uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
123
LOGL<--2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))
124
print(paste("-2Log=",LOGL))
125
indexVector<-WhichEqual1(indict)
126
##check other variable
127
repeat
{
128
pvalue<-rep(1,ncol(x))
129
130
k<-2:length(indict)
131
k<-CheckOut(k,indexVector)
132
for(i in k)
{
133
obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0))
134
Score<-t(obj$U)%*%solve(obj$H,obj$U)
135
Score_pvalue<-pchisq(Score,1,lower.tail=FALSE)
136
pvalue[i]<-Score_pvalue
137
}
138
print(pvalue)
139
pvalue_min<-min(pvalue)
140
if(pvalue_min<check_pvalue)
{
141
j<-which.min(pvalue)
142
print(paste(x_name[j]," entered:"))
143
##set indication of variable
144
indict[j]<-1
145
indexVector<-WhichEqual1(indict)
146
Result<-Newtons(funs,x[,indexVector],y)
147
Beta<-NULL
148
Beta<-Result$Beta
149
#print("Beta")
150
#print(Beta)
151
152
##compute model fit statistics
153
print("Model Fit Statistics")
154
MFStat<-ModelFitStat(x[,indexVector],y,Beta)
155
print(MFStat)
156
##test globel null hypothesis:Beta=0
157
print("Testing Global Null Hypothese:Beta=0")
158
GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)
159
print(GNTest)
160
}
161
if(pvalue_min>=check_pvalue||all(indict)>0)
{
162
print("No effects met the 0.05 significance level for entry into the model")
163
break
164
}
165
}
166
167
##Analysis of Maximum Likelihood Estimates
168
indexVector<-WhichEqual1(indict)
169
print("Analysis of Maximum Likelihood Estimates")
170
print(Beta)
171
172
##compute variable Wald chi-square
173
obj<-funs(x[,indexVector],y,Beta)
174
H_Diag<-sqrt(diag(solve(obj$H)))
175
WaldChisq<-(as.vector(Beta)/H_Diag)^2
176
WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
177
WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
178
print(WaldChisqTest)
179
}
使用例子

Code
1
#example3
2
rs<-read.csv("example2R.csv",head=TRUE)
3
x<-matrix(c(rep(1,189),rs[,3],rs[,4],rs[,12],rs[,13],rs[,6],rs[,7],rs[,8],rs[,9],rs[,10]),nrow=189)
4
y<-matrix(rs[,2],nrow=189)
5
x_name<-c("INTERCEPT","AGE","LWT","RACE2","RECE3","SMOKE","PTL","HT","UI","FTV")
6
ForwardSel(x,y)