-
Notifications
You must be signed in to change notification settings - Fork 1
/
PneumoniaSurvivalAnalysis.R
309 lines (263 loc) · 12.4 KB
/
PneumoniaSurvivalAnalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#Pneumonia Survival Analysis
#Author: Ioannis Matzakos Chorianopoulos
install.packages("KMsurv")
require(survival)
require(KMsurv)
data(pneumon)
#attach(pneumon)
#Generate survival estimates and plots of the Kaplan-Meier productlimit estimator for different numbers of siblings.
#creating the status variable and including it in the dataset
pneumon$status = (pneumon$chldage<12) * 1 + 0
pneumon$status
#recoding nsibs variable so that any number greater than 3 will get the value of 3
numsibs = pneumon$nsibs
numsibs
pneumon$nsibs[pneumon$nsibs>=3] = 3
pneumon$nsibs
#recoding wmonth, ages 4-6 months into value 4 and ages 7+ months into value 5
wage = pneumon$wmonth
wage
pneumon$wmonth[pneumon$wmonth>=4 & pneumon$wmonth<=6] = 4
pneumon$wmonth[wage>=7] = 5
pneumon$wmonth
#the original values of pneumon$nsibs and pneumon$wmonth are assigned to new attributes outside the dataset,
#numsibs and wage respectively, just in case they are needed later for any reason.
#fitting the model
model = survfit(Surv(chldage, status) ~ nsibs, data=pneumon, type = "kaplan")
summary(model)
#ploting the fitted model
par(mfrow=c(1,1))
plot(model, main="Kaplan-Meier Plot", xlab="Time", ylab="Probability of Survival", col=c(1,2,3,4), lwd=1)
legend('bottomright', c("nsibs=0","nsibs=1","nsibs=2","nsibs=3"), lty=1, col=c(1,2,3,4))
#log-rank test to determine whether there is a statistically significant
#difference in the hazard rates of pneumonia given different numbers of siblings.
#H0: no difference in hazard rates between groups.
#H1: there is a difference.
#log-rank test using survdiff
survdiff(Surv(chldage, status) ~ nsibs, rho = 0, data=pneumon)
#log-rank test using the summary of a cox regression model
summary(coxph(Surv(chldage, status) ~ nsibs, data=pneumon))
#multiple comparisons using bonferroni correction
alpha = 0.05/2/6
alpha
summary(coxph(Surv(chldage, status) ~ as.factor(nsibs),data=pneumon[pneumon$nsibs<2,]))
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[pneumon$nsibs<2,])
p01=5.55e-05
p01
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[(pneumon$nsibs==1|pneumon$nsibs==2),])
p12=0.0826
p12
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[(pneumon$nsibs==1|pneumon$nsibs==3),])
p13=0.376
p13
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[(pneumon$nsibs==2|pneumon$nsibs==3),])
p23=0.842
p23
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[(pneumon$nsibs==0|pneumon$nsibs==2),])
p02=2.83e-06
p02
survdiff(Surv(chldage, status) ~ as.factor(nsibs), rho = 0, data=pneumon[(pneumon$nsibs==0|pneumon$nsibs==3),])
p03=p= 0.00662
p03
sig = (p01>alpha) # FALSE
sig
sig = (p12>alpha) # TRUE
sig
sig = (p13>alpha) # TRUE
sig
sig = (p23>alpha) # TRUE
sig
sig = (p03>alpha) # FALSE
sig
sig = (p02>alpha) # FALSE
sig
#Survival analysis using Cox regression
#Backward Elimination using 0.05 significance level
#Exploratory analysis of the 2-way interactions as possible predictors for pneumonia:
#the number of siblings, weaning age, maternal age, race, poverty, birthweight and maternal smoking.
model1 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
mthage*(factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model1)
#remove mthage:factor(poverty) 0.977833
model2 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
mthage*(factor(race)+factor(bweight)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model2)
#remove wmonth:factor(smoke) 0.907920
model3 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(race)+factor(poverty)+factor(bweight)+factor(smoke))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(bweight)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model3)
#remove nsibs:factor(smoke) 0.834793
model4 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(race)+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(bweight)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model4)
#remove nsibs:factor(race) 0.685935
model5 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(bweight)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model5)
#remove mthage:factor(bweight) 0.682012
model6 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(smoke))+
factor(race)*(factor(poverty)+factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model6)
#remove factor(poverty):factor(race) 0.680383
model7 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(smoke))+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)*(factor(bweight)+factor(smoke))+
factor(bweight)*factor(smoke), data=pneumon)
anova(model7)
#remove factor(poverty):factor(bweight) 0.576018
model8 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*(factor(race)+factor(smoke))+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)*factor(smoke)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model8)
#remove mthage:factor(race) 0.524433
model9 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*factor(smoke)+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)*factor(smoke)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model9)
#remove factor(poverty):factor(smoke) 0.474097
model10 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage*factor(smoke)+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model10)
#remove mthage:factor(smoke) 0.464248
model11 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty)+factor(bweight))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model11)
#remove nsibs:factor(bweight) 0.447708
model12 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty))+
wmonth*(mthage+factor(race)+factor(poverty)+factor(bweight))+
mthage+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model12)
#remove wmonth:factor(race) 0.340012
model13 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty))+
wmonth*(mthage+factor(poverty)+factor(bweight))+
mthage+
factor(race)*(factor(bweight)+factor(smoke))+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model13)
#remove factor(race):factor(smoke) 0.241391
model14 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage+factor(poverty))+
wmonth*(mthage+factor(poverty)+factor(bweight))+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model14)
#remove nsibs:factor(poverty) 0.211686
model15 = coxph(Surv(chldage,status) ~ nsibs*(wmonth+mthage)+
wmonth*(mthage+factor(poverty)+factor(bweight))+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model15)
#remove nsibs:wmonth 0.152700
model16 = coxph(Surv(chldage,status) ~ nsibs*mthage+
wmonth*(mthage+factor(poverty)+factor(bweight))+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model16)
#remove wmonth:factor(poverty) 0.099467
model17 = coxph(Surv(chldage,status) ~ nsibs*mthage+
wmonth*(mthage+factor(bweight))+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model17)
#remove nsibs:mthage 0.061513
model18 = coxph(Surv(chldage,status) ~ nsibs+
wmonth*(mthage+factor(bweight))+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model18)
#remove wmonth:mthage 0.053040
model19 = coxph(Surv(chldage,status) ~ nsibs+
wmonth*factor(bweight)+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model19)
#remove wmonth:factor(bweight) 0.124220
model20 = coxph(Surv(chldage,status) ~ nsibs+
wmonth+
mthage+
factor(race)*factor(bweight)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model20)
#remove factor(race):factor(bweight) 0.071561
model21 = coxph(Surv(chldage,status) ~ nsibs+
wmonth+
mthage+
factor(race)+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model21)
#remove factor(race) 0.3613858
model22 = coxph(Surv(chldage,status) ~ nsibs+
wmonth+
mthage+
factor(poverty)+
factor(bweight)*factor(smoke), data=pneumon)
anova(model22)
#remove factor(poverty) 0.1362200
model23 = coxph(Surv(chldage,status) ~ nsibs+wmonth+mthage+factor(bweight)*factor(smoke), data=pneumon)
anova(model23)
summary(model23)
#All the variables in the model are significant so the backward elimination process stops.
#Additionally, the variables involved in the factor(bweight):factor(smoke) interaction can not be removed,
#because their interaction is significant. So the final model is model23
#checking the proportionality assumption
cox.zph(model23)
par(mfrow=c(2,4))
plot(cox.zph(model23), col=c(2,4))
contrasts(factor(pneumon$nsibs))