-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_cross_validation.R
125 lines (90 loc) · 3.9 KB
/
04_cross_validation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
##############################
# Code - Section 4.2 - 4.2.2 #
##############################
# 4.2 Joint models for time-varying consumption and adherence to ACE/ARB therapy
# 4.2.2 Comparison of the two approaches
# This code allows to perform 10-fold cross-validation in order to assess the predictive
# performances of models M2 and M4 (see Table 2) in terms of calibration and discrimination.
# In particular, it computes the integrated prediction errors IPE(u|t) and the dynamic
# discrimination indexes dynC(u,delta_t) at time points u = 12 months (1 year), i.e.
# mid-term mortality, and u = 60 months (5 years), i.e. long-term mortality, for the 10-folds.
# Indixes means and standard deviations for the two models are then computed in Table 5.
# For further details see Section 4.2.2 of the manuscript.
rm( list = ls() )
library(data.table)
library(doSNOW)
library(foreach)
library(JMbayes)
library(MASS)
library(parallel)
library(splines)
# RStudio: Session -> Set Working Directory -> To Source File Location
directory_path <- "XXXX/Code" ## change to your working directory
setwd(directory_path)
# Load auxiliary functions CrossValJM_M2 and CrossValJM_M4.
source('utils_functions/utils_cross_validation.R')
# Load data
load('data/fake_data.Rdata')
data <- data.table(fake_data)
# Remove standardized age and comorbidity:
# they must be standardized on training data.
data <- data[, age := NULL]
data <- data[, n_com := NULL]
# Create K=10 random folds
K <- 10
IDs <- unique(data$ID)
n <- length(IDs)
set.seed(123)
splits <- split(seq_len(n), sample(rep(seq_len(K), length.out = n)))
## Performances of model M2
# Setup parallel backend to use many processors
cores <- detectCores()
cl <- makeCluster(cores-1) #not to overload your computer
registerDoSNOW(cl)
# Progress bar
pb <- txtProgressBar(max = K, style = 3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
IPE_dynC_M2 <- foreach(k=1:K, .combine=rbind,
.options.snow = opts) %dopar% {
# Training dataset
trainingData <- data[!(data$ID %in% IDs[unlist(splits[k])]),]
# Testing dataset: fold k
testingData <- data[data$ID %in% IDs[unlist(splits[k])],]
out_matrix <- CrossValJM_M2(k, trainingData, testingData)
out_matrix
}
# Stop cluster and close progress bar
stopCluster(cl)
close(pb)
## Performances of model M4
# Setup parallel backend to use many processors
cl <- makeCluster(cores-1)
registerDoSNOW(cl)
# Progress bar
pb <- txtProgressBar(max = K, style = 3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
IPE_dynC_M4 <- foreach(k=1:K, .combine=rbind,
.options.snow = opts) %dopar% {
# Training dataset
trainingData <- data[!(data$ID %in% IDs[unlist(splits[k])]),]
# Testing dataset: fold k
testingData <- data[data$ID %in% IDs[unlist(splits[k])],]
out_matrix <- CrossValJM_M4(k, trainingData, testingData)
out_matrix
}
# Stop cluster and close progress bar
stopCluster(cl)
close(pb)
#---------#
# Table 5 #
#---------#
Table_5 <- cbind(round(colMeans(IPE_dynC_M2[,-1]),4),
round(apply(IPE_dynC_M2[,-1], 2, sd),4),
round(colMeans(IPE_dynC_M4[,-1]),4),
round(apply(IPE_dynC_M4[,-1], 2, sd),4)
)
rownames(Table_5) <- c('IPE(12|3)', 'IPE(60|12)', 'dynC(12,1)', 'dynC(60,6)')
colnames(Table_5) <- c('M2 - Mean', 'M2 - s.d.','M4 - Mean', 'M4 - s.d.')
Table_5