Skip to content

Commit

Permalink
Background hazard with left/interval censoring
Browse files Browse the repository at this point in the history
  • Loading branch information
chjackson committed Nov 28, 2023
1 parent e48e3e7 commit 618adb6
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 45 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ vignettes/Sweave.sty
^docs$
^pkgdown$
codecov.yml
data-raw
4 changes: 2 additions & 2 deletions R/deriv.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ Dminusloglik.flexsurv <- function(optpars, Y, X=0, weights, bhazard, rtrunc, dli
dstcall[[names(pars)[i]]] <-
dlist$inv.transforms[[i]](pars[[i]])
for (i in seq_along(aux)){
ddcall[[names(aux)[i]]] <- dsccall[[names(aux)[i]]] <-
dstcall[[names(aux)[i]]] <- aux[[i]]
ddcall[[names(aux)[i]]] <- dsccall[[names(aux)[i]]] <-
dstcall[[names(aux)[i]]] <- aux[[i]]
}
for (i in dlist$pars) {
ddcall[[i]] <- ddcall[[i]][dead]
Expand Down
82 changes: 56 additions & 26 deletions R/flexsurvreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,14 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
## which are the subjects with known event times
event <- Y[,"status"] == 1
event.times <- Y[event, "time1"]
left.censor <- Y[!event, "time2"]
right.censor <- Y[!event, "time1"]
lcens.times <- Y[!event, "time2"]
rcens.times <- Y[!event, "time1"]

par.transform <- buildTransformer(inits, nbpars, dlist)

aux.pars <- buildAuxParms(aux, dlist)

default.offset <- rep.int(0, length(event.times))
do.hazard <- any(bhazard > 0)
do.bhazard <- any(bhazard > 0)

loglik <- rep.int(0, nrow(Y))
## the ... here is to work around optim
Expand Down Expand Up @@ -100,12 +99,12 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
## Left censoring times (upper bound for event time)
if (!all(event)){
pmaxargs <- fnargs.nevent
pmaxargs$q <- left.censor # Inf if right-censored, giving pmax=1
pmaxargs$q <- lcens.times # Inf if right-censored, giving pmax=1
pmax <- call_distfn_quiet(dfns$p, pmaxargs)
pmax[pmaxargs$q==Inf] <- 1 # in case user-defined function doesn't already do this
## Right censoring times (lower bound for event time)
pargs <- fnargs.nevent
pargs$q <- right.censor
pargs$q <- rcens.times
pmin <- call_distfn_quiet(dfns$p, pargs)
}

Expand All @@ -120,20 +119,32 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
pupper[rtrunc==Inf] <- 1 # in case the user's function doesn't already do this
pobs <- pupper - plower # prob of being observed = 1 - 0 if no truncation

if (do.hazard){
# Hazard adjustment for relative survival models: required for estimation
if (do.bhazard){
# Adjust for background hazard in relative survival models
pargs <- fnargs.event
pargs$q <- event.times
pminb <- call_distfn_quiet(dfns$p, pargs)
loghaz <- logdens - log(1 - pminb)
offseti <- log(1 + bhazard[event] / exp(loghaz))
logsurv_excess <- log(1 - pminb)
loghaz_excess <- logdens - logsurv_excess
haz_excess <- exp(loghaz_excess)
logdens_offset <- log(1 + bhazard[event] / haz_excess) # = log(haz_allcause / haz_excess)
if (!all(event)) { # background survival S* and left or interval censoring
b_condsurv <- bhazard[!event] # this is S*(end) / S*(start)
b_condsurv[lcens.times==Inf] <- 0 # when end=Inf, i.e. right censoring
}
if (any(is.finite(rtrunc)))
stop("models with both right truncation and background hazards not supported")
} else {
offseti <- default.offset
logdens_offset <- 0
}
## Express as vector of individual likelihood contributions
loglik[event] <- (logdens + offseti)
if (!all(event))
loglik[!event] <- (log(pmax - pmin))
loglik[event] <- (logdens + logdens_offset)
if (!all(event)){
if (do.bhazard)
loglik[!event] <- log((pmax - 1)*b_condsurv + 1 - pmin)
else
loglik[!event] <- log(pmax - pmin)
}

loglik <- loglik - log(pobs)

Expand Down Expand Up @@ -441,9 +452,22 @@ compress.model.matrices <- function(mml){
##' @param bhazard Optional variable giving expected hazards for relative
##' survival models. The model is described by Nelson et al. (2007).
##'
##' \code{bhazard} should contain a vector of values for each person in
##' the data, but only the values for the individuals whose event is observed are
##' used. \code{bhazard} refers to the hazard at the observed event time.
##' \code{bhazard} should contain a vector of values for each person in
##' the data.
##'
##' \itemize{
##' \item For people with observed events, \code{bhazard} refers to the
##' hazard at the observed event time.
##'
##' \item For people whose event time is
##' left-censored or interval-censored, \code{bhazard} should contain the
##' probability of surviving until the end of the corresponding interval,
##' conditionally on being alive at the start.
##'
##' \item For people whose event time
##' is right-censored, the value of \code{bhazard} is ignored and does not
##' need to be specified.
##' }
##'
##' If \code{bhazard} is supplied, then the parameter estimates returned by
##' \code{flexsurvreg} and the outputs returned by \code{summary.flexsurvreg}
Expand Down Expand Up @@ -665,7 +689,7 @@ compress.model.matrices <- function(mml){
##' for use in \code{\link{flexsurvreg}}, construct a list with the following
##' elements:
##'
##' \describe{ \item{"name"}{A string naming the distribution. If this
##' \describe{ \item{\code{"name"}}{A string naming the distribution. If this
##' is called \code{"dist"}, for example, then there must be visible in the
##' working environment, at least, either
##'
Expand Down Expand Up @@ -713,21 +737,21 @@ compress.model.matrices <- function(mml){
##' function must return a matrix with rows corresponding to times, and columns
##' corresponding to the parameters of the distribution. The derivatives are
##' used, if available, to speed up the model fitting with \code{\link{optim}}.
##' } \item{"pars"}{Vector of strings naming the parameters of the
##' } \item{\code{"pars"}}{Vector of strings naming the parameters of the
##' distribution. These must be the same names as the arguments of the density
##' and probability functions. }
##' \item{"location"}{Name of the main parameter governing the mean of
##' \item{\code{"location"}}{Name of the main parameter governing the mean of
##' the distribution. This is the default parameter on which covariates are
##' placed in the \code{formula} supplied to \code{flexsurvreg}. }
##' \item{"transforms"}{List of R
##' \item{\code{"transforms"}}{List of R
##' functions which transform the range of values taken by each parameter onto
##' the real line. For example, \code{c(log, log)} for a distribution with two
##' positive parameters. }
##' \item{"inv.transforms"}{List of R functions defining the
##' \item{\code{"inv.transforms"}}{List of R functions defining the
##' corresponding inverse transformations. Note these must be lists, even for
##' single parameter distributions they should be supplied as, e.g.
##' \code{c(exp)} or \code{list(exp)}. }
##' \item{"inits"}{A function of the
##' \item{\code{"inits"}}{A function of the
##' observed survival times \code{t} (including right-censoring times, and
##' using the halfway point for interval-censored times) which returns a vector
##' of reasonable initial values for maximum likelihood estimation of each
Expand Down Expand Up @@ -930,8 +954,8 @@ flexsurvreg <- function(formula, anc=NULL, data, weights, bhazard, rtrunc, subse
if (is.null(optim.args$method)){
optim.args$method <- "BFGS"
}
gr <- if (dfns$deriv) Dminusloglik.flexsurv else NULL
has_analytic_hessian <- dfns$hessian && !isTRUE(hess.control$numeric)
gr <- if (dfns$deriv && deriv_supported(Y)) Dminusloglik.flexsurv else NULL
has_analytic_hessian <- dfns$hessian && !isTRUE(hess.control$numeric) && deriv_supported(Y)
optim.args <- c(optim.args,
list(par=optpars,
fn=logLikFactory(Y=Y, X=X,
Expand Down Expand Up @@ -1012,7 +1036,7 @@ flexsurvreg <- function(formula, anc=NULL, data, weights, bhazard, rtrunc, subse
}

check_deriv <- function(optpars, Y, X, weights, bhazard, rtrunc, dlist, inits, dfns, aux, mx, fixedpars){
if (isTRUE(getOption("flexsurv.test.analytic.derivatives"))){
if (isTRUE(getOption("flexsurv.test.analytic.derivatives")) && deriv_supported(Y)){
if (is.logical(fixedpars) && fixedpars==TRUE) { optpars <- inits; fixedpars=FALSE }
if (dfns$deriv)
deriv.test <- deriv.test(optpars=optpars, Y=Y, X=X, weights=weights, bhazard=bhazard, rtrunc=rtrunc, dlist=dlist, inits=inits, dfns=dfns, aux=aux, mx=mx, fixedpars=fixedpars)
Expand Down Expand Up @@ -1360,3 +1384,9 @@ UseMethod("AICc")
##' @rdname AICc
##' @export
AICC <- AICc

deriv_supported <- function(Y){
event <- Y[,"status"] == 1
left_cens <- is.finite(Y[!event, "time2"])
!any(left_cens)
}
31 changes: 22 additions & 9 deletions man/flexsurvreg.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 26 additions & 8 deletions tests/testthat/test_flexsurvreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,8 @@ test_that("Interval censoring",{
simt[status==0] <- 0.6
tmin <- simt
tmax <- ifelse(status==1, simt, Inf)
tmax.sr <- ifelse(status==1, simt, NA) # need -Inf instead of Inf, don't understand why. bug?
## Currently flexsurvreg fails with Inf, just as survreg fails, with Weibull. Consistent with survreg for the moment

tmax.sr <- ifelse(status==1, simt, NA)

sr1 <- survreg(Surv(tmin, tmax.sr, type="interval2") ~ 1, dist="weibull")
sr2 <- survreg(Surv(tmin, status) ~ 1, dist="weibull")
expect_equal(sr1$loglik[2], sr2$loglik[2])
Expand All @@ -379,14 +378,33 @@ test_that("Interval censoring",{
fs2 <- flexsurvreg(Surv(simt, status) ~ 1, dist="weibull")
expect_true(fs1$loglik != fs2$loglik)

### FIXME n events wrong for interval2.

## using type="interval"
status[status==0] <- 3
fs3 <- flexsurvreg(Surv(tmin, tmax, status, type="interval") ~ 1, dist="weibull")
expect_equal(fs1$loglik, fs3$loglik)

## interval censoring close around the event
set.seed(1)
tev <- rweibull(100, 1.1, 1.5)
tmin <- tev - 0.001
tmax <- tev + 0.001
fs1 <- flexsurvreg(Surv(tev) ~ 1, dist="weibull")
fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, dist="weibull")
expect_equal(fs2$res["shape","est"], fs1$res["shape","est"], tol=1e-03)

## relative survival with left and interval censoring
## at left cens times, bhazard contains the background cond prob of surviving interval
bh <- rep(0.01, length(tmax))
back_csurv <- rep(exp(-0.002*0.01), length(tmax))
fs1 <- flexsurvreg(Surv(tev) ~ 1, dist="weibull", bhazard=bh)
fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1,
dist="weibull", bhazard=back_csurv)

fs1 <- flexsurvreg(Surv(tev) ~ 1,
dist="weibull", bhazard=bh)
fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1,
dist="weibull", bhazard=back_csurv, inits=c(1.24, 1.4))
expect_equal(fs2$res["shape","est"], fs1$res["shape","est"], tol=1e-03)
})

test_that("inits",{
Expand Down Expand Up @@ -454,7 +472,7 @@ test_that("Relative survival", {

expect_equal(mdl_0$loglik, mdl_1$loglik - sum(bhaz * lung$time/365))
})

test_that("warning with strata", {
## need double backslash to escape $
expect_warning(flexsurvreg(formula = Surv(ovarian$futime, ovarian$fustat) ~ strata(ovarian$resid.ds), dist="gengamma", inits=c(6,1,-1,0)),
Expand All @@ -481,8 +499,8 @@ test_that("No events in the data",{
mod <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, dist="exponential")
expect_equal(mod$loglik, -337.9815, tolerance=1e-03)
modb <- flexsurvreg(Surv(tmin,tmax,type='interval2')~1,
bhazard=bhazard,dist="exponential")
expect_equal(mod$loglik, modb$loglik, tolerance=1e-03)
bhazard=exp(-bhazard*(tmax-tmin)),dist="exponential")
expect_equal(mod$res["rate","est"], modb$res["rate","est"], tolerance=1e-02)
})

test_that("No censoring in the data",{
Expand Down

0 comments on commit 618adb6

Please sign in to comment.