Background hazard with left/interval censoring

chjackson · Nov 28, 2023 · 618adb6 · 618adb6
1 parent e48e3e7
commit 618adb6
Show file tree

Hide file tree

Showing 5 changed files with 107 additions and 45 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -43,3 +43,4 @@ vignettes/Sweave.sty
 ^docs$
 ^pkgdown$
 codecov.yml
+data-raw
diff --git a/R/deriv.R b/R/deriv.R
@@ -28,8 +28,8 @@ Dminusloglik.flexsurv <- function(optpars, Y, X=0, weights, bhazard, rtrunc, dli
                 dstcall[[names(pars)[i]]] <-
                     dlist$inv.transforms[[i]](pars[[i]])
     for (i in seq_along(aux)){
-        ddcall[[names(aux)[i]]] <- dsccall[[names(aux)[i]]] <-
-            dstcall[[names(aux)[i]]] <- aux[[i]]
+      ddcall[[names(aux)[i]]] <- dsccall[[names(aux)[i]]] <-
+        dstcall[[names(aux)[i]]] <- aux[[i]]
     }
     for (i in dlist$pars) {
         ddcall[[i]] <- ddcall[[i]][dead]

diff --git a/R/flexsurvreg.R b/R/flexsurvreg.R
@@ -54,15 +54,14 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
     ## which are the subjects with known event times
     event <- Y[,"status"] == 1
     event.times <- Y[event, "time1"]
-    left.censor <- Y[!event, "time2"]
-    right.censor <- Y[!event, "time1"]
+    lcens.times <- Y[!event, "time2"]
+    rcens.times <- Y[!event, "time1"]
 
     par.transform <- buildTransformer(inits, nbpars, dlist)
 
     aux.pars <- buildAuxParms(aux, dlist)
 
-    default.offset <- rep.int(0, length(event.times))
-    do.hazard <- any(bhazard > 0)
+    do.bhazard <- any(bhazard > 0)
 
     loglik <- rep.int(0, nrow(Y))
     ## the ... here is to work around optim
@@ -100,12 +99,12 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
         ## Left censoring times (upper bound for event time) 
         if (!all(event)){
             pmaxargs <- fnargs.nevent
-            pmaxargs$q <- left.censor # Inf if right-censored, giving pmax=1
+            pmaxargs$q <- lcens.times # Inf if right-censored, giving pmax=1
             pmax <- call_distfn_quiet(dfns$p, pmaxargs)
             pmax[pmaxargs$q==Inf] <- 1  # in case user-defined function doesn't already do this
         ## Right censoring times (lower bound for event time) 
             pargs <- fnargs.nevent
-            pargs$q <- right.censor
+            pargs$q <- rcens.times
             pmin <- call_distfn_quiet(dfns$p, pargs)
         } 
 
@@ -120,20 +119,32 @@ logLikFactory <- function(Y, X=0, weights, bhazard, rtrunc, dlist,
         pupper[rtrunc==Inf] <- 1 # in case the user's function doesn't already do this
         pobs <- pupper - plower # prob of being observed = 1 - 0 if no truncation 
 
-        if (do.hazard){
-            # Hazard adjustment for relative survival models: required for estimation
+        if (do.bhazard){
+            # Adjust for background hazard in relative survival models
             pargs   <- fnargs.event
             pargs$q <- event.times
             pminb   <- call_distfn_quiet(dfns$p, pargs)
-            loghaz  <- logdens - log(1 - pminb)
-            offseti <- log(1 + bhazard[event] / exp(loghaz))
+            logsurv_excess <- log(1 - pminb)
+            loghaz_excess  <- logdens - logsurv_excess
+            haz_excess <- exp(loghaz_excess)
+            logdens_offset <- log(1 + bhazard[event] / haz_excess) # = log(haz_allcause / haz_excess)
+            if (!all(event)) {               # background survival S* and left or interval censoring 
+              b_condsurv <- bhazard[!event]  # this is S*(end) / S*(start)
+              b_condsurv[lcens.times==Inf] <- 0  # when end=Inf, i.e. right censoring
+            }
+            if (any(is.finite(rtrunc)))
+              stop("models with both right truncation and background hazards not supported")
         } else {
-            offseti <- default.offset
+            logdens_offset <- 0
         }
         ## Express as vector of individual likelihood contributions
-        loglik[event] <- (logdens + offseti)
-        if (!all(event))
-            loglik[!event] <- (log(pmax - pmin))
+        loglik[event] <- (logdens + logdens_offset)
+        if (!all(event)){
+           if (do.bhazard)
+             loglik[!event] <- log((pmax - 1)*b_condsurv  +  1 - pmin)
+           else
+             loglik[!event] <- log(pmax - pmin)
+        }
 
         loglik <- loglik - log(pobs)
 
@@ -441,9 +452,22 @@ compress.model.matrices <- function(mml){
 ##' @param bhazard Optional variable giving expected hazards for relative
 ##'   survival models.  The model is described by Nelson et al. (2007).
 ##'
-##'    \code{bhazard} should contain a vector of values for each person in
-##'   the data, but only the values for the individuals whose event is observed are
-##'   used. \code{bhazard} refers to the hazard at the observed event time.
+##'   \code{bhazard} should contain a vector of values for each person in
+##'   the data.
+##'
+##'  \itemize{
+##'  \item  For people with observed events, \code{bhazard} refers to the
+##'   hazard at the observed event time.
+##'
+##'  \item  For people whose event time is
+##'   left-censored or interval-censored, \code{bhazard} should contain the
+##'   probability of surviving until the end of the corresponding interval,
+##'   conditionally on being alive at the start.
+##'
+##'   \item For people whose event time
+##'   is right-censored, the value of \code{bhazard} is ignored and does not
+##'   need to be specified.
+##' }
 ##'
 ##'   If \code{bhazard} is supplied, then the parameter estimates returned by
 ##'   \code{flexsurvreg} and the outputs returned by \code{summary.flexsurvreg}
@@ -665,7 +689,7 @@ compress.model.matrices <- function(mml){
 ##'   for use in \code{\link{flexsurvreg}}, construct a list with the following
 ##'   elements:
 ##'
-##'   \describe{ \item{"name"}{A string naming the distribution.  If this
+##'   \describe{ \item{\code{"name"}}{A string naming the distribution.  If this
 ##'   is called \code{"dist"}, for example, then there must be visible in the
 ##'   working environment, at least, either
 ##'
@@ -713,21 +737,21 @@ compress.model.matrices <- function(mml){
 ##' function must return a matrix with rows corresponding to times, and columns
 ##' corresponding to the parameters of the distribution.  The derivatives are
 ##' used, if available, to speed up the model fitting with \code{\link{optim}}.
-##' } \item{"pars"}{Vector of strings naming the parameters of the
+##' } \item{\code{"pars"}}{Vector of strings naming the parameters of the
 ##' distribution. These must be the same names as the arguments of the density
 ##' and probability functions.  }
-##' \item{"location"}{Name of the main parameter governing the mean of
+##' \item{\code{"location"}}{Name of the main parameter governing the mean of
 ##' the distribution.  This is the default parameter on which covariates are
 ##' placed in the \code{formula} supplied to \code{flexsurvreg}. }
-##' \item{"transforms"}{List of R
+##' \item{\code{"transforms"}}{List of R
 ##' functions which transform the range of values taken by each parameter onto
 ##' the real line.  For example, \code{c(log, log)} for a distribution with two
 ##' positive parameters. }
-##' \item{"inv.transforms"}{List of R functions defining the
+##' \item{\code{"inv.transforms"}}{List of R functions defining the
 ##' corresponding inverse transformations.  Note these must be lists, even for
 ##' single parameter distributions they should be supplied as, e.g.
 ##' \code{c(exp)} or \code{list(exp)}. }
-##' \item{"inits"}{A function of the
+##' \item{\code{"inits"}}{A function of the
 ##' observed survival times \code{t} (including right-censoring times, and
 ##' using the halfway point for interval-censored times) which returns a vector
 ##' of reasonable initial values for maximum likelihood estimation of each
@@ -930,8 +954,8 @@ flexsurvreg <- function(formula, anc=NULL, data, weights, bhazard, rtrunc, subse
         if (is.null(optim.args$method)){
             optim.args$method <- "BFGS"
         }
-        gr <- if (dfns$deriv) Dminusloglik.flexsurv else NULL
-        has_analytic_hessian <- dfns$hessian && !isTRUE(hess.control$numeric)
+        gr <- if (dfns$deriv && deriv_supported(Y)) Dminusloglik.flexsurv else NULL
+        has_analytic_hessian <- dfns$hessian && !isTRUE(hess.control$numeric) && deriv_supported(Y)
         optim.args <- c(optim.args,
                         list(par=optpars,
                              fn=logLikFactory(Y=Y, X=X,
@@ -1012,7 +1036,7 @@ flexsurvreg <- function(formula, anc=NULL, data, weights, bhazard, rtrunc, subse
 }
 
 check_deriv <- function(optpars, Y, X, weights, bhazard, rtrunc, dlist, inits, dfns, aux, mx, fixedpars){
-  if (isTRUE(getOption("flexsurv.test.analytic.derivatives"))){
+  if (isTRUE(getOption("flexsurv.test.analytic.derivatives")) && deriv_supported(Y)){
     if (is.logical(fixedpars) && fixedpars==TRUE) { optpars <- inits; fixedpars=FALSE }
     if (dfns$deriv)
       deriv.test <- deriv.test(optpars=optpars, Y=Y, X=X, weights=weights, bhazard=bhazard, rtrunc=rtrunc, dlist=dlist, inits=inits, dfns=dfns, aux=aux, mx=mx, fixedpars=fixedpars)
@@ -1360,3 +1384,9 @@ UseMethod("AICc")
 ##' @rdname AICc
 ##' @export
 AICC <- AICc
+
+deriv_supported <- function(Y){
+  event <- Y[,"status"] == 1
+  left_cens <- is.finite(Y[!event, "time2"])
+  !any(left_cens)
+}
diff --git a/man/flexsurvreg.Rd b/man/flexsurvreg.Rd
diff --git a/tests/testthat/test_flexsurvreg.R b/tests/testthat/test_flexsurvreg.R
@@ -361,9 +361,8 @@ test_that("Interval censoring",{
     simt[status==0] <- 0.6
     tmin <- simt
     tmax <- ifelse(status==1, simt, Inf)
-    tmax.sr <- ifelse(status==1, simt, NA) # need -Inf instead of Inf, don't understand why. bug?
-## Currently flexsurvreg fails with Inf, just as survreg fails, with Weibull. Consistent with survreg for the moment
-
+    tmax.sr <- ifelse(status==1, simt, NA) 
+
     sr1 <- survreg(Surv(tmin, tmax.sr, type="interval2") ~ 1, dist="weibull")
     sr2 <- survreg(Surv(tmin, status) ~ 1, dist="weibull")
     expect_equal(sr1$loglik[2], sr2$loglik[2])
@@ -379,14 +378,33 @@ test_that("Interval censoring",{
     fs2 <- flexsurvreg(Surv(simt, status) ~ 1, dist="weibull")
     expect_true(fs1$loglik != fs2$loglik)
 
-    ### FIXME n events wrong for interval2. 
-
     ## using type="interval"
     status[status==0] <- 3
     fs3 <- flexsurvreg(Surv(tmin, tmax, status, type="interval") ~ 1, dist="weibull")
     expect_equal(fs1$loglik, fs3$loglik)
 
+    ## interval censoring close around the event
+    set.seed(1)
+    tev <- rweibull(100, 1.1, 1.5)
+    tmin <- tev - 0.001
+    tmax <- tev + 0.001
+    fs1 <- flexsurvreg(Surv(tev) ~ 1, dist="weibull")
+    fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, dist="weibull")
+    expect_equal(fs2$res["shape","est"], fs1$res["shape","est"], tol=1e-03)
+
+    ## relative survival with left and interval censoring
+    ## at left cens times, bhazard contains the background cond prob of surviving interval
+    bh <- rep(0.01, length(tmax))
+    back_csurv <- rep(exp(-0.002*0.01), length(tmax))
+    fs1 <- flexsurvreg(Surv(tev) ~ 1, dist="weibull", bhazard=bh)
+    fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, 
+                       dist="weibull", bhazard=back_csurv)
 
+    fs1 <- flexsurvreg(Surv(tev) ~ 1, 
+                       dist="weibull", bhazard=bh)
+    fs2 <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, 
+                       dist="weibull", bhazard=back_csurv, inits=c(1.24, 1.4))
+    expect_equal(fs2$res["shape","est"], fs1$res["shape","est"], tol=1e-03)
 })
 
 test_that("inits",{
@@ -454,7 +472,7 @@ test_that("Relative survival", {
 
     expect_equal(mdl_0$loglik, mdl_1$loglik - sum(bhaz * lung$time/365))
 })
-
+    
 test_that("warning with strata", { 
     ## need double backslash to escape $
     expect_warning(flexsurvreg(formula = Surv(ovarian$futime, ovarian$fustat) ~ strata(ovarian$resid.ds), dist="gengamma", inits=c(6,1,-1,0)),
@@ -481,8 +499,8 @@ test_that("No events in the data",{
     mod <- flexsurvreg(Surv(tmin, tmax, type="interval2") ~ 1, dist="exponential")
     expect_equal(mod$loglik, -337.9815, tolerance=1e-03)
     modb <- flexsurvreg(Surv(tmin,tmax,type='interval2')~1,
-                       bhazard=bhazard,dist="exponential")
-    expect_equal(mod$loglik, modb$loglik, tolerance=1e-03)
+                       bhazard=exp(-bhazard*(tmax-tmin)),dist="exponential")
+    expect_equal(mod$res["rate","est"], modb$res["rate","est"], tolerance=1e-02)
 })
 
 test_that("No censoring in the data",{