Code for the paper Rufibach et al. (2022) (download from arxiv) written by the “follow-up quantification” task force of the oncology estimand WG.
This R markdown file provides easy accessible code to compute all the quantifiers for follow-up. The github repository where this document is hosted is available here.
To illustrate the functions all clinical trial data in this file is simulated. In the paper, the PH examples uses real data.
# --------------------------------------------------------------
# packages
# --------------------------------------------------------------
c("survival", "rpact", "survRM2")
packs <-for (i in 1:length(packs)){library(packs[i], character.only = TRUE)}
## Warning: Paket 'rpact' wurde unter R Version 4.1.3 erstellt
Below all functions are defined.
quantifyFU
: This function provides all the seven methods described in the paper draft.plot.qfu
: Plot the distributions from which medians are computed.# --------------------------------------------------------------
# functions
# --------------------------------------------------------------
# function to compute all the different variants of follow-up quantification
function(rando, event_time, event_type, ccod){
quantifyFU <-
## =================================================================
## compute all FU measures
## =================================================================
# input arguments:
# - rando: date of randomization
# - event_time: time to event or time to censoring
# - event_type: 0 = event, 1 = lost to FU, 2 = administratively censored
# - ccod: clinical cutoff date
require(survival)
length(rando)
n <-
# objects to collect distributions
NULL
res <-
# indicator for lost to follow up
as.numeric(event_type == 1)
ltfu_cens <-
# indicator for administratively censored
as.numeric(event_type == 2)
admin_cens <-
# usual censoring indicator: 0 = censored (for whatever reason), 1 = event
as.numeric(event_type == 0)
primary_event <-
# indicator for censoring
as.numeric(primary_event == 0)
event_time_cens <-
# observation time regardless of censoring
as.list(environment(ecdf(event_time)))
ecdf1 <-1]] <- cbind(ecdf1$x, 1 - ecdf1$y)
res[[ median(event_time)
m1 <-
# observation time for those event-free
event_time[event_time_cens == 1]
d2 <- as.list(environment(ecdf(d2)))
ecdf2 <-2]] <- cbind(ecdf2$x, 1 - ecdf2$y)
res[[ median(d2)
m2 <-
# time to censoring
survfit(Surv(event_time, event_time_cens) ~ 1)
so3 <-3]] <- so3
res[[ summary(so3)$table["median"]
m3 <-
# time to CCOD, potential followup
as.numeric((ccod - rando) / 365.25 * 12)
pfu <- as.list(environment(ecdf(pfu)))
ecdf4 <-4]] <- cbind(ecdf4$x, 1 - ecdf4$y)
res[[ median(pfu)
m4 <-
# known function time
rep(NA, n)
m5 <-== 1] <- event_time[event_time_cens == 1]
m5[event_time_cens == 1] <- pfu[primary_event == 1]
m5[primary_event as.list(environment(ecdf(m5)))
ecdf5 <-5]] <- cbind(ecdf5$x, 1 - ecdf5$y)
res[[ median(m5)
m5 <-
# Korn's potential follow-up time
sort(pfu)
spfu <- rep(NA, n)
pt <-for (i in 1:n){
# timepoint at which we compute potential followup (t' in Schemper et al)
spfu[i]
t <-
# proportion with pfu > t
mean(spfu > t)
pet <-
# time to LTFU
survfit(Surv(event_time, ltfu_cens) ~ 1, subset = (pfu >= t))
so6 <- ifelse(max(so6$time > t) == 0, 0, so6$surv[so6$time > t][1])
pltet <-
pltet * pet
pt[i] <-
}
6]] <- cbind(spfu, pt)
res[[
# now take the median of the distribution, see plot(t, pt, type = "s")
max(spfu[pt >= 0.5])
m6 <-
# Potential follow-up considering events
rep(NA, n)
m7 <-== 1] <- pfu[event_time_cens == 1]
m7[event_time_cens == 1] <- event_time[primary_event == 1]
m7[primary_event as.list(environment(ecdf(m7)))
ecdf7 <-7]] <- cbind(ecdf7$x, 1 - ecdf7$y)
res[[ median(m7)
m7 <-
# summarize results for medians
matrix(NA, nrow = 7, ncol = 1)
dat <-1] <- c(m1, m2, m3, m4, m5, m6, m7)
dat[, rownames(dat) <- c("Observation time regardless of censoring", "Observation time for those event-free",
"Time to censoring", "Time to CCOD", "Known function time", "Korn potential follow-up",
"Potential follow-up considering events")
colnames(dat) <- "median"
list("medians" = dat, "distributions" = res)
output <-class(output) <- "qfu"
return(output)
}
# function to plot distributions of various quantifiers
function(x, which = 1:7, median = TRUE){
plot.qfu <-
x$distributions
dat <-
par(las = 1)
plot(0, 0, type = "n", xlim = c(0, 1.05 * max(dat[[1]][, 1])), ylim = c(0, 1), xlab = "time-to-event endpoint", ylab = "probability",
main = "Distributions used for quantification of follow-up")
if (3 %in% which){lines(dat[[3]], col = 3, conf.int = FALSE)}
which[which != 3]
which <-for (i in which){lines(dat[[i]], col = i)}
if(isTRUE(median)){abline(h = 0.5, col = grey(0.5), lwd = 2)}
legend("bottomleft", rownames(x$medians)[which], lty = 1, col = (1:7)[which], bty = "n", lwd = 2)
}
# compute value of KM estimate and CI
function(time, event, t0, conf.level = 0.95){
confIntKM_t0 <-
1 - conf.level
alpha <-
# compute ci according to the formulas in Huesler and Zimmermann, Chapter 21
survfit(Surv(time, event) ~ 1, conf.int = 1 - alpha, conf.type = "plain", type = "kaplan", error = "greenwood", conf.lower = "peto")
obj <- summary(obj)$surv
St <- summary(obj)$time
t <- summary(obj)$n.risk
n <- matrix(NA, nrow = length(t0), ncol = 3)
res <-
for (i in 1:length(t0)){
t0[i]
ti <-if (min(t) > ti){res[i, ] <- c(1, NA, NA)}
if (min(t) <= ti){
if (ti %in% t){res[i, ] <- rep(NA, 3)} else {
min(St[t < ti])
Sti <- min(n[t < ti])
nti <- Sti ^ 2 * (1 - Sti) / nti
Var.peto <- exp(qnorm(1 - alpha / 2) * sqrt(Var.peto) / (Sti ^ (3 / 2) * (1 - Sti)))
Cti <- c(Sti / ((1 - Cti) * Sti + Cti), Cti * Sti / ((Cti - 1) * Sti + 1))
ci.km <- c(Sti, ci.km)}
res[i, ] <-# end if
} # end for
}
cbind(t0, res)
res <-dimnames(res)[[2]] <- c("t0", "S at t0", "lower.ci", "upper.ci")
return(res)
}
# bootstrap survival data
function(surv.obj, n, M = 1000, track = TRUE){
bootSurvivalSample <-
# bootstrap right-censored survival data according to Efron (1981)
# see Akritas (1986) for details
# surv.obj: Surv object to sample from
# n: sample size for samples to be drawn
# M: number of samples
nrow(surv.obj)
n.surv <- data.frame(matrix(NA, ncol = M, nrow = n))
res.mat <-
for (i in 1:M){
sample(1:n.surv, size = n, replace = TRUE)
ind <- surv.obj[ind]
res.mat[, i] <-if (track){print(paste("sample ", i, " of ", M, " done", sep = ""))}
}
return(res.mat)
}
# compute value and CI for difference of survival probabilities at milestone, via bootstrap
function(time, event, group, t0, M = 10 ^ 3, conf.level = 0.95){
confIntMilestoneDiff <-
(group == levels(group)[1])
ind <- Surv(time[ind], event[ind])
s.obj1 <- Surv(time[!ind], event[!ind])
s.obj2 <- sum(ind)
n1 <- sum(!ind)
n2 <-
bootSurvivalSample(surv.obj = s.obj1, n = n1, M = M, track = FALSE)
res1 <- bootSurvivalSample(surv.obj = s.obj2, n = n2, M = M, track = FALSE)
res2 <-
rep(NA, M)
boot.diff1 <- boot.diff1
boot.diff2 <-
for (i in 1:M){
# based on KM
confIntKM_t0(time = as.matrix(res1[, i])[, "time"], event = as.matrix(res1[, i])[, "status"], t0, conf.level = 0.95)[2]
t1km <- confIntKM_t0(time = as.matrix(res2[, i])[, "time"], event = as.matrix(res2[, i])[, "status"], t0, conf.level = 0.95)[2]
t2km <- t1km - t2km
boot.diff1[i] <-
# based on exponential fit
exp(- coef(survreg(res1[, i] ~ 1, dist = "exponential")))
r1 <- 1 - pexp(t0, rate = r1)
t1 <-
exp(- coef(survreg(res2[, i] ~ 1, dist = "exponential")))
r2 <- 1 - pexp(t0, rate = r2)
t2 <-
t1 - t2
boot.diff2[i] <-
}
# from bootstrap sample of differences just take symmetric quantiles to get CI for difference
1 - conf.level
alpha <- quantile(boot.diff1, probs = c(alpha / 2, 1 - alpha / 2), na.rm = TRUE)
ci1 <- quantile(boot.diff2, probs = c(alpha / 2, 1 - alpha / 2), na.rm = TRUE)
ci2 <-
list("km" = ci1, "exponential" = ci2)
res <-return(res)
}
# compute the extreme limits as described by Betensky
function(time, event){
stabilityKM <-
(event == 0)
ind <- max(time[event == 1])
maxevent <-
# extreme scenarios (not precisely) according to Betensky (2015)
# lower bound: every censored patient has event at censoring date
time
t_low <- event
c_low <- 1
c_low[ind] <-
# upper bound: every censored observation has censoring time equal to maximum event time
time
t_up <-& (time < maxevent)] <- maxevent
t_up[ind event
c_up <-
# collect results
list("t_low" = t_low, "c_low" = c_low, "t_up" = t_up, "c_up" = c_up)
res <-return(res)
}
rpact
A clinical trial with very similar properties as the Gallium data in Rufibach et al. (2022) is simulated using rpact
Wassmer and Pahlke (2021).
# simulate a clinical trial using rpact
# time unit is months
getDesignGroupSequential(informationRates = 1,
design <-typeOfDesign = "asOF", sided = 1, alpha = 0.025, beta = 0.2)
getSimulationSurvival(design,
simulationResult <-lambda2 = log(2) / 60, hazardRatio = 0.75,
dropoutRate1 = 0.025, dropoutRate2 = 0.025,
dropoutTime = 12,
accrualTime = 0:6,
accrualIntensity = 6 * 1:7,
plannedEvents = 350,
directionUpper = FALSE,
maxNumberOfSubjects = 1000,
maxNumberOfIterations = 1,
maxNumberOfRawDatasetsPerStage = 1,
seed = 2021)
# retrieve dataset
getRawData(simulationResult)
simdat <-
# create variable with randomization dates
# note that addition / subtraction of date objects happens in days
# --> multiplication by 365.25 / 12 ~= 30 below
as.Date("2016-01-31", origin = "1899-12-30")
day0 <- day0 + simdat$accrualTime * 365.25 / 12
rando <- simdat$timeUnderObservation
pfs <-
# event type variable: 0 = event, 1 = lost to FU, 2 = administratively censored
rep(NA, nrow(simdat))
event_type <-$event == TRUE] <- 0
event_type[simdat$event == FALSE & simdat$dropoutEvent == TRUE] <- 1
event_type[simdat$event == FALSE & simdat$dropoutEvent == FALSE] <- 2
event_type[simdat
# PFS event
as.numeric(event_type == 0)
pfsevent <-
# treatment arm
factor(simdat$treatmentGroup, levels = 1:2, labels = c("G", "R"))
arm <-
# define clinical cutoff date based on simulation result
day0 + simdat$observationTime[1] * 365.25 / 12
ccod <-
# check
summary(coxph(Surv(pfs, pfsevent) ~ arm))
so1 <- so1
## Call:
## coxph(formula = Surv(pfs, pfsevent) ~ arm)
##
## n= 1000, number of events= 350
##
## coef exp(coef) se(coef) z Pr(>|z|)
## armR 0.2351 1.2650 0.1073 2.191 0.0285 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## armR 1.265 0.7905 1.025 1.561
##
## Concordance= 0.528 (se = 0.014 )
## Likelihood ratio test= 4.82 on 1 df, p=0.03
## Wald test = 4.8 on 1 df, p=0.03
## Score (logrank) test = 4.82 on 1 df, p=0.03
Plot the Kaplan-Meier estimates:
par(las = 1)
plot(survfit(Surv(pfs, pfsevent) ~ arm), col = 2:3, mark = "'", lty = 1, xlim = c(0, 100),
xlab = "PFS", ylab = "probability of being event-free")
Compute survival probabilities, their differences and confidence intervals at milestones.
c(36, 48)
ms <- confIntKM_t0(time = pfs[arm == "R"], event = pfsevent[arm == "R"], t0 = ms, conf.level = 0.95)
msR1 <- confIntKM_t0(time = pfs[arm == "G"], event = pfsevent[arm == "G"], t0 = ms, conf.level = 0.95)
msG1 <- msR1
## t0 S at t0 lower.ci upper.ci
## [1,] 36 0.6625629 0.6054758 0.7152741
## [2,] 48 0.5870704 0.4922056 0.6758830
msG1
## t0 S at t0 lower.ci upper.ci
## [1,] 36 0.7149897 0.6622073 0.7624827
## [2,] 48 0.6536542 0.5662833 0.7317616
# differences
confIntMilestoneDiff(time = pfs, event = pfsevent, group = arm,
ms1d1 <-t0 = ms[1], M = 10 ^ 3, conf.level = 0.95)$km
confIntMilestoneDiff(time = pfs, event = pfsevent, group = arm,
ms2d1 <-t0 = ms[2], M = 10 ^ 3, conf.level = 0.95)$km
ms1d1
## 2.5% 97.5%
## -0.003376164 0.114936362
ms2d1
## 2.5% 97.5%
## -0.0007250327 0.1350419043
Now let us apply the above functions:
quantifyFU(rando = rando, event_time = pfs, event_type = event_type, ccod = ccod)
fu <-
# medians of all these distributions:
$medians fu
## median
## Observation time regardless of censoring 38.28363
## Observation time for those event-free 42.87887
## Time to censoring 44.05744
## Time to CCOD 45.78363
## Known function time 44.55744
## Korn potential follow-up 44.15268
## Potential follow-up considering events 40.31934
Plot distributions:
plot(fu)
rpact
# Simulation assuming a delayed treatment effect
0.05
alpha <- 0.2
beta <- getDesignGroupSequential(informationRates = 1, typeOfDesign = "asOF", sided = 1,
design <-alpha = alpha / 2, beta = beta)
c(0, 12)
piecewiseSurvivalTime <- log(2) / 60
baserate <- 0.65
hr_nph <- 0.025
dropoutRate <- 12
dropoutTime <- 0:6
accrualTime <- 6 * 1:7
accrualIntensity <- 389
plannedEvents <- 1000
maxNumberOfSubjects <- 10 ^ 3
maxNumberOfIterations <-
getSimulationSurvival(design,
simulationResultNPH <-piecewiseSurvivalTime = piecewiseSurvivalTime,
lambda2 = c(baserate, baserate),
lambda1 = c(baserate, hr_nph * baserate),
dropoutRate1 = dropoutRate,
dropoutRate2 = dropoutRate, dropoutTime = dropoutTime,
accrualTime = accrualTime,
accrualIntensity = accrualIntensity,
plannedEvents = plannedEvents,
directionUpper = FALSE,
maxNumberOfSubjects = maxNumberOfSubjects,
maxNumberOfIterations = maxNumberOfIterations,
maxNumberOfRawDatasetsPerStage = 10 ^ 4,
seed = 2021)
# power for logrank test
$overallReject simulationResultNPH
## [1] 0.805
# access raw simulation data
getRawData(simulationResultNPH)
simdat <-
# extract simulation run 1 for example in paper
1
simpick <- subset(simdat, iterationNumber == simpick)
simdat_run1 <-
# median time-to-dropout
- log(2) / log((1- dropoutRate)) * dropoutTime
med_do <-
# create variable with randomization dates
# note that addition / subtraction of date objects happens in days
# --> multiplication by 365.25 / 12 ~= 30 below
as.Date("2016-01-31", origin = "1899-12-30")
day0_nph <- day0_nph + simdat_run1$accrualTime * 365.25 / 12
rando_nph <- simdat_run1$timeUnderObservation
pfs1_nph <-
# event type variable: 0 = event, 1 = lost to FU, 2 = administratively censored
rep(NA, nrow(simdat_run1))
event_type_nph <-$event == TRUE] <- 0
event_type_nph[simdat_run1$event == FALSE & simdat_run1$dropoutEvent == TRUE] <- 1
event_type_nph[simdat_run1$event == FALSE & simdat_run1$dropoutEvent == FALSE] <- 2
event_type_nph[simdat_run1
# PFS event
as.numeric(event_type_nph == 0)
pfsevent1_nph <-
# treatment arm
factor(simdat_run1$treatmentGroup, levels = 1:2, labels = c("G", "R"))
arm_nph <-
# define clinical cutoff date based on simulation result
day0_nph + simdat_run1$observationTime[1] * 365.25 / 12 ccod1_nph <-
Plot Kaplan-Meier estimates:
par(las = 1, mar = c(4.5, 4.5, 2, 1), oma = c(0, 0, 3, 0))
survfit(Surv(pfs1_nph, pfsevent1_nph) ~ arm_nph)
so1_nph <-plot(so1_nph, col = 2:3, mark = "'", lty = 1, xlim = c(0, 100), xlab = "PFS (months)",
ylab = "probability of not having a PFS event")
abline(v = ms, col = grey(0.5), lwd = 2, lty = 2)
# title
mtext("Delayed separation", 3, line = 0, outer = TRUE)
Compute survival probabilities, their differences and confidence intervals at milestones.
c(36, 48)
ms <- confIntKM_t0(time = pfs1_nph[arm == "R"], event = pfsevent1_nph[arm == "R"],
msR1_nph <-t0 = ms, conf.level = 0.95)
confIntKM_t0(time = pfs1_nph[arm == "G"], event = pfsevent1_nph[arm == "G"],
msG1_nph <-t0 = ms, conf.level = 0.95)
msR1_nph
## t0 S at t0 lower.ci upper.ci
## [1,] 36 0.6626743 0.6066027 0.7145152
## [2,] 48 0.5801327 0.5109735 0.6462818
msG1_nph
## t0 S at t0 lower.ci upper.ci
## [1,] 36 0.7125514 0.6617708 0.7584903
## [2,] 48 0.6571132 0.5948195 0.7144277
# differences
confIntMilestoneDiff(time = pfs1_nph, event = pfsevent1_nph, group = arm,
ms1d1_nph <-t0 = ms[1], M = 10 ^ 3, conf.level = 0.95)$km
confIntMilestoneDiff(time = pfs1_nph, event = pfsevent1_nph, group = arm,
ms2d1_nph <-t0 = ms[2], M = 10 ^ 3, conf.level = 0.95)$km
ms1d1_nph
## 2.5% 97.5%
## -0.007217885 0.105350010
ms2d1_nph
## 2.5% 97.5%
## 0.01560682 0.13856650
par(las = 1, mar = c(4.5, 4.5, 2, 1), oma = c(0, 0, 3, 0))
survfit(Surv(pfs1_nph, pfsevent1_nph) ~ 1, subset = (arm_nph == "G"))
so2_nph <-plot(so2_nph, col = 2, mark = "'", lty = 1, xlim = c(0, 100), xlab = "PFS (months)",
ylab = "probability of not having a PFS event", conf.int = FALSE)
abline(v = ms, col = grey(0.5), lwd = 2, lty = 2)
# title
mtext("Delayed separation", 3, line = 0, outer = TRUE)
# add Betensky's scenarios
stabilityKM(time = pfs1_nph[arm_nph == "G"], pfsevent1_nph[arm_nph == "G"])
stab_del_t <- stabilityKM(time = pfs1_nph[arm_nph == "R"], pfsevent1_nph[arm_nph == "R"])
stab_del_c <-
# lower
survfit(Surv(stab_del_t$t_low, stab_del_t$c_low) ~ 1)
so2_nph_low <-lines(so2_nph_low, col = grey(0.5), lty = 1, conf.int = FALSE)
# upper
survfit(Surv(stab_del_t$t_up, stab_del_t$c_up) ~ 1)
so2_nph_up <-lines(so2_nph_up, col = grey(0.5), lty = 1, conf.int = FALSE)
# restriction timepoint for RMST
NULL # use minimum of the two last observed times in each arm
tau <-
# compute RMST for every simulated dataset
# use simulationResultNPH from p30
rep(NA, maxNumberOfIterations)
rmst_est <- rmst_est
rmst_pval <- rmst_est
logrank_pval <-
for (i in 1:maxNumberOfIterations){
subset(simdat, iterationNumber == i)
sim.i <-
sim.i$timeUnderObservation
time <- as.numeric(sim.i$event)
event <- factor(as.numeric(sim.i$treatmentGroup == 1))
arm <-
rmst2(time = time, status = event, arm = arm, tau = NULL)
rmst <-if (i == 1){rmst_sim1 <- rmst}
# we look at difference in RMST between arms
rmst$unadjusted.result[1, "Est."]
rmst_est[i] <- rmst$unadjusted.result[1, "p"]
rmst_pval[i] <-
# logrank p-value (score test from Cox regression)
summary(coxph(Surv(time, event) ~ arm))$sctest["pvalue"]
logrank_pval[i] <-
}
# empirical power
mean(rmst_pval <= 0.05)
rmst_power <- rmst_power
## [1] 0.692
mean(logrank_pval <= 0.05)
## [1] 0.805
$overallReject simulationResultNPH
## [1] 0.805
Now let us apply the above functions:
# compute various follow-up quantities
quantifyFU(rando = rando_nph, event_time = pfs1_nph,
fu1_nph <-event_type = event_type_nph, ccod = ccod1_nph)
fu1_nph$medians
fu1med_nph <-
# medians of all these distributions:
fu1med_nph
## median
## Observation time regardless of censoring 45.47638
## Observation time for those event-free 51.46786
## Time to censoring 52.84881
## Time to CCOD 54.64643
## Known function time 53.31310
## Korn potential follow-up 52.80119
## Potential follow-up considering events 48.07205
Plot distributions:
plot(fu1_nph)
Rufibach, Kaspar, Lynda Grinsted, Jiang Li, Hans-Jochen Weber, Cheng Zheng, and Jiangxiu Zhou. 2022. “Quantification of Follow-up Time in Oncology Clinical Trials with a Time-to-Event Endpoint: Asking the Right Questions.” arXiv. https://doi.org/10.48550/arxiv.2206.05216.
Wassmer, Gernot, and Friedrich Pahlke. 2021. Rpact: Confirmatory Adaptive Clinical Trial Design and Analysis. https://CRAN.R-project.org/package=rpact.