############################################################################### ## R code to implement the Fractional Risk Set (FRS) based Weighted Kapla-Meier (WKM) test proposed in ## ## Bandyopadhyay and Pumar (2016)."Comparing conditional survival functions with missing population marks in a competing risks model", CSDA,95: 150-160. ## ## Program also compares this test with the FRS-based Weighted Log-Rank (WLR) test of ## ## Bandyopadhyay and Datta (2008, JSPI). [Full reference: "Testing equality of survival distributions when the population marks are missing", JSPI, 138: 1722-1732] ## ## Code may not be optimal; use at your own risk! ## ## Author: Dipankar Bandyopadhyay, PhD, Dept. of Biostatistics, Virginia Commonwealth University ############################################################################### library(MASS) library(msm) r1 <- 1 ## (r1 is the rho for (rho, gamma) family) r2 <- 1 ## (r2 is the gamma for (rho, gamma) family) ######################## Kaplan-Meier for handling ties ############ # KM with two causes and a censoring. km <- function (T,del) {n <- length(T) tau <- sort(unique(T)) m <- length(tau) deln1 <- deln2 <- delnc <- deln <- deln0 <- s <- t<- y <- rep(0,m) for (i in 1:m) { temp <- tau[i] deln1[i] <- sum((T==temp) & (del==1)) deln2[i] <- sum((T==temp) & (del==2)) delnc[i] <- sum((T==temp) & (del==0)) deln[i] <- deln1[i] + deln2[i] + delnc[i] deln0[i] <- deln1[i] + deln2[i] } y <- n - c( 0,cumsum(deln[-m]) ) t <- 1 - (deln0/y) s <- cumprod(t) table <- cbind(tau,deln1,deln2,delnc,deln0,y,t,s) #return(table) } ################################ Regular KM with one cause and a censoring........... km1 <- function (T,del) {n <- length(T) tau <- sort( unique(T) ) m <- length(tau) deln1 <- deln2 <- delnc <- deln <- deln0 <- s <- t<- y <- rep(0,m) for (i in 1:m) { temp <- tau[i] deln1[i] <- sum((T==temp) & (del==1)) delnc[i] <- sum((T==temp) & (del==0)) #deln2[i] <- sum((T==temp) & (del==2)) deln[i] <- deln1[i] + delnc[i] deln0[i] <- deln1[i] } y <- n - c( 0,cumsum(deln[-m]) ) t <- 1 - (deln0/y) s <- cumprod(t) table <- cbind(tau,deln1,delnc,y,t,s) #return(table) } ### Load your dataset here ### I am using a dataset of 300 BMT patients registered at the University of Minnesota Medical Center ### Ti = Observed event time ### deld = failure/censoring indicator library(readxl) URL <- "https://people.vcu.edu/~dbandyop/BIOS647/Todddata.xlsx" temp_file <- tempfile(fileext = ".xlsx") download.file(url = URL, destfile = temp_file, mode = "wb") mydata = read_excel(path = temp_file) da = cbind(mydata[,6], mydata[,5]) Ti = da[,1] ## Observed Time-to-event deld = da[,2] ## Failure/censoring indicator s <- length(Ti) tab0 <- km(Ti,deld) m <- length(tab0[ ,1]) U <- tab0[,1] dn <- tab0[,5] s.t <- km1(U,dn) s.val <- s.t[ ,6] s.tn <- c(1, s.val[-s]) swt <- s.tn**(r1-1)*(1 - s.tn)**(r2-1) y <- tab0[ ,6] DN1bY0 <- tab0[ ,2]/y DN2bY0 <- tab0[ ,3]/y tab0<- cbind(tab0, DN1bY0,DN2bY0) S0 <- tab0[ ,8] S0s <- c(1, S0[-m]) tab0 <- cbind(tab0, S0s) P01 <- cumsum(S0s*DN1bY0) P02 <- cumsum(S0s*DN2bY0) tab0 <- cbind(tab0,P01,P02) P01l <- P01[m] P02l <- P02[m] P01p <- (P01l - P01)/S0 P02p <- (P02l - P02)/S0 P01p[P01p=="NaN"] <- 0 P02p[P02p=="NaN"] <-0 tab0 <- cbind(tab0,P01p,P02p) deln1 <- tab0[ ,2] deln2 <- tab0[ ,3] delnc <- tab0[ ,4] v1c<-rev(cumsum(rev(deln1))) v2c<-rev(cumsum(rev(deln2))) v3 <- delnc v3c1<-rev(cumsum(rev(P01p*v3))) v3c2<-rev(cumsum(rev(P02p*v3))) Y1 <- v1c + v3c1 # Fractional risk set for Cause 1 Y2 <- v2c + v3c2 # Fractional risk set for Cause 2 Y1.2 <- v1c Y2.2 <- v2c # tab0 <- cbind(tab0,v1c,v2c,v3c1, v3c2, Y1,Y2, Y1.1, Y2.1, Y1.2, Y2.2) ##### Bootstrap computation of variance starts here. bigDel1 <- bigDel2 <- bigDel1.lr <- NULL bigDel1.2 <- bigDel2.2 <- NULL boot <- 5000 ## Size of the bootsrap, feel free to change for ( b in 1:boot) ## bootstrap loop {cat("boot = ",b , "\n") bs <- sample(s,replace=TRUE) t <- Ti del <- deld myT <- t[bs] delbs <- del[bs] mydel <- sample(c(1,2), size = s, replace = TRUE, prob = c( (Y1[1]/s), (Y2[1]/s) ) ) mydel[delbs==0] <- 0 mytab <- cbind(myT,mydel) # This is the bootstrap sample mytab <- mytab[order(mytab[,1]), ] myT <- mytab[ ,1] mydel <- mytab[ ,2] mdel1 <- ifelse(mydel >0, 1, 0) tab <- km(myT, mydel) m <- length(tab[ ,1]) s.tb <- km1(myT, mdel1) s.valb <- s.tb[ ,6] s.tnb <- c(1, s.valb[-m]) y <- tab[ ,6] d2b <- mydel d2b[d2b==2] <- 1 md2b <- 1-d2b a1 <- km1(myT, md2b) wtb <- a1[ ,6] ## cwtb <- c(1, wtb[-m]) ### wtb <- 1 ## weight function taken to be 1 #### mwtb <- wtb**(rho-1)*(1-wtb)**(r-1) swtb <- s.tnb**(r1-1)*(1-s.tnb)**(r2-1) DN1bY0 <- tab[ ,2]/y DN2bY0 <- tab[ ,3]/y tab <- cbind(tab,DN1bY0,DN2bY0) S0 <- tab[,8] S0s <- c(1,S0[-m]) tab <- cbind(tab,S0s) P01 <- cumsum(S0s*DN1bY0) P02 <- cumsum(S0s*DN2bY0) tab <- cbind(tab,P01,P02) P01l <- P01[m] P02l <- P02[m] P01p <- (P01l - P01)/S0 P02p <- (P02l - P02)/S0 P01p[P01p=="NaN"] <- 0 P02p[P02p=="NaN"] <-0 tab <- cbind(tab,P01p,P02p) deln1 <- tab[,2] deln2 <- tab[,3] delnc <- tab[,4] v1c<-rev(cumsum(rev(deln1))) v2c<-rev(cumsum(rev(deln2))) v3 <- delnc v3c1<-rev(cumsum(rev(P01p*v3))) v3c2<-rev(cumsum(rev(P02p*v3))) Y1b <- v1c + v3c1 Y2b <- v2c + v3c2 tab <- cbind(tab,v1c,v2c,v3,v3c1,v3c2,Y1b,Y2b) ####### Now computing the fractional risk set (FRS) log-rank test, weight = 1 temp <- (Y1b+Y2b) temp[temp==0] <-1 L <- tab[ ,5] Del1b.lr<-sum(deln1 - Y1b*L/(temp)) Del1b.lr[Del1b.lr=="NaN"] <- 0 bigDel1.lr <- c(bigDel1.lr,Del1b.lr) ###### collecting results in a vector #Y1b[Y1b==0] <- 1 #Y2b[Y2b==0] <- 1 u1b <- 1 - (deln1/Y1b) u2b <- 1 - (deln2/Y2b) u1b[u1b=="NaN"] <- 0 u2b[u2b=="NaN"] <- 0 s1b <- cumprod(u1b) s2b <- cumprod(u2b) u11b <- 1- ( (1-deln1)/Y1b) u21b <- 1 - ( (1-deln2)/Y2b) u11b[u11b=="NaN"] <- 0 u21b[u21b=="NaN"] <- 0 u11b[u11b=="-Inf"] <- 1 u21b[u21b=="-Inf"] <- 1 c1b <- cumprod(u11b) c2b <- cumprod(u21b) c1bn <- c(1, c1b[-length(c1b)]) c2bn <- c(1, c2b[-length(c2b)]) p.1b <- Y1b[1]/(Y1b[1]+Y2b[1]) p.2b <- 1 - p.1b cwtb <- c1bn*c2bn/(p.1b*c1bn + p.2b*c2bn) cwtb[cwtb=="NaN"] <- 0 Del1b <- sqrt( Y1b[1]*Y2b[1]/(Y1b[1] + Y2b[1]) )*sum(cwtb*swtb*(s1b-s2b)) ## test statistic for bootstrap bigDel1 <- c(bigDel1,Del1b) ##### collecting it in a vector } ## Bootstrap ends here; sigma for our scheme generated.. bigDel1 <- subset(bigDel1, bigDel1!="NaN") bigDel1.lr <- subset(bigDel1.lr, bigDel1.lr!="NaN") sigmaour <- sqrt(var(bigDel1) ) ## bootstrap based standard error for FRS-KM test sigmaour.lr <- sqrt(var(bigDel1.lr) ) ## bootstrap based standard error for FRS-log-rank test ############### Original computation resumes; computing the test statistics for the original sample (Ti, deld) deln1 <- tab0[ ,2] deln2 <- tab0[ ,3] L <- tab0[ ,5] ####### Now for our FRS-Log rank test temp <- (Y1+Y2) temp[temp==0] <-1 Del1.lr<-sum(deln1 - Y1*L/(temp)) # This is for our estimated phi case; the numerator for the FRS-LR case u1 <- 1- (deln1/Y1) u2 <- 1- (deln2/Y2) u1[u1=="NaN"] <- 0 u2[u2=="NaN"] <- 0 s1 <- cumprod(u1) s2 <- cumprod(u2) u11 <- 1- ( (1-deln1)/Y1) u21 <- 1 - ( (1-deln2)/Y2) u11[u11=="NaN"] <- 0 u21[u21=="NaN"] <- 0 u11[u11=="-Inf"] <- 1 u21[u21=="-Inf"] <- 1 c1 <- cumprod(u11) c2 <- cumprod(u21) c1n <- c(1, c1[-length(c1)]) c2n <- c(1, c2[-length(c2)]) p.1 <- Y1[1]/(Y1[1]+Y2[1]) p.2 <- 1 - p.1 cwt <- c1n*c2n/(p.1*c1n + p.2*c2n) swt = swt[-1] cwt[cwt=="NaN"] <- 0 Del1 <- sqrt( Y1[1]*Y2[1]/(Y1[1] + Y2[1]) )*sum(cwt*swt*(s1-s2)) ## The numerator for the FRS-KM test ### Test (Z)- statistics Z1 <- Del1/sigmaour ### Z statistics [KM statistic/standard error] Z2 <- Del1.lr/sigmaour.lr ### Z statistics [log-rank statistic/standard error] pvalKM <- sum(abs(bigDel1) >= abs(Del1))/boot # p-value (2-sided for KM-based test) pvalLR <- sum(abs(bigDel1.lr) >= abs(Del1.lr))/boot # p-value (2-sided LR test) print(pvalKM) print(pvalLR) ###### You can also compute 1-sided p-values; see below. pvalKM.one <- sum(bigDel1 >= Del1)/boot pvalLR.one <- sum(bigDel1.lr >= Del1.lr)/boot print(pvalKM.one) print(pvalLR.one)