############################################################################### 
## R code to implement the Fractional Risk Set (FRS) based Weighted Kapla-Meier (WKM) test proposed in
##
## Bandyopadhyay and Pumar (2016)."Comparing conditional survival functions with missing population marks in a competing risks model", CSDA,95: 150-160.
## 
## Program also compares this test with the FRS-based Weighted Log-Rank (WLR) test of 
##
## Bandyopadhyay and Datta (2008, JSPI). [Full reference: "Testing equality of survival distributions when the population marks are missing", JSPI, 138: 1722-1732]
##
## Code may not be optimal; use at your own risk! 
##
## Author: Dipankar Bandyopadhyay, PhD, Dept. of Biostatistics, Virginia Commonwealth University 
###############################################################################

library(MASS)
library(msm)

r1 <- 1 ## (r1 is the rho for (rho, gamma) family)
r2 <- 1 ## (r2 is the gamma for (rho, gamma) family)



######################## Kaplan-Meier for handling ties ############

# KM with two causes and a censoring. 


km <- function (T,del) 
{n <- length(T)
 tau <- sort(unique(T))
 m <- length(tau)
 deln1 <- deln2 <- delnc <- deln <- deln0 <- s <- t<- y <- rep(0,m)
for (i in 1:m) 
 {
        temp <- tau[i]
	deln1[i] <- sum((T==temp) & (del==1))
        deln2[i] <- sum((T==temp) & (del==2))
        delnc[i] <- sum((T==temp) & (del==0))

deln[i]  <- deln1[i] + deln2[i] + delnc[i]
deln0[i] <- deln1[i] + deln2[i]  
  }

y <- n - c( 0,cumsum(deln[-m]) )
t <- 1 - (deln0/y)
s <- cumprod(t)
table <- cbind(tau,deln1,deln2,delnc,deln0,y,t,s)
#return(table)           
}

################################  Regular KM with one cause and a censoring...........
km1 <- function (T,del) 
{n <- length(T)
tau <- sort( unique(T) )
m <- length(tau)
deln1 <- deln2 <- delnc <- deln <- deln0 <- s <- t<- y <- rep(0,m)
for (i in 1:m) 
 {
         temp <- tau[i]
	deln1[i] <- sum((T==temp) & (del==1))
	delnc[i] <- sum((T==temp) & (del==0))
	#deln2[i] <- sum((T==temp) & (del==2))
deln[i]  <- deln1[i]  + delnc[i]
deln0[i] <- deln1[i]  
}
y <- n - c( 0,cumsum(deln[-m]) )
t <- 1 - (deln0/y)
s <- cumprod(t)
  
table <- cbind(tau,deln1,delnc,y,t,s)
#return(table)           
}


### Load your dataset here
### I am using a dataset of 300 BMT patients registered at the University of Minnesota Medical Center
### Ti = Observed event time
### deld = failure/censoring indicator


library(readxl)
URL <- "https://people.vcu.edu/~dbandyop/BIOS647/Todddata.xlsx"
temp_file <- tempfile(fileext = ".xlsx")
download.file(url = URL, destfile = temp_file, mode = "wb")
mydata = read_excel(path = temp_file)
da = cbind(mydata[,6], mydata[,5])
Ti = da[,1]   ## Observed Time-to-event
deld = da[,2] ## Failure/censoring indicator



s <- length(Ti)
tab0 <- km(Ti,deld)
m <- length(tab0[ ,1])
U <- tab0[,1]
dn <- tab0[,5] 
s.t <- km1(U,dn)
s.val <- s.t[ ,6]
s.tn <- c(1, s.val[-s])
swt <- s.tn**(r1-1)*(1 - s.tn)**(r2-1)
y <- tab0[ ,6]

DN1bY0 <- tab0[ ,2]/y
DN2bY0 <- tab0[ ,3]/y
tab0<- cbind(tab0, DN1bY0,DN2bY0)

S0 <- tab0[ ,8]
S0s <- c(1, S0[-m])
tab0 <- cbind(tab0, S0s)

P01 <- cumsum(S0s*DN1bY0)
P02 <- cumsum(S0s*DN2bY0)
tab0 <- cbind(tab0,P01,P02)


P01l <- P01[m]
P02l <- P02[m]
P01p <- (P01l - P01)/S0
P02p <- (P02l - P02)/S0 
P01p[P01p=="NaN"] <- 0
P02p[P02p=="NaN"] <-0
tab0 <- cbind(tab0,P01p,P02p)


deln1 <- tab0[ ,2]
deln2 <- tab0[ ,3]
delnc <- tab0[ ,4]
v1c<-rev(cumsum(rev(deln1)))
v2c<-rev(cumsum(rev(deln2)))
v3 <- delnc
v3c1<-rev(cumsum(rev(P01p*v3)))
v3c2<-rev(cumsum(rev(P02p*v3)))
Y1 <- v1c + v3c1  # Fractional risk set for Cause 1
Y2 <- v2c + v3c2  # Fractional risk set for Cause 2
Y1.2 <- v1c
Y2.2 <- v2c


# tab0 <- cbind(tab0,v1c,v2c,v3c1, v3c2, Y1,Y2, Y1.1, Y2.1, Y1.2, Y2.2)




##### Bootstrap computation of variance starts here. 


bigDel1 <- bigDel2 <- bigDel1.lr <- NULL
bigDel1.2 <- bigDel2.2 <- NULL


boot <- 5000 ## Size of the bootsrap, feel free to change 

for ( b in 1:boot) ## bootstrap loop 
{cat("boot = ",b , "\n")
bs <- sample(s,replace=TRUE)
t <- Ti
del <- deld
myT <- t[bs]
delbs <- del[bs]
mydel <- sample(c(1,2), size = s, replace = TRUE, prob = c( (Y1[1]/s), (Y2[1]/s) ) )
mydel[delbs==0] <- 0
mytab <- cbind(myT,mydel)  # This is the bootstrap sample 


mytab <- mytab[order(mytab[,1]), ]
myT <- mytab[ ,1]
mydel <- mytab[ ,2]
mdel1 <- ifelse(mydel >0, 1, 0)
tab <- km(myT, mydel)


m <- length(tab[ ,1])
s.tb <- km1(myT, mdel1)
s.valb <- s.tb[ ,6]
s.tnb <- c(1, s.valb[-m])
y <- tab[ ,6]
d2b <- mydel
d2b[d2b==2] <- 1
md2b <- 1-d2b

a1 <- km1(myT, md2b)
wtb <- a1[ ,6]


## cwtb <- c(1, wtb[-m])
###  wtb <- 1  ## weight function taken to be 1
#### mwtb <- wtb**(rho-1)*(1-wtb)**(r-1)


swtb <- s.tnb**(r1-1)*(1-s.tnb)**(r2-1)

DN1bY0 <- tab[ ,2]/y
DN2bY0 <- tab[ ,3]/y
tab <- cbind(tab,DN1bY0,DN2bY0)

S0 <- tab[,8]
S0s <- c(1,S0[-m])
tab <- cbind(tab,S0s)

P01 <- cumsum(S0s*DN1bY0)
P02 <- cumsum(S0s*DN2bY0)
tab <- cbind(tab,P01,P02)


P01l <- P01[m]
P02l <- P02[m]
P01p <- (P01l - P01)/S0
P02p <- (P02l - P02)/S0 
P01p[P01p=="NaN"] <- 0
P02p[P02p=="NaN"] <-0
tab <- cbind(tab,P01p,P02p)


deln1 <- tab[,2]
deln2 <- tab[,3]
delnc <- tab[,4]
v1c<-rev(cumsum(rev(deln1)))
v2c<-rev(cumsum(rev(deln2)))
v3 <- delnc
v3c1<-rev(cumsum(rev(P01p*v3)))
v3c2<-rev(cumsum(rev(P02p*v3)))
Y1b <- v1c + v3c1
Y2b <- v2c + v3c2
tab <- cbind(tab,v1c,v2c,v3,v3c1,v3c2,Y1b,Y2b)



####### Now computing the fractional risk set (FRS) log-rank test, weight = 1  

temp <- (Y1b+Y2b)
temp[temp==0] <-1
L <- tab[ ,5]
Del1b.lr<-sum(deln1 - Y1b*L/(temp))
Del1b.lr[Del1b.lr=="NaN"] <- 0
bigDel1.lr <- c(bigDel1.lr,Del1b.lr)   ###### collecting results in a vector

#Y1b[Y1b==0] <- 1
#Y2b[Y2b==0] <- 1
u1b <- 1 - (deln1/Y1b)
u2b <- 1 - (deln2/Y2b)
u1b[u1b=="NaN"] <- 0
u2b[u2b=="NaN"] <- 0
s1b <- cumprod(u1b)
s2b <- cumprod(u2b)
u11b <- 1-  ( (1-deln1)/Y1b)
u21b <- 1 - ( (1-deln2)/Y2b)
u11b[u11b=="NaN"] <- 0
u21b[u21b=="NaN"] <- 0
u11b[u11b=="-Inf"] <- 1
u21b[u21b=="-Inf"] <- 1
c1b <- cumprod(u11b)
c2b <- cumprod(u21b)
c1bn <- c(1, c1b[-length(c1b)])
c2bn <- c(1, c2b[-length(c2b)])
p.1b <- Y1b[1]/(Y1b[1]+Y2b[1])
p.2b <- 1 - p.1b 
cwtb <- c1bn*c2bn/(p.1b*c1bn + p.2b*c2bn)
cwtb[cwtb=="NaN"] <- 0


Del1b <- sqrt( Y1b[1]*Y2b[1]/(Y1b[1] + Y2b[1]) )*sum(cwtb*swtb*(s1b-s2b))  ## test statistic for bootstrap
bigDel1 <- c(bigDel1,Del1b)  ##### collecting it in a vector

}  ## Bootstrap ends here; sigma for our scheme generated..


bigDel1 <- subset(bigDel1, bigDel1!="NaN")
bigDel1.lr <- subset(bigDel1.lr, bigDel1.lr!="NaN")
sigmaour <- sqrt(var(bigDel1) )  ## bootstrap based standard error for FRS-KM test
sigmaour.lr <- sqrt(var(bigDel1.lr) )  ## bootstrap based standard error for FRS-log-rank test


###############  Original computation resumes; computing the test statistics for the original sample (Ti, deld)  

deln1 <- tab0[ ,2]
deln2 <- tab0[ ,3]
L <- tab0[  ,5]




#######  Now for our FRS-Log rank test


temp <- (Y1+Y2)
temp[temp==0] <-1
Del1.lr<-sum(deln1 - Y1*L/(temp))  # This is for our estimated phi case; the numerator for the FRS-LR case


u1 <- 1-  (deln1/Y1)
u2 <- 1-  (deln2/Y2)
u1[u1=="NaN"] <- 0
u2[u2=="NaN"] <- 0
s1 <- cumprod(u1)
s2 <- cumprod(u2)
u11 <- 1-  ( (1-deln1)/Y1)
u21 <- 1 - ( (1-deln2)/Y2)
u11[u11=="NaN"] <- 0
u21[u21=="NaN"] <- 0
u11[u11=="-Inf"] <- 1
u21[u21=="-Inf"] <- 1
c1 <- cumprod(u11)
c2 <- cumprod(u21)
c1n <- c(1, c1[-length(c1)])
c2n <- c(1, c2[-length(c2)])
p.1 <- Y1[1]/(Y1[1]+Y2[1])
p.2 <- 1 - p.1 
cwt <- c1n*c2n/(p.1*c1n + p.2*c2n)
swt = swt[-1] 
cwt[cwt=="NaN"] <- 0



Del1 <- sqrt( Y1[1]*Y2[1]/(Y1[1] + Y2[1]) )*sum(cwt*swt*(s1-s2))  ## The numerator for the FRS-KM test 



### Test (Z)- statistics


Z1 <- Del1/sigmaour        ### Z statistics [KM statistic/standard error]
Z2 <- Del1.lr/sigmaour.lr  ### Z statistics [log-rank statistic/standard error]


pvalKM <- sum(abs(bigDel1) >= abs(Del1))/boot   # p-value (2-sided for KM-based test)
pvalLR <- sum(abs(bigDel1.lr) >= abs(Del1.lr))/boot # p-value (2-sided LR test) 
print(pvalKM)
print(pvalLR)


###### You can also compute 1-sided p-values; see below. 

pvalKM.one <- sum(bigDel1 >= Del1)/boot  
pvalLR.one <- sum(bigDel1.lr >= Del1.lr)/boot
print(pvalKM.one)
print(pvalLR.one)