############################################################################################
#
#  Local FDR estimator for multiple hypothesis tests where the alternative distribution 
#  is non-central chi-square with d.f. 1
#
#  Author of the codes: Jozsef Bukszar
#                       Center for Biomarker Research and Personalized Medicine, 
#                        School of Pharmacy, Medical College of Virginia of VCU
#                       jbukszar@vcu.edu
#
#  
############################################################################################



localFDR_estimator_nsort <- function(inp_x,first=-1,MeinRice=TRUE,rule=1,delrange=c(0, 10)) {

#  Estimates the local FDR by the real MLE method.
#
#  CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!
#
#  INPUTS:
#      x  =  array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!)
#
#  Function needed: lFDR_estim, mle_cond_quant, linbound_pval, conserv_pval (directly)
#                   real likelihood calculator codes (indirectly)

      x <- inp_x
   
      if (min(x)==0) { # THIS STEP IS NEEDED BECAUSE OF THE CONTINUOUS APPROXIMATION OF A DISCRETE DISTRIBUTION
         minx <- min(x[x>0])
         x[x==0] = 0.5*minx
      }

## p0 estimator

      pvals <- 1 - pchisq(x,1) # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!

      if(MeinRice) m1_est<-linbound_pval(pvals)
       else m1_est<-conserv_pval(pvals)

      print( " The estimated number of positive effects is: ")
      print( m1_est )  

      cond_m1 <-  m1_est + rule

## End of p0 estimator

## Individual effect size estimator

      delest <- rep(0,cond_m1)
      for (i in 1:cond_m1) {
#          print( c("Estimating effect sizes ",i) ) 
          delest[i] = mle_cond_quant(x,i,delrange)   
#          print(delest[i])
#          print("------------")            
      }

      if (cond_m1>1) {
         auxa <- (1:cond_m1)*delest
         buxa <- auxa[2:cond_m1]-auxa[1:(cond_m1-1)]
         indies <- c(delest[1],buxa)      
      } else indies <- delest[1] 

## End of Individual effect size estimator

## Calculating local FDRs

      resu <- lFDR_estim_nsort(x,indies,first) # resu=lfdr_est

      list(resu,indies)
   
}



###################

ies_estimator <- function(inp_x,MeinRice=TRUE,rule=1,delrange=c(0, 10)) {

#  Estimates the individual effect sizes by the real MLE method.
#
#  INPUTS:
#      x  =  array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!)
#
#  Function needed: mle_cond_quant, linbound_pval, conserv_pval (directly)
#                   real likelihood calculator codes (indirectly)


      x <- inp_x

   
      if (min(x)==0) { # THIS STEP IS NEEDED BECAUSE OF THE CONTINUOUS APPROXIMATION OF A DISCRETE DISTRIBUTION
         minx <- min(x[x>0])
         x[x==0] = 0.5*minx
      }

      pvals <- 1 - pchisq(x,1) # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!

      if(MeinRice) m1_est<-linbound_pval(pvals)
       else m1_est<-conserv_pval(pvals)

      print( " The estimated number of positive effects is: ")
      print( m1_est )  

      cond_m1 <-  m1_est + rule


      delest <- rep(0,cond_m1)
      for (i in 1:cond_m1) {
          print( c("Estimating effect sizes ",i) ) 
          delest[i] = mle_cond_quant(x,i,delrange) 
          print(delest[i])
          print("------------")       
      }

      if (cond_m1>1) {
         auxa <- (1:cond_m1)*delest
         buxa <- auxa[2:cond_m1]-auxa[1:(cond_m1-1)]
         indies <- c(delest[1],buxa)      
      } else indies <- delest[1] 

      indies
   

}



###################

lFDR_estim_nsort <- function(x,indes,inp_first=-1) {

#  Estimates the local FDR based on the test statistic values (x) and individual effect size/detectability estimates (indes)
#
#  CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!
#
#  RETURNS THE LOCAL FDR ESITMATES OF THE FIRST 'FIRST' TESTS IN INPUT X NOT SORTED!!!
#
#
#  INPUTS:
#       x  =  array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!)
#    indes =  individual effect size/detectability estimates
#    first =  the number of the first tests in x whose local FDR estimates the code calculates (The code returns all local FDR esimates if 
#             first is set to negative number) 
#
#  Function needed: -

 
      numark <- length(x)

      if (inp_first<0) first <- numark
       else first <- inp_first


      lenind <- length(indes)
      numnulls <- numark - lenind

      sox <- x[1:first]

      aux <- numnulls*dchisq(sox,1) 
      nonc <- indes*indes

      bux <- aux
      for (j in 1:lenind) bux = bux + dchisq(sox,1,nonc[j])

      lfdr_est <- aux/bux

      lfdr_est
   
}



###################

mle_cond_quant <- function(x,m1,delrange=c(0, 10)) {

#  Estimates the average effect size by the real MLE method conditioned on m1.
#
#  INPUTS:
#      x  =  array of observation
#
#  Function needed: mlefval_quant (directly)
#                   funcs, funcsdirect, binar, combine_mlep0aux (indirectly)


   logl <- function(del0) {
      val <- mlefval_quant(x,del0,m1)
      -val
   }
   
   dmin <- optimize(logl, delrange, tol = 0.00001)

   delest <- dmin$minimum

   delest

}


#########################################################################################################
#
#         P0 ESTIMATORS AND MISCELLANEOUS
#
#########################################################################################################


conserv_pval <- function(pval,k=1) {

#  Estimates the number positive effects with conservative method based on p-values. 
#
#  Function needed: -

   m <- length(pval)
   cutoff <- k/m
   d <- sum(pval<cutoff) 

   m1_est <- max(0,round(d-k))

   m1_est

}


###################

linbound_pval <- function(pval_inp,alpha=0.5) {

#  Estimates the number of positive effects with Meinshausen-Rice method based on p-values.
#
#  Function needed: -

   pval <- sort(pval_inp)
   m <- length(pval)
   aux <- (1:m)/m - pval/alpha

   bux <- aux / (1-pval)

   raw <- round(m*max(bux))

   m1_est <- max(0,raw)

   m1_est

}


###################

esize_to_majafandmaf <- function(majaf,maf,gamma=0.5) {

#  Calculates the effect size (effect) for given case minor allele frequency (majaf) and control minor allele frequency (maf)
#  for ALLELE-BASED TESTS FOR CASE-CONTROL STUDIES. 

   delta <- 1 - gamma

   num <- gamma*delta*(majaf-maf)*(majaf-maf)
   aux <- gamma*majaf+delta*maf
   effect <- sqrt( num/(aux*(1-aux)) )

   effect

}


#########################################################################################################
#
#         CODES FOR COMPUTING THE REAL LIKELIHOOD FUNCTION VALUE
#
#########################################################################################################


mlefval_quant <- function(x,del,m1,ShallIwrite=FALSE,tstep=10,MIN_NUM_STEP=3,MLIM=2) {

#  Calculates the Real loglikelihood function value for m1 and del (effect size).
#
#  INPUTS:
#             x  =  array of observation
#           del  =  effect size
#             n  =  sample size (cases + controls)
#   MIN_NUM_STEP =  minimum number of steps handled by funcsdirect, e.g. if MIN_NUM_STEP=3, the top 3*10=30 a's are handled by funcsdirect.
#
#  CODES NEEDED: funcs, funcsdirect, binar, combine_mlep0aux
#

# MLIM <- 4 # Upper limit that funcs still can calculate

 m <- length(x)

# nonc <- del*del
# a <- dchisq(x,1,nonc) / dchisq(x,1)

 cons <- exp(-0.5*del*del)
 a <- cons*cosh(del*sqrt(x))



 if (m1<MLIM) {

   if (m1 == 0) resu <- 0  # Not 1.0 because we also have to take logarithm. 
    else if (m1 == 1) resu <- log(sum(a)) - log(m)
     else { 
         aux <- funcs(a,m1)
#        logs1 <- aux[2:(m1+1)]  # The logarithm of array S_n
         resu <- aux[m1+1] - sum(log( (m-m1+1):m )) + sum(log(1:m1))
     }


 } else { 

#   tstep <- 10  # technical parameter for funcsdirect

   m1max <- max(MIN_NUM_STEP*tstep,m1) 

   as <- sort(a)

   t1 <- m-MIN_NUM_STEP*tstep
   a1 <- as[1:t1]
   aux <- funcs(a1,m1max)

   if (aux[1]==0 ) {
      logs1 <- aux[2:(m1max+1)]  # The logarithm of array S_n
   } else {
       while (aux[1]!=0) {
         t1 = t1-tstep
         a1 <- as[1:t1]
         aux <- funcs(a1,m1max)
       }
       logs1 <- aux[2:(m1max+1)]   # The logarithm of array S_n (lower part)
#       if (ShallIwrite) {
          print( " From mlefval: The length of the part handled by funcsdirect became : ")
          print( m - t1 ) 
          print( "-----------" ) 
#       }
   }

   if (ShallIwrite) {
      print( " From mlefval: vector given by funcs is : ")
      print( logs1 ) 
      print( "-----------" ) 
   }


   nsteps <- round( (m-t1) / tstep )

if (ShallIwrite) print( c(" nsteps = ", nsteps ) )

   aa <- matrix(0,nsteps,tstep)
   for (i in 1:nsteps) {
       lo <- t1+(i-1)*tstep
       aa[i,] = as[(lo+1):(lo+tstep)] 
   }


   logss <- matrix(0,nsteps,tstep)
   for (i in 1:nsteps)
       logss[i,] = funcsdirect(aa[i,],tstep)


   if (ShallIwrite) {
      print( " From mlefval: matrix logss is : ")
      print( logss ) 
      print( "-----------" ) 
   }


   logs <- logss[1,]  # Will be the logarithm of array S_n (upper part, i.e. combined products of funcsdirect)
   if (nsteps>1) {
      for ( i in 2:nsteps ) {
          aux <- logs
          logs <- combine_mlep0aux(logss[i,],aux)   # Combining the arrays calculated by funcsdirect
      }
   }

   if (ShallIwrite) {
      print( " From mlefval: vector logs is : ")
      print( logs ) 
      print( "-----------" ) 
   }


   logli <- rep(0,m1max)  # Will be the logarithm of array S_n (obtained by combining lower and upper part)


   su <- exp(logs1[1]) + exp(logs[1])
   logli[1] = log(su) - log(m)


   su <- exp(logs1[2])
   su = su + exp(logs1[1]+logs[1])
   su = su + exp(logs[2])
   logli[2] = log(su) - sum(log( (m-1):m )) + log(2)

   lenlogs <- length(logs)
   it <- 2
   while ( it < m1 ) {
       it = it + 1
       iaux<-min(it-1,lenlogs)
       
       if (it <= lenlogs) bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux],logs[it])
         else bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux])

       bas <- 0.5*(min(bux)+max(bux))
       su <- sum( exp(bux-bas) )

       logli[it] = bas + log(su) - sum(log( (m-it+1):m )) + sum(log(1:it))

   }

   if (ShallIwrite) {
      print( " From mlefval: vector logli is : ")
      print( logli ) 
      print( "-----------" ) 
   }


   resu <- logli[m1]  

 } # closing of if

 resu  # Not the real loglikelihood function value is returned, the real would be sum(log( dchisq(x,1)  )) more.

  
}


###################

funcs<- function(a,rn) {

#  Calculates logarithm of array S for i=1,...,rn, where 
#  S_n =sum_( {i_1,...,i_n} \in {1,...,m} )  a_i_1...a_i_n,
#  and m is the length of array a.
#
#  Auxalary function for the real likelihood function calculator.
#
#  INPUTS:
#      a  =  array a
#     rn  =  the maximum number of elements in the product
#
     
   m <- length(a)

   T <- rep(0,rn)
   S <- rep(0,(rn+1))

#   bas <- 1/(sum(a)**0.8)
   bas <- 1/sum(a) 
#   bas <- 1

   aux <- rep(1,m)
   for (i in 1:rn) {
       aux = bas*aux*a
       T[i] = sum(aux)
   }

# print(T)

   bux <- rep(0,rn)
   bux[1] = 1
   for (i in 2:rn) bux[i] = -bux[i-1]
# print(bux)
   
   S[1] = 1 # S[i+1] = (bas**i) * S_i  !!!!!!!!!
   for (i in 1:rn) {

       auxa<-T[1:i]*S[i:1]
       S[i+1] = sum(auxa*bux[1:i])/i

   }

# print( " --------------- " )
# print(S[2:(rn+1)]*exp((1:rn)*log(bas)) )

   ir <- 2
   nya <- rn+2
   while(ir<nya && S[ir]>0) ir = ir+1

   logS <- log(S[2:(ir-1)]) - (1:(ir-2))*log(bas) 

   if (ir==nya) ir=0 else ir=ir-2  # ir = the number of positive items in S if not cut


   c(ir,logS)

}



###################

funcsdirect<- function(a,rn) {

#  Calculates logarithm of array S for i=1,...,rn, where 
#  S_n =sum_( {i_1,...,i_n} \in {1,...,m} )  a_i_1...a_i_n,
#  and m is the length of array a.
#
#  Auxalary function for the real likelihood function calculator.
#  BASED ON DIRECT APPROACH!!!
#
#  INPUTS:
#      a  =  array a
#     rn  =  the maximum number of elements in the product
#
    

   S <- rep(0,rn)

   S[1] = sum(a)
   if (rn>1) {
      for (k in 2:rn) {
         S[k] = funcsdirect_aux(a,k)   
      }      
   }

   logS <- log(S)

   logS

}


###########

funcsdirect_aux<- function(a,k) {

#
#  RECURSIVE FUNCTION!!!!
#
#  Calculates logarithm of array S for i=1,...,rn, where 
#  S_n =sum_( {i_1,...,i_n} \in {1,...,m} )  a_i_1...a_i_n,
#  and m is the length of array a.
#
#  Auxalary function for the real likelihood function calculator.
#  BASED ON DIRECT APPROACH!!!
#
#  INPUTS:
#      a  =  array a
#     rn  =  the maximum number of elements in the product
#
     
   m <- length(a)

   if (k==0) res <- 1
      else if (k==m) res <- prod(a)  
         else {
           aa <- funcsdirect_aux(a[1:(m-1)],k-1)
           bb <- funcsdirect_aux(a[1:(m-1)],k)
           res <- a[m]*aa + bb
         }

    res     
}


###################

combine_mlep0aux<- function(logsa,logsb,ShallIwrite=FALSE) {


#
#  Auxalary function for the real likelihood function calculator.
#
#  WARNING! logsb is supposed to be not shorter than logsa
#
   
   if (ShallIwrite) {
      print( " combine_mlep0aux: logsa and logsb are : ")
      print( logsa ) 
      print( logsb ) 
      print( "----- aux -" ) 
   }

   lena <- length(logsa)
   lenb <- length(logsb)
   
   lenc <- lena + lenb
   logsc <- rep(0,lenc)

   su <- exp(logsa[1]) + exp(logsb[1])
   logsc[1] = log(su)

   for ( it in 2:lena ) {

        su <- exp(logsb[it])

        for ( k in 1:(it-1) ) 
               su = su + exp(logsa[k]+logsb[it-k])

        su = su + exp(logsa[it])

        logsc[it] = log(su)
    }

   if (lenb > lena) {
     for ( it in (lena+1):lenb ) {

        su <- exp(logsb[it])

        for ( k in 1:lena ) 
               su = su + exp(logsa[k]+logsb[it-k])

        logsc[it] = log(su)
      }
    }


   for ( it in (lenb+1):lenc ) {

        su <- 0
           
        for ( k in (it-lenb):lena ) 
               su = su + exp(logsa[k]+logsb[it-k])

        logsc[it] = log(su)
    }

    logsc

}


#########################################################################################################
#
#         CODES THAT GENERATE TEST STATISTIC FUNCTION VALUES
#
#########################################################################################################

gen_allelebased <- function(maf_case,maf,num_nulls,nsample,gamma=0.5,ShallIWrite=TRUE) {

#  Generates case-control allele-based Pearson test statistic values based on
#  the given probability tables. 
#
#   nsample = the total number of alleles, NOT the number of individuals
#
#  TO RUN: m1<-10; x <- gen_allelebased(seq(0.4,0.35,length=m1),rep(0.3,m1),100000-m1,4000,gamma=0.5)
# 

   maf0 <- seq(0.1,0.5,length=num_nulls)

   num_alts <- length(maf_case)
   if(num_alts!=length(maf)) print( " Error from gen_allelebased: the length of maf_case and maf should be the same" )

   if(ShallIWrite){
      print("The effects are")
      aux <- esize_to_majafandmaf(maf_case,maf,gamma)
      print(aux)
      print("The detectabilities are")
      print(aux*sqrt(nsample))
   }

   num_marks <- num_alts+num_nulls

   delta <- 1-gamma
   n_cases <- round(nsample*gamma)
   n_controls <- round(nsample*delta)


   x1 <- rep(0,num_alts)
   for (i in 1:num_alts) x1[i]=rbinom(1, n_cases, maf_case[i])

   y1 <- rep(0,num_alts)
   for (i in 1:num_alts) y1[i]=rbinom(1, n_controls, maf[i])

   x0 <- rep(0,num_nulls)
   for (i in 1:num_nulls) x0[i]=rbinom(1, n_cases, maf0[i])

   y0 <- rep(0,num_nulls)
   for (i in 1:num_nulls) y0[i]=rbinom(1, n_controls, maf0[i])

   x <- c(x1,x0)
   y <- c(y1,y0)

   diff <- delta*x - gamma*y
   su <- x + y
   su2 <- nsample - su
   Pears <- diff*diff*nsample / (gamma*delta*su*su2)

   Pears


}



###################

gen_quant <- function(m,delr,blocksiz=5, blockval=0) {

#   Generates m - m1 values based on central chi-square with d.f. 1 and 
#   m1 values based on common approach with DIFFERENT effect sizes in array delr, where
#
#   blocksiz = size of the square block whose elements correlate each other
#   blockval = the correlation within the block
#
#   delr  =  array of effect sizes
#
#   CODES NEDEED: -
#

    m1 <- length(delr)

    m0 <- m - m1

    r <- sqrt(blockval)
    s <- sqrt(1-r*r)    


    m0block <- round(m0/blocksiz)

    y <- rnorm(m0)
    zaux <- rnorm(m0block)
    z <- rep(zaux, each = blocksiz)

    x0aux <- r*z+s*y
    x0 <- x0aux*x0aux

# Generating x1

    m1block <- round(m1/blocksiz)

    y <- rnorm(m1)
    zaux <- rnorm(m1block)
    z <- rep(zaux, each = blocksiz)

    x1aux <- r*z+s*y + delr
    x1 <- x1aux*x1aux


    x <- c(x1,x0)

    x


}



#########################################################################################################
#
#         TESTER CODE
#
#########################################################################################################



tester_lfdr <- function(m,delr,N,blocksiz=5,blockval=0,rule=1,delrange=c(0, 10)) {

#  Tests the SPECIAL VERSION of the local FDR estimator, 'localFDR_estimator', i.e. the one that assumes
#  CHI-SQUARE DISTRIBUTION WITH D.F. 1.
#
#
#  TO RUN: system.time( resu<-tester_lfdr(100000,seq(6.0,4.0,length=10),N=5) )
#
#
#  CODES NEDEED: localFDR_estimator_nsort, gen_quant (directly)
#

   lendelr <- length(delr)
   first <- 2*lendelr

   ltdr_est <- matrix(0,N,first)
   infocont <- matrix(0,N,3)      # Halfway between estimated and actual information content
   infocont_est <- matrix(0,N,3)  # Estimated information content 
   m1est_vec <- rep(0,N)
   indies_est <- matrix(0,N,2*first)

   for (iter in 1:N) {

    set.seed(1000*iter)

    print("-------------------")
    print(c("Iteration:" , iter) )

#    if (10*floor(iter/10)==iter) print(iter)

    x <- gen_quant(m,delr,blocksiz, blockval) # Generating test static values according to CHI-SQUARE DISTRIBUTION WITH D.F. 1..

    auxa <- localFDR_estimator_nsort(x,-1,MeinRice=TRUE,rule,delrange) # Calculating the local FDR estimates

    ltdre <- 1 - auxa[[1]]
    ltdr_est[iter,] = ltdre[1:first]

    su <- sum(ltdre)
    infocont[iter,1]=sum(ltdre[1:lendelr])
    infocont[iter,2]=sum(ltdre)

    m1est <- length(auxa[[2]])
    m1est_vec[iter] = m1est

    indies_est[iter,1:m1est] = auxa[[2]]

    ordl <- order(ltdre,decreasing=TRUE)
    infocont_est[iter,1]=sum(ltdre[ordl[1:m1est]])
    infocont_est[iter,2]=sum(ltdre)
    
    print(round(ltdre[1:first],3))
    print(round(infocont_est[iter,],3))
    print(round(ltdre[ordl[1:m1est]],3))
    print(ordl[1:m1est])

   }

   maxm1est <- max(m1est_vec)
   indies_est <- indies_est[,1:maxm1est]

   infocont[,3] = infocont[,1] / infocont[,2]

   infocont_est[,3] = infocont_est[,1] / infocont_est[,2]

   mealtdr_est <- rep(0,first)

   for (i in 1:first) mealtdr_est[i] = mean(ltdr_est[,i])

   print(" Mean of the local TDR estimates: " )
   print(round(mealtdr_est,3))

   list(ltdr_est,infocont_est,infocont,m1est_vec,indies_est)

}