############################################################################################ # # Local FDR estimator for multiple hypothesis tests where the alternative distribution # is non-central chi-square with d.f. 1 # # Author of the codes: Jozsef Bukszar # Center for Biomarker Research and Personalized Medicine, # School of Pharmacy, Medical College of Virginia of VCU # jbukszar@vcu.edu # # ############################################################################################ localFDR_estimator_nsort <- function(inp_x,first=-1,MeinRice=TRUE,rule=1,delrange=c(0, 10)) { # Estimates the local FDR by the real MLE method. # # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!! # # INPUTS: # x = array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!) # # Function needed: lFDR_estim, mle_cond_quant, linbound_pval, conserv_pval (directly) # real likelihood calculator codes (indirectly) x <- inp_x if (min(x)==0) { # THIS STEP IS NEEDED BECAUSE OF THE CONTINUOUS APPROXIMATION OF A DISCRETE DISTRIBUTION minx <- min(x[x>0]) x[x==0] = 0.5*minx } ## p0 estimator pvals <- 1 - pchisq(x,1) # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!! if(MeinRice) m1_est<-linbound_pval(pvals) else m1_est<-conserv_pval(pvals) print( " The estimated number of positive effects is: ") print( m1_est ) cond_m1 <- m1_est + rule ## End of p0 estimator ## Individual effect size estimator delest <- rep(0,cond_m1) for (i in 1:cond_m1) { # print( c("Estimating effect sizes ",i) ) delest[i] = mle_cond_quant(x,i,delrange) # print(delest[i]) # print("------------") } if (cond_m1>1) { auxa <- (1:cond_m1)*delest buxa <- auxa[2:cond_m1]-auxa[1:(cond_m1-1)] indies <- c(delest[1],buxa) } else indies <- delest[1] ## End of Individual effect size estimator ## Calculating local FDRs resu <- lFDR_estim_nsort(x,indies,first) # resu=lfdr_est list(resu,indies) } ################### ies_estimator <- function(inp_x,MeinRice=TRUE,rule=1,delrange=c(0, 10)) { # Estimates the individual effect sizes by the real MLE method. # # INPUTS: # x = array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!) # # Function needed: mle_cond_quant, linbound_pval, conserv_pval (directly) # real likelihood calculator codes (indirectly) x <- inp_x if (min(x)==0) { # THIS STEP IS NEEDED BECAUSE OF THE CONTINUOUS APPROXIMATION OF A DISCRETE DISTRIBUTION minx <- min(x[x>0]) x[x==0] = 0.5*minx } pvals <- 1 - pchisq(x,1) # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!! if(MeinRice) m1_est<-linbound_pval(pvals) else m1_est<-conserv_pval(pvals) print( " The estimated number of positive effects is: ") print( m1_est ) cond_m1 <- m1_est + rule delest <- rep(0,cond_m1) for (i in 1:cond_m1) { print( c("Estimating effect sizes ",i) ) delest[i] = mle_cond_quant(x,i,delrange) print(delest[i]) print("------------") } if (cond_m1>1) { auxa <- (1:cond_m1)*delest buxa <- auxa[2:cond_m1]-auxa[1:(cond_m1-1)] indies <- c(delest[1],buxa) } else indies <- delest[1] indies } ################### lFDR_estim_nsort <- function(x,indes,inp_first=-1) { # Estimates the local FDR based on the test statistic values (x) and individual effect size/detectability estimates (indes) # # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!! # # RETURNS THE LOCAL FDR ESITMATES OF THE FIRST 'FIRST' TESTS IN INPUT X NOT SORTED!!! # # # INPUTS: # x = array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!) # indes = individual effect size/detectability estimates # first = the number of the first tests in x whose local FDR estimates the code calculates (The code returns all local FDR esimates if # first is set to negative number) # # Function needed: - numark <- length(x) if (inp_first<0) first <- numark else first <- inp_first lenind <- length(indes) numnulls <- numark - lenind sox <- x[1:first] aux <- numnulls*dchisq(sox,1) nonc <- indes*indes bux <- aux for (j in 1:lenind) bux = bux + dchisq(sox,1,nonc[j]) lfdr_est <- aux/bux lfdr_est } ################### mle_cond_quant <- function(x,m1,delrange=c(0, 10)) { # Estimates the average effect size by the real MLE method conditioned on m1. # # INPUTS: # x = array of observation # # Function needed: mlefval_quant (directly) # funcs, funcsdirect, binar, combine_mlep0aux (indirectly) logl <- function(del0) { val <- mlefval_quant(x,del0,m1) -val } dmin <- optimize(logl, delrange, tol = 0.00001) delest <- dmin$minimum delest } ######################################################################################################### # # P0 ESTIMATORS AND MISCELLANEOUS # ######################################################################################################### conserv_pval <- function(pval,k=1) { # Estimates the number positive effects with conservative method based on p-values. # # Function needed: - m <- length(pval) cutoff <- k/m d <- sum(pval1) { for ( i in 2:nsteps ) { aux <- logs logs <- combine_mlep0aux(logss[i,],aux) # Combining the arrays calculated by funcsdirect } } if (ShallIwrite) { print( " From mlefval: vector logs is : ") print( logs ) print( "-----------" ) } logli <- rep(0,m1max) # Will be the logarithm of array S_n (obtained by combining lower and upper part) su <- exp(logs1[1]) + exp(logs[1]) logli[1] = log(su) - log(m) su <- exp(logs1[2]) su = su + exp(logs1[1]+logs[1]) su = su + exp(logs[2]) logli[2] = log(su) - sum(log( (m-1):m )) + log(2) lenlogs <- length(logs) it <- 2 while ( it < m1 ) { it = it + 1 iaux<-min(it-1,lenlogs) if (it <= lenlogs) bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux],logs[it]) else bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux]) bas <- 0.5*(min(bux)+max(bux)) su <- sum( exp(bux-bas) ) logli[it] = bas + log(su) - sum(log( (m-it+1):m )) + sum(log(1:it)) } if (ShallIwrite) { print( " From mlefval: vector logli is : ") print( logli ) print( "-----------" ) } resu <- logli[m1] } # closing of if resu # Not the real loglikelihood function value is returned, the real would be sum(log( dchisq(x,1) )) more. } ################### funcs<- function(a,rn) { # Calculates logarithm of array S for i=1,...,rn, where # S_n =sum_( {i_1,...,i_n} \in {1,...,m} ) a_i_1...a_i_n, # and m is the length of array a. # # Auxalary function for the real likelihood function calculator. # # INPUTS: # a = array a # rn = the maximum number of elements in the product # m <- length(a) T <- rep(0,rn) S <- rep(0,(rn+1)) # bas <- 1/(sum(a)**0.8) bas <- 1/sum(a) # bas <- 1 aux <- rep(1,m) for (i in 1:rn) { aux = bas*aux*a T[i] = sum(aux) } # print(T) bux <- rep(0,rn) bux[1] = 1 for (i in 2:rn) bux[i] = -bux[i-1] # print(bux) S[1] = 1 # S[i+1] = (bas**i) * S_i !!!!!!!!! for (i in 1:rn) { auxa<-T[1:i]*S[i:1] S[i+1] = sum(auxa*bux[1:i])/i } # print( " --------------- " ) # print(S[2:(rn+1)]*exp((1:rn)*log(bas)) ) ir <- 2 nya <- rn+2 while(ir0) ir = ir+1 logS <- log(S[2:(ir-1)]) - (1:(ir-2))*log(bas) if (ir==nya) ir=0 else ir=ir-2 # ir = the number of positive items in S if not cut c(ir,logS) } ################### funcsdirect<- function(a,rn) { # Calculates logarithm of array S for i=1,...,rn, where # S_n =sum_( {i_1,...,i_n} \in {1,...,m} ) a_i_1...a_i_n, # and m is the length of array a. # # Auxalary function for the real likelihood function calculator. # BASED ON DIRECT APPROACH!!! # # INPUTS: # a = array a # rn = the maximum number of elements in the product # S <- rep(0,rn) S[1] = sum(a) if (rn>1) { for (k in 2:rn) { S[k] = funcsdirect_aux(a,k) } } logS <- log(S) logS } ########### funcsdirect_aux<- function(a,k) { # # RECURSIVE FUNCTION!!!! # # Calculates logarithm of array S for i=1,...,rn, where # S_n =sum_( {i_1,...,i_n} \in {1,...,m} ) a_i_1...a_i_n, # and m is the length of array a. # # Auxalary function for the real likelihood function calculator. # BASED ON DIRECT APPROACH!!! # # INPUTS: # a = array a # rn = the maximum number of elements in the product # m <- length(a) if (k==0) res <- 1 else if (k==m) res <- prod(a) else { aa <- funcsdirect_aux(a[1:(m-1)],k-1) bb <- funcsdirect_aux(a[1:(m-1)],k) res <- a[m]*aa + bb } res } ################### combine_mlep0aux<- function(logsa,logsb,ShallIwrite=FALSE) { # # Auxalary function for the real likelihood function calculator. # # WARNING! logsb is supposed to be not shorter than logsa # if (ShallIwrite) { print( " combine_mlep0aux: logsa and logsb are : ") print( logsa ) print( logsb ) print( "----- aux -" ) } lena <- length(logsa) lenb <- length(logsb) lenc <- lena + lenb logsc <- rep(0,lenc) su <- exp(logsa[1]) + exp(logsb[1]) logsc[1] = log(su) for ( it in 2:lena ) { su <- exp(logsb[it]) for ( k in 1:(it-1) ) su = su + exp(logsa[k]+logsb[it-k]) su = su + exp(logsa[it]) logsc[it] = log(su) } if (lenb > lena) { for ( it in (lena+1):lenb ) { su <- exp(logsb[it]) for ( k in 1:lena ) su = su + exp(logsa[k]+logsb[it-k]) logsc[it] = log(su) } } for ( it in (lenb+1):lenc ) { su <- 0 for ( k in (it-lenb):lena ) su = su + exp(logsa[k]+logsb[it-k]) logsc[it] = log(su) } logsc } ######################################################################################################### # # CODES THAT GENERATE TEST STATISTIC FUNCTION VALUES # ######################################################################################################### gen_allelebased <- function(maf_case,maf,num_nulls,nsample,gamma=0.5,ShallIWrite=TRUE) { # Generates case-control allele-based Pearson test statistic values based on # the given probability tables. # # nsample = the total number of alleles, NOT the number of individuals # # TO RUN: m1<-10; x <- gen_allelebased(seq(0.4,0.35,length=m1),rep(0.3,m1),100000-m1,4000,gamma=0.5) # maf0 <- seq(0.1,0.5,length=num_nulls) num_alts <- length(maf_case) if(num_alts!=length(maf)) print( " Error from gen_allelebased: the length of maf_case and maf should be the same" ) if(ShallIWrite){ print("The effects are") aux <- esize_to_majafandmaf(maf_case,maf,gamma) print(aux) print("The detectabilities are") print(aux*sqrt(nsample)) } num_marks <- num_alts+num_nulls delta <- 1-gamma n_cases <- round(nsample*gamma) n_controls <- round(nsample*delta) x1 <- rep(0,num_alts) for (i in 1:num_alts) x1[i]=rbinom(1, n_cases, maf_case[i]) y1 <- rep(0,num_alts) for (i in 1:num_alts) y1[i]=rbinom(1, n_controls, maf[i]) x0 <- rep(0,num_nulls) for (i in 1:num_nulls) x0[i]=rbinom(1, n_cases, maf0[i]) y0 <- rep(0,num_nulls) for (i in 1:num_nulls) y0[i]=rbinom(1, n_controls, maf0[i]) x <- c(x1,x0) y <- c(y1,y0) diff <- delta*x - gamma*y su <- x + y su2 <- nsample - su Pears <- diff*diff*nsample / (gamma*delta*su*su2) Pears } ################### gen_quant <- function(m,delr,blocksiz=5, blockval=0) { # Generates m - m1 values based on central chi-square with d.f. 1 and # m1 values based on common approach with DIFFERENT effect sizes in array delr, where # # blocksiz = size of the square block whose elements correlate each other # blockval = the correlation within the block # # delr = array of effect sizes # # CODES NEDEED: - # m1 <- length(delr) m0 <- m - m1 r <- sqrt(blockval) s <- sqrt(1-r*r) m0block <- round(m0/blocksiz) y <- rnorm(m0) zaux <- rnorm(m0block) z <- rep(zaux, each = blocksiz) x0aux <- r*z+s*y x0 <- x0aux*x0aux # Generating x1 m1block <- round(m1/blocksiz) y <- rnorm(m1) zaux <- rnorm(m1block) z <- rep(zaux, each = blocksiz) x1aux <- r*z+s*y + delr x1 <- x1aux*x1aux x <- c(x1,x0) x } ######################################################################################################### # # TESTER CODE # ######################################################################################################### tester_lfdr <- function(m,delr,N,blocksiz=5,blockval=0,rule=1,delrange=c(0, 10)) { # Tests the SPECIAL VERSION of the local FDR estimator, 'localFDR_estimator', i.e. the one that assumes # CHI-SQUARE DISTRIBUTION WITH D.F. 1. # # # TO RUN: system.time( resu<-tester_lfdr(100000,seq(6.0,4.0,length=10),N=5) ) # # # CODES NEDEED: localFDR_estimator_nsort, gen_quant (directly) # lendelr <- length(delr) first <- 2*lendelr ltdr_est <- matrix(0,N,first) infocont <- matrix(0,N,3) # Halfway between estimated and actual information content infocont_est <- matrix(0,N,3) # Estimated information content m1est_vec <- rep(0,N) indies_est <- matrix(0,N,2*first) for (iter in 1:N) { set.seed(1000*iter) print("-------------------") print(c("Iteration:" , iter) ) # if (10*floor(iter/10)==iter) print(iter) x <- gen_quant(m,delr,blocksiz, blockval) # Generating test static values according to CHI-SQUARE DISTRIBUTION WITH D.F. 1.. auxa <- localFDR_estimator_nsort(x,-1,MeinRice=TRUE,rule,delrange) # Calculating the local FDR estimates ltdre <- 1 - auxa[[1]] ltdr_est[iter,] = ltdre[1:first] su <- sum(ltdre) infocont[iter,1]=sum(ltdre[1:lendelr]) infocont[iter,2]=sum(ltdre) m1est <- length(auxa[[2]]) m1est_vec[iter] = m1est indies_est[iter,1:m1est] = auxa[[2]] ordl <- order(ltdre,decreasing=TRUE) infocont_est[iter,1]=sum(ltdre[ordl[1:m1est]]) infocont_est[iter,2]=sum(ltdre) print(round(ltdre[1:first],3)) print(round(infocont_est[iter,],3)) print(round(ltdre[ordl[1:m1est]],3)) print(ordl[1:m1est]) } maxm1est <- max(m1est_vec) indies_est <- indies_est[,1:maxm1est] infocont[,3] = infocont[,1] / infocont[,2] infocont_est[,3] = infocont_est[,1] / infocont_est[,2] mealtdr_est <- rep(0,first) for (i in 1:first) mealtdr_est[i] = mean(ltdr_est[,i]) print(" Mean of the local TDR estimates: " ) print(round(mealtdr_est,3)) list(ltdr_est,infocont_est,infocont,m1est_vec,indies_est) }