############################################################################################ # # Individual effect size/detectability estimator for multiple hypothesis tests where # the alternative distribution is non-central chi-square with d.f. 1 # # Author of the codes: Jozsef Bukszar # Center for Biomarker Research and Personalized Medicine, # School of Pharmacy, Medical College of Virginia of VCU # jbukszar@vcu.edu # # ############################################################################################ ################### combine_mlep0aux<- function(logsa,logsb,ShallIwrite=FALSE) { # # Auxalary function for the real likelihood function calculator. # # WARNING! logsb is supposed to be not shorter than logsa # if (ShallIwrite) { print( " combine_mlep0aux: logsa and logsb are : ") print( logsa ) print( logsb ) print( "----- aux -" ) } lena <- length(logsa) lenb <- length(logsb) lenc <- lena + lenb logsc <- rep(0,lenc) su <- exp(logsa[1]) + exp(logsb[1]) logsc[1] = log(su) for ( it in 2:lena ) { su <- exp(logsb[it]) for ( k in 1:(it-1) ) su = su + exp(logsa[k]+logsb[it-k]) su = su + exp(logsa[it]) logsc[it] = log(su) } if (lenb > lena) { for ( it in (lena+1):lenb ) { su <- exp(logsb[it]) for ( k in 1:lena ) su = su + exp(logsa[k]+logsb[it-k]) logsc[it] = log(su) } } for ( it in (lenb+1):lenc ) { su <- 0 for ( k in (it-lenb):lena ) su = su + exp(logsa[k]+logsb[it-k]) logsc[it] = log(su) } logsc } ################### conserv_pval <- function(pval,k=1) { # Estimates the number positive effects with conservative method based on p-values. # # Function needed: - m <- length(pval) cutoff <- k/m d <- sum(pval0) ir = ir+1 logS <- log(S[2:(ir-1)]) - (1:(ir-2))*log(bas) if (ir==nya) ir=0 else ir=ir-2 # ir = the number of positive items in S if not cut c(ir,logS) } ################### funcsdirect<- function(a,rn) { # Calculates logarithm of array S for i=1,...,rn, where # S_n =sum_( {i_1,...,i_n} \in {1,...,m} ) a_i_1...a_i_n, # and m is the length of array a. # # Auxalary function for the real likelihood function calculator. # BASED ON DIRECT APPROACH!!! # # INPUTS: # a = array a # rn = the maximum number of elements in the product # m <- length(a) po <- 2**m - 1 S<-rep(0,m) for (i in 1:po) { bi <- binar(i,m) kk <- round(sum(bi)) if (kk > rn) next S[kk] = S[kk] + prod( (a*bi) + (1-bi) ) } # print(S) logS <- log(S[1:rn]) logS } ################### gen_allelebased <- function(maf_case,maf,num_nulls,nsample,gamma=0.5,ShallIWrite=TRUE) { # Generates case-control allele-based Pearson test statistic values based on # the given probability tables. # # nsample = the total number of alleles, NOT the number of individuals # # TO RUN: m1<-10; x <- gen_allelebased(seq(0.4,0.35,length=m1),rep(0.3,m1),100000-m1,4000,gamma=0.5) # maf0 <- seq(0.1,0.5,length=num_nulls) num_alts <- length(maf_case) if(num_alts!=length(maf)) print( " Error from gen_allelebased: the length of maf_case and maf should be the same" ) if(ShallIWrite){ print("The effects are") aux <- esize_to_majafandmaf(maf_case,maf,gamma) print(aux) print("The detectabilities are") print(aux*sqrt(nsample)) } num_marks <- num_alts+num_nulls delta <- 1-gamma n_cases <- round(nsample*gamma) n_controls <- round(nsample*delta) x1 <- rep(0,num_alts) for (i in 1:num_alts) x1[i]=rbinom(1, n_cases, maf_case[i]) y1 <- rep(0,num_alts) for (i in 1:num_alts) y1[i]=rbinom(1, n_controls, maf[i]) x0 <- rep(0,num_nulls) for (i in 1:num_nulls) x0[i]=rbinom(1, n_cases, maf0[i]) y0 <- rep(0,num_nulls) for (i in 1:num_nulls) y0[i]=rbinom(1, n_controls, maf0[i]) x <- c(x1,x0) y <- c(y1,y0) diff <- delta*x - gamma*y su <- x + y su2 <- nsample - su Pears <- diff*diff*nsample / (gamma*delta*su*su2) Pears } ################### gen_quant <- function(m,delr,blocksiz=5, blockval=0) { # Generates m - m1 values based on central chi-square with d.f. 1 and # m1 values with non-central chi-square with d.f. 1 DIFFERENT effect sizes in array delr, where # # blocksiz = size of the square block whose elements correlate each other # blockval = the correlation within the block # # delr = array of effect sizes # # CODES NEDEED: - # m1 <- length(delr) m0 <- m - m1 r <- sqrt(blockval) s <- sqrt(1-r*r) m0block <- round(m0/blocksiz) y <- rnorm(m0) zaux <- rnorm(m0block) z <- rep(zaux, each = blocksiz) x0aux <- r*z+s*y x0 <- x0aux*x0aux # Generating x1 m1block <- round(m1/blocksiz) y <- rnorm(m1) zaux <- rnorm(m1block) z <- rep(zaux, each = blocksiz) x1aux <- r*z+s*y + delr x1 <- x1aux*x1aux x <- c(x1,x0) x } ################### ies_estimator <- function(inp_x,MeinRice=TRUE,rule=1,delrange=c(0, 20)) { # Estimates the individual effect sizes by the real MLE method. # Stopping rule # # INPUTS: # x = array of observed test statistic value (CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!!) # # Function needed: mle_cond_quant, linbound_pval, conserv_pval (directly) # mlefval_quant, funcs, funcsdirect, binar, combine_mlep0aux (indirectly) x <- inp_x if (min(x)==0) { # THIS STEP IS NEED BECAUSE OF THE CONTINUOUS APPROXIMATION OF A DISCRETE DISTRIBUTION minx <- min(x[x>0]) x[x==0] = 0.5*minx } pvals <- 1 - pchisq(x,1) # CHI-SQUARE DISTRIBUTION WITH D.F. 1 IS ASSUMED!!! if(MeinRice) m1_est<-linbound_pval(pvals) else m1_est<-conserv_pval(pvals) print( " The estimated number of positive effects is: ") print( m1_est ) cond_m1 <- m1_est + rule delest <- rep(0,cond_m1) for (i in 1:cond_m1) delest[i] = mle_cond_quant(x,i,delrange) auxa <- (1:cond_m1)*delest buxa <- auxa[2:cond_m1]-auxa[1:(cond_m1-1)] indies <- c(delest[1],buxa) indies } ################### linbound_pval <- function(pval_inp,alpha=0.5) { # Estimates the number of positive effects with Meinshausen-Rice method based on p-values. # # Function needed: - pval <- sort(pval_inp) m <- length(pval) aux <- (1:m)/m - pval/alpha bux <- aux / (1-pval) raw <- round(m*max(bux)) m1_est <- max(0,raw) m1_est } ################### mle_cond_quant <- function(x,m1,delrange=c(0, 20)) { # Estimates the average effect size by the real MLE method conditioned on m1. # # INPUTS: # x = array of observation # # Function needed: mlefval_quant (directly) # funcs, funcsdirect, binar, combine_mlep0aux (indirectly) logl <- function(del0) { val <- mlefval_quant(x,del0,m1) -val } dmin <- optimize(logl, delrange, tol = 0.00001) delest <- dmin$minimum delest } ################### mlefval_quant <- function(x,del,m1,ShallIwrite=FALSE,MIN_NUM_STEP=3,MLIM=2) { # Calculates the Real loglikelihood function value for m1 and del (effect size). # # INPUTS: # x = array of observation # del = effect size # n = sample size (cases + controls) # MIN_NUM_STEP = minimum number of steps handled by funcsdirect, e.g. if MIN_NUM_STEP=3, the top 3*10=30 a's are handled by funcsdirect. # # CODES NEEDED: funcs, funcsdirect, binar, combine_mlep0aux # # MLIM <- 4 # Upper limit that funcs still can calculate m <- length(x) nonc <- del*del a <- dchisq(x,1,nonc) / dchisq(x,1) if (m11) { for ( i in 2:nsteps ) { aux <- logs logs <- combine_mlep0aux(logss[i,],aux) # Combining the arrays calculated by funcsdirect } } if (ShallIwrite) { print( " From mlefval: vector logs is : ") print( logs ) print( "-----------" ) } logli <- rep(0,m1max) # Will be the logarithm of array S_n (obtained by combining lower and upper part) su <- exp(logs1[1]) + exp(logs[1]) logli[1] = log(su) - log(m) su <- exp(logs1[2]) su = su + exp(logs1[1]+logs[1]) su = su + exp(logs[2]) logli[2] = log(su) - sum(log( (m-1):m )) + log(2) lenlogs <- length(logs) it <- 2 while ( it < m1 ) { it = it + 1 iaux<-min(it-1,lenlogs) if (it <= lenlogs) bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux],logs[it]) else bux <- c(logs1[it],logs1[it-(1:iaux)]+logs[1:iaux]) bas <- 0.5*(min(bux)+max(bux)) su <- sum( exp(bux-bas) ) logli[it] = bas + log(su) - sum(log( (m-it+1):m )) + sum(log(1:it)) } if (ShallIwrite) { print( " From mlefval: vector logli is : ") print( logli ) print( "-----------" ) } resu <- logli[m1] } # closing of if resu # Not the real loglikelihood function value is returned, the real would be sum(log( dchisq(x,1) )) more. } ################### binar <- function(n,len) { # calculates the binary form of positive integer n. a <- n bin <- rep(0,len) loc <- 0 while( a > 0 ) { loc = loc + 1 if (2*floor(0.5*a) < a) { bin[len+1-loc] = 1 a = a - 1 } a = a/2 } bin } ################### tester <- function(m,delr,N,rule=1,blocksiz=5, blockval=0) { # Tests the individual effect size estimator ies_estimator. # # TO RUN: tester(100000,seq(6.6,3.8,length=10),N=5) # # CODES NEDEED: ies_estimator # set.seed(1000) lendelr <- length(delr) ies <- matrix(0,N,3*lendelr) for (iter in 1:N) { if (10*floor(iter/10)==iter) print(iter) x <- gen_quant(m,delr,blocksiz, blockval) # Generating test static values according to continuous distribution auxa <- ies_estimator(x,MeinRice=TRUE,rule,delrange=c(0, 20)) # Calculating individual effect size estimates lena <- length(auxa) ies[iter,1:lena] = auxa } meaies <- rep(0,lendelr) for (i in 1:lendelr) meaies[i] = mean(ies[,i]) print(" Actual individual effect sizes: " ) print(delr) print(" Mean of the individual effect size estimates: " ) print(meaies) } ################### tester2 <- function(maf_case,maf,num_nulls,nsample,N,rule=1,gamma=0.5) { # Tests the individual effect size estimator ies_estimator on data generated # by DISCRETE DISTRIBUTION OF THE TEST STATISTIC IN ALLELE-BASED CASE-CONTROL studies # # nsample = the total number of alleles, NOT the number of individuals # # TO RUN: m1<-10; tester2(seq(0.4,0.35,length=m1),rep(0.3,m1),100000-m1,4000,N=5) # # CODES NEDEED: ies_estimator # set.seed(1000) print("The effects are") aux <- esize_to_majafandmaf(maf_case,maf,gamma) print(aux) delr <- aux*sqrt(nsample) print("The detectabilities are") print(delr) lendelr <- length(delr) ies <- matrix(0,N,3*lendelr) for (iter in 1:N) { if (10*floor(iter/10)==iter) print(iter) x <- gen_allelebased(maf_case,maf,num_nulls,nsample,gamma,ShallIWrite=FALSE) # Generating test static values according to DISCRETE DISTRIBUTION OF # THE TEST STATISTIC IN ALLELE-BASED CASE-CONTROL studies auxa <- ies_estimator(x,MeinRice=TRUE,rule,delrange=c(0, 20)) # Calculating individual effect size estimates lena <- length(auxa) ies[iter,1:lena] = auxa } meaies <- rep(0,lendelr) for (i in 1:lendelr) meaies[i] = mean(ies[,i]) print(" Actual individual effect sizes: " ) print(round(delr,3)) print(" Mean of the individual effect size estimates: " ) print(round(meaies,3)) # ies } ###################