# implementation of L_1-PCA* # from the paper "A Pure L1-norm Principal Component Analysis" by JP Brooks, JH Dula, and EL Boone # manuscript available at: # to use: 1) open R # 2) install packages SparseM and quantreg (install.packages("SparseM"), install.packages("quantreg")) # 3) issue command: source("l1pcastar.r") # 4) load data into a matrix X; X must be full column rank library(quantreg) ################# # read in data # ################# X <- data.frame(as.matrix(read.table("toyPCA.txt",skip=2))) ################# n <- nrow(X) m <- ncol(X) # matrix to hold principal components: the main output Alphas <- matrix(nrow=m, ncol=m) # keep track of product of Vk matrices prodVk <- diag(m) k <- m # main loop: iterate over dimensions of the data for (k in m:2) { minobjective <- 10000000.0 # Step 3: Identify best-fit L1 hyperplane using result from Theorem 1 for (j in 1:k) { names(X)[j] <- "depvar" # constrain intercept to 0 Rmatrix <- matrix(c(1,-1,rep(0,2*(k-1))),nrow=2) rvector <- c(0,0) #L1 regression l1regress <- rq(depvar ~ ., data=X, R=Rmatrix, r=rvector, method="fnc") Rj <- sum(abs(l1regress$residuals)) if (Rj < minobjective) { jstar <- j minobjective <- Rj if (j == 1) { betak <- c(0,l1regress$coefficients[2:k]) } else if (j < k) { betak <- c(l1regress$coefficients[2:j], 0, l1regress$coefficients[(j+1):k]) } else { betak <- c(l1regress$coefficients[2:j], 0) } } names(X)[j] <- paste("indepvar",j, sep="") } # Step 4: Calculate projected points in terms of the k-dimensional subspace Ijstark <- diag(k) Ijstark[jstar,] <- betak Z <- as.matrix(X) %*% t(Ijstark) # Step 5: Caculcate SVD of Z Zsvd <- svd(Z) Vk <- Zsvd$v[,1:(k-1)] # Step 6: Calculate the k^th principal component betak[jstar] <- -1.0 Alphas[,k] <- prodVk %*% (betak/sqrt(sum(betak*betak))) # Step 7: Calculate the scores X <- data.frame(Z %*% Vk) prodVk <- prodVk %*% Vk } Alphas[,1] <- prodVk