## ----SetUp, echo = FALSE, eval = TRUE, results = "hide"-----------------------
# R options & configuration:
suppressPackageStartupMessages(library("knitr"))
suppressPackageStartupMessages(library("chemometrics"))

# Stuff specifically for knitr:
opts_chunk$set(eval = TRUE, echo = TRUE)

## ----echo = FALSE-------------------------------------------------------------
desc <- packageDescription("LearnPCA")

## ----echo = FALSE, results = "asis"-------------------------------------------
res <- knitr::knit_child("top_matter.md", quiet = TRUE)
cat(res, sep = '\n')

## ----PCA-Matrices, echo = FALSE, results = "show", fig.cap = "One way to look at the matrix algebra behind PCA. Reconstruction of the data matrix $\\mathbf{X}$ is achieved by multiplying the score matrix ($\\mathbf{S}$) by the transpose of the loadings matrix ($\\mathbf{L^T}$).  The method of matrix multiplication is symbolized in the red-dotted outlines: Each element of row $i$ of the scores matrix is multiplied by the corresponding element of column $j$ of the transposed loadings.  These results are summed to give a single entry in the original data matrix $\\mathbf{X}_{ij}$.", out.width = "75%", fig.align = "center"----

knitr::include_graphics("PCA_Matrices.png")

## ----accepted-----------------------------------------------------------------
set.seed(30)
X <- matrix(rnorm(100*50), ncol = 50) # generate random data
X <- scale(X, center = TRUE, scale = FALSE) # always work with centered data
X_svd <- svd(X)
X_pca <- prcomp(X, center = FALSE) # already centered
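
## ----svd-vs-prcomp------------------------------------------------------------
# A quick sanity check (an illustrative sketch, not part of the workflow above):
# svd() and prcomp() are related by scores = U %*% diag(d) and
# sdev = d/sqrt(n - 1).  Singular vectors are only determined up to sign,
# hence the abs().
all.equal(abs(X_pca$x), abs(X_svd$u %*% diag(X_svd$d)), check.attributes = FALSE)
all.equal(X_pca$sdev, X_svd$d/sqrt(nrow(X) - 1))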

## ----power3-------------------------------------------------------------------
V <- rnorm(50) # a random guess at the first right singular vector
for (iter in 1:50) {
  U <- X %*% V # Step 1
  U <- U/sqrt(sum(U^2)) # Step 2: scale U to unit length
  V <- t(X) %*% U # Step 3
  V <- V/sqrt(sum(V^2)) # Step 4: scale V to unit length
  if ((iter %% 10) == 0L) { # report every 10 steps; print the correlation between
    cat("\nIteration", iter, "\n") # the current U or V and the actual values from SVD
    cat("\tcor with V:", sprintf("%f", cor(X_svd$v[,1], V)), "\n")
    cat("\tcor with U:", sprintf("%f", cor(X_svd$u[,1], U)), "\n")
  }
}
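
## ----power3-check-------------------------------------------------------------
# If the iteration above has converged, U and V approximate the first left and
# right singular vectors, so t(U) %*% X %*% V should recover the first singular
# value (an illustrative check, assuming convergence):
drop(t(U) %*% X %*% V)
X_svd$d[1]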

## ----comp1--------------------------------------------------------------------
mean(abs(V) - abs(X_svd$v[,1])) # compare V to the first right singular vector
mean(abs(U) - abs(X_svd$u[,1])) # compare U to the first left singular vector
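
# Because singular vectors are only determined up to sign, the comparisons
# above use abs().  A stricter summary (illustrative) is the largest absolute
# discrepancy between the two vectors:
max(abs(abs(V) - abs(X_svd$v[,1])))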

## -----------------------------------------------------------------------------
mean(abs(X %*% V) - abs(X_pca$x[,1])) # compare the scores
mean(abs(V) - abs(X_pca$rotation[,1])) # compare the loadings
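
## ----reconstruct--------------------------------------------------------------
# The figure earlier describes X as the product of the scores and the
# transposed loadings.  As a sketch of that idea, rebuild X from all of the
# components returned by prcomp(); the result should match X to machine
# precision:
X_rebuilt <- X_pca$x %*% t(X_pca$rotation)
max(abs(X - X_rebuilt))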

## ----eigen2-------------------------------------------------------------------
X_cor <- cor(X)
X_eig <- eigen(X_cor)
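
## ----eigen-check--------------------------------------------------------------
# eigen() on the correlation matrix corresponds to PCA of the autoscaled data;
# as an illustrative check, the eigenvalues should match the variances
# reported by prcomp() when scale. = TRUE:
all.equal(X_eig$values, prcomp(X, scale. = TRUE)$sdev^2)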

## ----eigen3-------------------------------------------------------------------
Q <- rnorm(50) # a random guess at the first eigenvector
for (iter in 1:50) {
  Q <- X_cor %*% Q # Step 1
  Q <- Q/sqrt(sum(Q^2)) # Step 2: scale Q to unit length

  if ((iter %% 5) == 0L) { # report every 5 steps; print the correlation between
    cat("\nIteration", iter, "\n") # the current Q and the actual values from eigen()
    cat("\tcor with Q:", sprintf("%f", cor(X_eig$vectors[,1], Q)), "\n")
  }
}
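
## ----eigen3-check-------------------------------------------------------------
# If the iteration above has converged, Q approximates the leading eigenvector,
# so the Rayleigh quotient t(Q) %*% X_cor %*% Q should recover the largest
# eigenvalue (an illustrative check, assuming convergence):
drop(t(Q) %*% X_cor %*% Q)
X_eig$values[1]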

## -----------------------------------------------------------------------------
mean(abs(Q) - abs(X_eig$vectors[,1])) # check the loadings

## -----------------------------------------------------------------------------
R_loadings <- X_pca$rotation[,1]
std_loadings <- (X_svd$v %*% diag(X_svd$d)/(nrow(X) - 1))[,1]
plot(R_loadings, std_loadings, xlab = "R's notion of loadings", ylab = "Standard notion of loadings")
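
# The two conventions differ only by a per-component scale factor, so the
# points should fall on a line through the origin with slope d[1]/(n - 1)
# (an illustrative overlay):
abline(0, X_svd$d[1]/(nrow(X) - 1), col = "red")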

## -----------------------------------------------------------------------------
simpleNIPALS <- function(X, pcs = 3, tol = 1e-6) {
  S <- NULL # a place to store the scores
  L <- NULL # a place to store the loadings
  for (pc in 1:pcs) { # compute one PC at a time
    s_old <- X[,1] # initial estimate of the score Note 1
    done <- FALSE # flag to track progress
    while (!done) { # Note 2
      l <- t(X) %*% s_old # compute/improve/update the loadings estimate
      l <- l/sqrt(sum(l^2)) # scale by the l2 norm
      s_new <- X %*% l # compute/improve/update the scores estimate
      err <- sqrt(sum((s_new - s_old)^2)) # compute the error of the score estimate
      if (err < tol) {done <- TRUE; next} # check the error against the tolerance;
                                          # exit if good
      s_old <- s_new # if continuing, use the improved score estimate
                     # the next time through the while loop
    }
    X <- X - s_new %*% t(l) # "deflate" the data by removing the
                            # contribution of the results so far Note 3
    S <- cbind(S, s_new) # add the most recent scores to the growing results
    L <- cbind(L, l) # add the most recent loadings to the growing results
  }
  list(Scores = S, Loadings = L) # return the answers
}

## -----------------------------------------------------------------------------
N <- simpleNIPALS(X)
mean(abs(N$Scores[,1]) - abs(X_pca$x[,1])) # compare to the scores from prcomp
mean(abs(N$Loadings[,1]) - abs(X_pca$rotation[,1])) # compare to the loadings from prcomp
mean(abs(N$Scores[,1]) - abs(X %*% X_svd$v[,1])) # compare to the scores from svd
mean(abs(N$Loadings[,1]) - abs(X_svd$v[,1])) # compare to the loadings from svd
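
# Because each component is extracted from the deflated matrix, the NIPALS
# scores for different components should be essentially orthogonal; the
# off-diagonal entries of the cross product matrix should be near zero
# (an illustrative check):
round(crossprod(N$Scores), 4)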

## ----echo = FALSE, results = "asis"-------------------------------------------
res <- knitr::knit_child("refer_to_works_consulted.md", quiet = TRUE)
cat(res, sep = '\n')