#' MANOVA update for lm.rrpp model fits
#' 
#' @description 
#' Function updates a lm.rrpp fit to add $MANOVA, which like $ANOVA, provides statistics
#' or matrices typically associated with multivariate analysis of variance (MANOVA).
#' 
#' MANOVA statistics or sums of squares and cross-products (SSCP) matrices
#' are calculated over the random permutations of a \code{\link{lm.rrpp}} fit.  SSCP matrices are 
#' computed as the inverse of R time H (invR.H), where R is a SSCP for the residuals or random effects and H is
#' the difference between SSCP matrices of full and reduced models (see below).  If
#' the argument, verbose, is TRUE, invR.H matrices will be returned for every model effect and the entire model,
#' for every permutation.  If verbose = FALSE, eigen-analysis is performed on the invR.H matrices and returned, allowing
#' MANOVA statistics to be calculated, including Roy's maximum root (eigen value), Pillai trace, Hotelling-Lawley trace,
#' and Wilks lambda (via \code{\link{summary.manova.lm.rrpp}}).
#' 
#' The manova.update to add $MANOVA to \code{\link{lm.rrpp}} fits requires more computation time than the $ANOVA
#' statistics that are computed automatically in \code{\link{lm.rrpp}}.  Generally, the same inferential conclusions will
#' be found with either approach, when observations outnumber response variables.  For high-dimensional data (more variables
#' than observations) data are projected into a Euclidean space of appropriate dimensions (rank of residual covariance matrix).  
#' One can vary the tolerance for eigenvalue decay or specify the number of PCs, if a smaller set of PCs than the maximum is desired.  
#' This is advised if there is strong correlation among variables (the data space could be simplified to fewer dimensions), as spurious
#' results are possible.  When observations outnumber variables, projection of data onto PCs yields the same results as the original 
#' variables.  Because distributions of MANOVA stats can be generated from the random permutations,
#' there is no need to approximate F-values, like with parametric MANOVA. By restricting analysis to the real, positive eigen values calculated,
#' all statistics can be calculated (but Wilks lambda, as a product but not a trace, might be less reliable as variable number approaches
#' the number of observations).
#' 
#'  \subsection{ANOVA vs. MANOVA}{ 
#'  Two SSCP matrices are calculated for each linear model effect, for every random permutation: R (Residuals or Random effects) and
#'  H, the difference between SSCPs for "full" and "reduced" models. (Full models contain and reduced models lack
#'  the effect tested; SSCPs are hypothesized to be the same under a null hypothesis, if there is no effect.  The 
#'  difference, H, would have a trace of 0 if the null hypothesis were true.)  In RRPP, ANOVA and MANOVA correspond to
#'  two different ways to calculate statistics from R and H matrices.
#'  
#'  ANOVA statistics are those that find the trace of R and H SSCP matrices before calculating subsequent statistics,
#'  including sums of squares (SS), mean squares (MS), and F-values.  These statistics can be calculated with univariate data
#'  and provide univariate-like statistics for multivariate data.  These statistics are dispersion measures only (covariances
#'  among variables do not contribute) and are the same as "distance-based" stats proposed by Goodall (1991) and Anderson (2001).
#'  MANOVA stats require multivariate data and are implicitly affected by variable covariances.  For MANOVA, the inverse of R times
#'  H (invR.H) is first calculated for each effect, then eigenanalysis is performed on these matrix products.  Multivariate
#'  statistics are calculated from the positive, real eigenvalues.  In general, inferential
#'  conclusions will be similar with either approach, but effect sizes might differ.
#'  
#'  ANOVA tables are generated by \code{\link{anova.lm.rrpp}} on lm.rrpp fits and MANOVA tables are generated
#'  by \code{\link{summary.manova.lm.rrpp}}, after running manova.update on lm.rrpp fits.
#'  
#'  Currently, mixed model effects are only possible with $ANOVA statistics, not $MANOVA.
#'  
#'  More detail is found in the vignette, ANOVA versus MANOVA.  
#' }
#' 
#' @references Goodall, C.R. 1991. Procrustes methods in the statistical analysis of shape. Journal of the 
#'    Royal Statistical Society B 53:285-339.
#' @references Anderson MJ. 2001. A new method for non-parametric multivariate analysis of variance.
#'    Austral Ecology 26: 32-46.
#' @param fit Linear model fit from \code{\link{lm.rrpp}}
#' @param error An optional character string to define R matrices used to calculate invR.H.
#' (Currently only Residuals can be used and this argument defaults to NULL.  Future versions
#' will update this argument.)
#' @param tol A tolerance value for culling data dimensions to prevent spurious results.  The distribution
#' of eigenvalues for the data will be examined and if the decay becomes less than the tolerance,
#' the data will be truncated to principal components ahead of this point.  This will possibly prevent spurious results
#' calculated from eigenvalues near 0.  If tol = 0, all possible PC axes are used, which is likely
#' not a problem if observations outnumber variables.
#' @param PC.no A value that, if not NULL,  overrides the tolerance argument, and forces a desired number of 
#' data PCs to use for analysis.  If a value larger than the possible number of PCs is chosen, the full compliment of PCs
#' (the full data space) will be used.
#' @param print.progress A logical value to indicate whether a progress bar should be printed to the screen.
#' This is helpful for long-running analyses.
#' @param verbose A logical value to indicate whether invR.H matrices should be returned (if TRUE) or just their 
#' eigen values (if FALSE).
#' @keywords analysis
#' @export
#' @author Michael Collyer
#' @return An object of class \code{lm.rrpp} is updated to include class \code{manova.lm.rrpp}, and the object,
#' $MANOVA, which includes
#' \item{eiges}{The eigenvalues of invR.H, if verbose = FALSE.}
#' \item{invR.H}{The inverse of the residuals SSCP times the H SSCP, if verbose = TRUE}
#' \item{e.rank}{Rank of the error (residuals) covariance matrix.}
#' @examples 
#'    
#' # Body Shape Analysis (Multivariate) ----------------------------------------------------
#' 
#' data(Pupfish)
#' 
#' # Although not recommended as a practice, this example will use only
#' # three principal components of body shape for demonstration.  A larger
#' # number of random permutations should also be used.
#' 
#' Pupfish$shape <- prcomp(Pupfish$coords)$x[, 1:3]
#' 
#' Pupfish$logSize <- log(Pupfish$CS) # better to not have functions in formulas
#' 
#' fit <- lm.rrpp(shape ~ logSize + Sex, SS.type = "I", 
#' data = Pupfish, print.progress = FALSE, iter = 499) 
#' summary(fit, formula = FALSE)
#' anova(fit) # ANOVA table
#' 
#' # MANOVA
#' 
#' fit.m <- manova.update(fit, print.progress = FALSE, tol = 0.001)
#' summary(fit.m, test = "Roy")
#' summary(fit.m, test = "Pillai")
#' 
#' fit.m$MANOVA$eigs$logSize[1:3] # eigenvalues first three iterations
#' fit.m$MANOVA$eigs$Sex[1:3] # eigenvalues first three iterations
#' 
#' fit.m <- manova.update(fit, verbose = TRUE, print.progress = FALSE)
#' summary(fit.m, test = "Roy")
#' summary(fit.m, test = "Pillai")
#' 
#' fit.m$MANOVA$invR.H$logSize[1:3] # invR.H first three iterations
#' fit.m$MANOVA$invR.H$Sex[1:3] # invR.H first three iterations
#' 
#' # Distributions of test statistics
#' 
#' summ.roy <- summary(fit.m, test = "Roy")
#' dens <- apply(summ.roy$rand.stats, 2, density)
#' par(mfcol = c(1, length(dens)))
#' for(i in 1:length(dens)) {
#'      plot(dens[[i]], xlab = "Roy max root", ylab = "Density",
#'      type = "l", main = names(dens)[[i]])
#'      abline(v = summ.roy$rand.stats[1, i], col = "red")
#' }
#' par(mfcol = c(1,1))
#' 

manova.update <- function(fit, error = NULL, 
                          tol = 0.01, PC.no = NULL,
                          print.progress = TRUE, verbose = FALSE) {
  if(inherits(fit, "manova.lm.rrpp")) stop("\nlm.rrpp object has already been updated for MANOVA\n", 
                                           call. = FALSE)
  if(!is.logical(verbose)) verbose = FALSE
  p <- fit$LM$p
  if(p < 2) stop("\n Multiple responses are required for this analysis.\n", call. = FALSE)
  p.prime <- fit$LM$p.prime
  n <- fit$LM$n
  E.qr <- qr(crossprod(fit$LM$residuals))
  e.rank <- E.qr$rank
  
  perm.method <- fit$PermInfo$perm.method
  if(perm.method == "RRPP") RRPP = TRUE else RRPP = FALSE
  ind <- fit$PermInfo$perm.schedule
  trms <- fit$LM$term.labels
  k <- length(trms)
  gls <- fit$LM$gls
  if(gls) P <- fit$LM$Pcov else P <- NULL
  o.fit <- fit
  d <- svd(var(o.fit$LM$Y))$d
  d <- cumsum(d/sum(d))
  dd <- rep(0, length(d) - 1)
  for(i in 2:length(d)) dd[i - 1] <- (d[i] - d[i - 1])
  dd <- c(1, dd)
  if(tol > 0) d <- which(dd >= tol) else d <- 1:p.prime
  if(!is.null(PC.no)) {
    d <- 1:PC.no
    if(PC.no > p.prime) d <- 1:p.prime
  }
  PCA <- prcomp(o.fit$LM$Y)
  dd <- zapsmall(PCA$sdev^2)
  dd <- dd[dd > 0]
  if(length(dd) < e.rank) {
    d <- 1:length(dd)
    e.rank <- length(d)
  }
  Y <- PCA$x[, 1:(min(length(d), e.rank))]
  dat <- fit$LM$data
  dat$Y <- Y
  dat <- rrpp.data.frame(dat)
  form <- fit$call[[2]]
  form <- update.formula(form, Y ~.)
  SS.type <- fit$ANOVA$SS.type
  weights <- fit$LM$weights
  offset <- fit$LM$offset
  if(fit$PermInfo$perm.method == "RRPP")  RRPP = TRUE else RRPP = FALSE
  Cov <- fit$LM$Cov
  # int.first should be an option
  
  fit <- lm.rrpp(formula(deparse(form)), data = dat, SS.type = SS.type, weights = weights, offset = offset, Cov = Cov,
                 iter= 0, print.progress = FALSE)
  
  if(!is.null(error)) {
    if(!inherits(error, "character")) stop("The error description is illogical.  It should be a string of character values matching ANOVA terms.",
                                           call. = FALSE)
    kk <- length(error)
    if(kk != k) stop("The error description should match in length the number of ANOVA terms (not including Residuals)",
                     call. = FALSE)
    Ematch <- match(error, c(trms, "Residuals"))
    if(any(is.na(Ematch))) stop("At least one of the error terms is not an ANOVA term",
                                call. = FALSE)
  } else Ematch <- NULL
  
  # Until a better solution is found, this must be forced
  error <- Ematch <- NULL
  
  fit <- refit(fit)
  perms <- length(ind)
  fitted <- fit$wFitted.reduced
  res <- fit$wResiduals.reduced
  k <- length(trms)
  w <- sqrt(fit$weights)
  o <- fit$offset
  if(sum(o) != 0) offset = TRUE else offset = FALSE
  rrpp.args <- list(fitted = fitted, residuals = res,
                    ind.i = NULL, w = NULL, o = NULL)
  if(offset) rrpp.args$o <- o
  if(print.progress){
    if(verbose) 
      cat(paste("\nCalculation of SSCP matrix products:", perms, "permutations.\n")) else
        cat(paste("\nEigen-analysis of SSCP matrix products:", perms, "permutations.\n"))
    pb <- txtProgressBar(min = 0, max = perms+1, initial = 0, style=3)
  }
  
  getEigs <- function(EH, EH.rank){
    r <- min(dim(na.omit(EH)))
    EH <- EH[1:r, 1:r]
    Re(eigen(EH, symmetric = FALSE, only.values = TRUE)$values)[1:EH.rank]
  }
  
  
  if(gls){
    Y <- crossprod(P, Y)
    Xr <- lapply(fit$wXrs, function(x) crossprod(P, as.matrix(x)))
    Xf <- lapply(fit$wXfs, function(x) crossprod(P, as.matrix(x)))
    Ur <- lapply(Xr, function(x) qr.Q(qr(x)))
    Uf <- lapply(Xf, function(x) qr.Q(qr(x)))
    Ufull <- Uf[[k]]
    int <- attr(fit$Terms, "intercept")
    Unull <- qr.Q(qr(crossprod(P, rep(int, n))))
    
    H <- Map(function(f, r) tcrossprod(f) - tcrossprod(r), Uf, Ur)
    Hfull <- tcrossprod(Ufull)
    Hnull <- tcrossprod(Unull)
    EH.rank <-  c(Map(function(r, f) f$rank - r$rank, Qr, Qf), qr(Hfull)$rank - qr(Hnull)$rank)
    
    yh0 <- fastFit(Unull, Y, n, p)
    r0 <- Y - yh0
    
    if(!RRPP) {
      fitted <- lapply(fitted, function(.) matrix(0, n, p))
      res <- lapply(res, function(.) Y)
    } else {
      fitted <- Map(function(u) crossprod(tcrossprod(u), Y), Ur)
      res <- lapply(fitted, function(f) Y - f)
    }
    rrpp.args$fitted <- fitted
    rrpp.args$residuals <- res
    
    result <- lapply(1: perms, function(j){
      step <- j
      if(print.progress) setTxtProgressBar(pb,step)
      x <-ind[[j]]
      rrpp.args$ind.i <- x
      Yi <- do.call(rrpp, rrpp.args)
      y <- yh0 + r0[x,]
      Hs <- Map(function(h, y) crossprod(h %*% y), H, Yi)
      Es <- Map(function(y) crossprod(y - Hfull %*% y), Yi)
      names(Hs) <- names(Es) <- trms
      
      if(!is.null(error)) {
        kk <- which(Ematch <= k)
        for(i in 1:length(kk)) {
          e.match <- Ematch[kk[i]]
          Es[[kk[i]]] <- Hs[[e.match]]
        }
      }
      H.full <- crossprod(Hfull %*% y)
      E.full <- crossprod(y) - H.full
      D <- diag(1/sqrt(diag(E.full)))
      Ef.qr <- Map(function(e) qr(D %*% e %*% D), Es)
      Ef.qr.full <- qr(D %*% E.full %*% D)
      EH <- Map(function(e, h) qr.coef(e, (D %*% h %*% D)), Ef.qr, Hs)
      EH[[k+1]] <- qr.coef(Ef.qr.full, (D %*% H.full %*% D))
      names(EH)[[k+1]] <- "full.model"
      eig.d <- Map(function(e, r) getEigs(e, r),
                   EH, EH.rank)
      if(verbose) out <- EH else out <- eig.d
      names(out) <- c(trms, "full.model")
      out
    })
  } else {
    if(!RRPP) {
      fitted <- lapply(fitted, function(.) matrix(0, n, p))
      res <- lapply(res, function(.) Y)
      rrpp.args$fitted <- fitted
      rrpp.args$residuals <- res
    }
    Qr <- lapply(fit$wXrs, qr)
    Qf <- lapply(fit$wXfs, qr)
    Ur <- lapply(Qr, qr.Q)
    Uf <- lapply(Qf, qr.Q)
    Ufull <- Uf[[k]]
    int <- attr(fit$Terms, "intercept")
    Unull <- qr.Q(qr(rep(int, n)))
    H <- Map(function(f, r) tcrossprod(f) - tcrossprod(r), Uf, Ur)
    Hfull <- tcrossprod(Ufull)
    Hnull <- tcrossprod(Unull)
    EH.rank <-  c(Map(function(r, f) f$rank - r$rank, Qr, Qf), qr(Hfull)$rank - qr(Hnull)$rank)
    
    yh0 <- fastFit(Unull, Y, n, p)
    r0 <- Y - yh0
    result <- lapply(1: perms, function(j){
      step <- j
      if(print.progress) setTxtProgressBar(pb,step)
      x <-ind[[j]]
      rrpp.args$ind.i <- x
      Yi <- do.call(rrpp, rrpp.args)
      y <- yh0 + r0[x,]
      Hs <- Map(function(h, y) crossprod(h %*% y), H, Yi)
      Es <- Map(function(y) crossprod(y - Hfull %*% y), Yi)
      names(Hs) <- names(Es) <- trms
      
      if(!is.null(error)) {
        kk <- which(Ematch <= k)
        for(i in 1:length(kk)) {
          e.match <- Ematch[kk[i]]
          Es[[kk[i]]] <- Hs[[e.match]]
        }
      }
      H.full <- crossprod(Hfull %*% y)
      E.full <- crossprod(y) - H.full
      D <- diag(1/sqrt(diag(E.full)))
      Ef.qr <- Map(function(e) qr(D %*% e %*% D), Es)
      Ef.qr.full <- qr(D %*% E.full %*% D)
      EH <- Map(function(e, h) qr.coef(e, (D %*% h %*% D)), Ef.qr, Hs)
      EH[[k+1]] <- qr.coef(Ef.qr.full, (D %*% H.full %*% D))
      names(EH)[[k+1]] <- "full.model"
      eig.d <- Map(function(e, r) getEigs(e, r),
                   EH, EH.rank)
      
      if(verbose) out <- EH else out <- eig.d
      
      out
    })
  }
  
  MANOVA <- list()
  for(i in 1:(k+1)){
    res <- MANOVA[[i]] <- lapply(1:perms, function(j){
      res <- result[[j]][[i]]
    })
    names(res) <- c("obs", paste("iter", 1:(perms - 1), sep = "."))
    MANOVA[[i]] <- res
  }
  names(MANOVA) <- names(result[[1]])
  
  out <- o.fit
  out$MANOVA <- list(MANOVA = MANOVA, verbose = verbose, error = error, 
                     e.rank = e.rank, PCA = PCA, manova.pc.dims = d)
  if(verbose) names(out$MANOVA)[[1]] <- "invR.H" else 
    names(out$MANOVA)[[1]] <- "eigs"
  
  if(print.progress) {
    step <- perms + 1
    setTxtProgressBar(pb,step)
    close(pb)
  }
  
  class(out) <- c("manova.lm.rrpp", class(o.fit))
  out
}