% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/UVA.R
\name{UVA}
\alias{UVA}
\title{Unique Variable Analysis}
\usage{
UVA(
  data,
  n = NULL,
  model = c("glasso", "TMFG"),
  corr = c("cor_auto", "pearson", "spearman"),
  method = c("cor", "pcor", "wTO"),
  type = c("adapt", "alpha", "threshold"),
  sig,
  key = NULL,
  reduce = TRUE,
  auto = TRUE,
  label.latent = FALSE,
  reduce.method = c("latent", "remove", "sum"),
  lavaan.args = list(),
  adhoc = TRUE,
  plot.redundancy = FALSE,
  plot.args = list()
)
}
\arguments{
\item{data}{Matrix or data frame.
Input can either be data or a correlation matrix}

\item{n}{Numeric.
If input in \code{data} is a correlation matrix, 
then sample size is required.
Defaults to \code{NULL}}

\item{model}{Character.
A string indicating the method to use.
Current options are:

\itemize{

\item{\strong{\code{glasso}}}
{Estimates the Gaussian graphical model using graphical LASSO with
extended Bayesian information criterion to select optimal regularization parameter.
This is the default method}

\item{\strong{\code{TMFG}}}
{Estimates a Triangulated Maximally Filtered Graph}

}}

\item{corr}{Type of correlation matrix to compute. The default uses \code{\link[qgraph]{cor_auto}}.
Current options are:

\itemize{

\item{\strong{\code{cor_auto}}}
{Computes the correlation matrix using the \code{\link[qgraph]{cor_auto}} function from
\code{\link[qgraph]{qgraph}}}

\item{\strong{\code{pearson}}}
{Computes Pearson's correlation coefficient using the pairwise complete observations via
the \code{\link[stats]{cor}} function}

\item{\strong{\code{spearman}}}
{Computes Spearman's correlation coefficient using the pairwise complete observations via
the \code{\link[stats]{cor}} function}
}}

\item{method}{Character.
Computes weighted topological overlap (\code{"wTO"} using \code{\link[qgraph]{EBICglasso}}),
partial correlations (\code{"pcor"}), or correlations (\code{"cor"}).
Defaults to \code{"wTO"}}

\item{type}{Character. Type of significance.
Computes significance using the standard \emph{p}-value (\code{"alpha"}),
adaptive alpha \emph{p}-value (\code{"adapt"}),
or some threshold (\code{"threshold"}).
Defaults to \code{"threshold"}}

\item{sig}{Numeric.
\emph{p}-value for significance of overlap (defaults to \code{.05}).
Defaults for \code{"threshold"} for each \code{method}:

\itemize{

\item{\code{"wTO"}}
{.25}

\item{\code{"pcor"}}
{.35}

\item{\code{"cor"}}
{.50}

}}

\item{key}{Character vector.
A vector with variable descriptions that correspond
to the order of variables input into \code{data}.
Defaults to \code{NULL} or the column names of \code{data}}

\item{reduce}{Boolean.
Should redundancy reduction be performed?
Defaults to \code{TRUE}.
Set to \code{FALSE} for redundancy analysis only}

\item{auto}{Boolean.
Should redundancy reduction be automated?
Defaults to \code{TRUE}.
Set to \code{FALSE} for manual selection}

\item{label.latent}{Boolean.
Should latent variables be labelled?
Defaults to \code{FALSE} for arbitrary labelling (i.e., "LV_").
Set to \code{TRUE} to label latent variables}

\item{reduce.method}{Character.
How should data be reduced?
Defaults to \code{"latent"}

\itemize{

\item{\code{"latent"}}
{Redundant variables will be combined into a latent variable}

\item{\code{"remove"}}
{All but one redundant variable will be removed}

\item{\code{"sum"}}
{Redundant variables are combined by summing across cases (rows)}

}}

\item{lavaan.args}{List.
If \code{reduce.method = "latent"}, then \code{\link[lavaan]{lavaan}}'s \code{\link[lavaan]{cfa}}
function will be used to create latent variables to reduce variables.
Arguments should be input as a list. Some example arguments
(see \code{\link[lavaan]{lavOptions}} for full details):

\itemize{

\item{\code{estimator}}
{Estimator to use for latent variables (see \href{https://lavaan.ugent.be/tutorial/est.html}{Estimators}
for more details). Defaults to \code{"MLR"} for continuous data and \code{"WLSMV"} for mixed and categorical data.
Data are considered continuous data if they have 6 or more categories (see Rhemtulla, Brosseau-Liard, & Savalei, 2012)}

\item{\code{missing}}
{How missing data should be handled. Defaults to \code{"fiml"}}

\item{\code{std.lv}}
{If \code{TRUE}, the metric of each latent variable is determined by fixing their (residual) variances to 1.0.
If \code{FALSE}, the metric of each latent variable is determined by fixing the factor loading of the first
indicator to 1.0. If there are multiple groups, \code{std.lv = TRUE}, and \code{"loadings"} is included in the
\code{group.equal} argument, then only the latent variances of the first group will be fixed to 1.0, while
the latent variances of other groups are set free.
Defaults to \code{TRUE}}

}}

\item{adhoc}{Boolean.
Should adhoc check of redundancies be performed?
Defaults to \code{TRUE}.
If \code{TRUE}, adhoc check will run the redundancy analysis
on the reduced variable set to determine if there are any remaining
redundancies. This check is performed with the arguments:
\code{method = "wTO"}, \code{type = "threshold"}, and \code{sig = .20}.
This check is based on Christensen, Garrido, and Golino's (2020)
simulation where these parameters were found to be the most conservative,
demonstrating few false positives and false negatives}

\item{plot.redundancy}{Boolean.
Should redundancies be plotted in a network plot?
Defaults to \code{FALSE}}

\item{plot.args}{List.
Arguments to be passed onto \code{\link[GGally]{ggnet2}}.
Defaults:

\itemize{

\item{\code{vsize = 6}}{Changes node size}

\item{\code{alpha = 0.4}}{Changes transparency}

\item{\code{label.size = 5}}{Changes label size}

\item{\code{edge.alpha = 0.7}}{Changes edge transparency}

}}
}
\value{
Returns a list:

\item{redundancy}{A list containing several objects:

\itemize{

\item{\code{redundant}}
{Vectors nested within the list corresponding
to redundant nodes with the name of object in the list}

\item{\code{data}}
{Original data}

\item{\code{correlation}}
{Correlation matrix of original data}

\item{\code{weights}}
{Weights determined by weighted topological overlap,
partial correlation, or zero-order correlation}

\item{\code{network}}
{If \code{method = "wTO"}, then
the network computed following \code{\link[EGAnet]{EGA}} with
\code{\link[qgraph]{EBICglasso}} network estimation}

\item{\code{plot}}
{If \code{plot.redundancy = TRUE}, then
a plot of all redundancies found}

\item{\code{descriptives}}{

\itemize{

\item{basic}
{A vector containing the mean, standard deviation,
median, median absolute deviation (MAD), 3 times the MAD, 6 times the MAD,
minimum, maximum, and critical value for the overlap measure
(i.e., weighted topological overlap, partial correlation, or threshold)}

\item{centralTendency}
{A matrix for all (absolute) non-zero values and their
respective standard deviation from the mean and median absolute deviation
from the median}

}
}

\item{\code{method}}
{Returns \code{method} argument}

\item{\code{type}}
{Returns \code{type} argument}

\item{\code{distribution}}
{If \code{type != "threshold"}, then 
distribution that was used to determine significance}

}

}

\item{reduced}{If \code{reduce = TRUE}, then a list containing:

\itemize{

\item{\code{data}}
{New data with redundant variables merged or removed}

\item{\code{merged}}{A matrix containing the variables that were
decided to be redundant with one another}

\item{\code{method}}{Method used to perform redundancy reduction}

}

}

\item{adhoc}{If \code{adhoc = TRUE}, then
the adhoc check containing the same objects as in
the \code{redundancy} list object in the output
}
}
\description{
Identifies redundant variables in a multivariate dataset
using a number of different association methods and types of significance values
(see Christensen, Garrido, & Golino, 2020 for more details)
}
\examples{
# Select Five Factor Model personality items only
idx <- na.omit(match(gsub("-", "", unlist(psychTools::spi.keys[1:5])), colnames(psychTools::spi)))
items <- psychTools::spi[,idx]

# Change names in redundancy output to each item's description
key.ind <- match(colnames(items), as.character(psychTools::spi.dictionary$item_id))
key <- as.character(psychTools::spi.dictionary$item[key.ind])

\dontrun{
# Automated selection of local dependence (default)
uva.results <- UVA(data = items, key = key)

# Produce Methods section
methods.section(uva.results)}

# Manual selection of local dependence
if(interactive()){
uva.results <- UVA(data = items, key = key, auto = FALSE)}

}
\references{
# Simulation using \code{UVA} \cr
Christensen, A. P., Garrido, L. E., & Golino, H. (under review).
Unique Variable Analysis: A novel approach for detecting redundant variables in multivariate data.
\emph{PsyArXiv}.

# Implementation of \code{UVA} (formerly \code{node.redundant}) \cr
Christensen, A. P., Golino, H., & Silvia, P. J. (2020).
A psychometric network perspective on the validity and validation of personality trait questionnaires.
\emph{European Journal of Personality}, \emph{34}, 1095-1108.

# wTO measure \cr
Nowick, K., Gernat, T., Almaas, E., & Stubbs, L. (2009).
Differences in human and chimpanzee gene expression patterns define an evolving network of transcription factors in brain.
\emph{Proceedings of the National Academy of Sciences}, \emph{106}, 22358-22363.

# Selection of CFA Estimator \cr
Rhemtulla, M., Brosseau-Liard, P. E., & Savalei, V. (2012).
When can categorical variables be treated as continuous? A comparison of robust continuous and categorical SEM estimation methods under suboptimal conditions.
\emph{Psychological Methods}, \emph{17}, 354-373.
}
\author{
Alexander Christensen <alexpaulchristensen@gmail.com>
}
