% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lik.R
\name{lik}
\alias{lik}
\title{Likelihood Estimation}
\usage{
lik(
  params,
  query,
  evidence = NULL,
  arf = NULL,
  oob = FALSE,
  log = TRUE,
  batch = NULL,
  parallel = TRUE
)
}
\arguments{
\item{params}{Circuit parameters learned via \code{\link{forde}}.}

\item{query}{Data frame of samples, optionally comprising just a subset of
training features. Likelihoods will be computed for each sample. Missing
features will be marginalized out. See Details.}

\item{evidence}{Optional set of conditioning events. This can take one of
three forms: (1) a partial sample, i.e. a single row of data with some but
not all columns; (2) a data frame of conditioning events, which allows for
inequalities; or (3) a posterior distribution over leaves. See Details.}

\item{arf}{Pre-trained \code{\link{adversarial_rf}} or other object of class
\code{ranger}. This is not required but speeds up computation considerably
for total evidence queries. (Ignored for partial evidence queries.)}

\item{oob}{Only use out-of-bag leaves for likelihood estimation? If
\code{TRUE}, \code{query} must be the same dataset used to train \code{arf}.
Only applicable for total evidence queries.}

\item{log}{Return likelihoods on log scale? Recommended to prevent underflow.}

\item{batch}{Batch size. The default is to compute densities for all
queries in one round, which is always the fastest option if memory allows.
However, with large samples or many trees, it can be more memory efficient
to split the data into batches. This has no impact on results.}

\item{parallel}{Compute in parallel? Must register backend beforehand, e.g.
via \code{doParallel} or \code{doFuture}; see Examples.}
}
\value{
A vector of likelihoods, optionally on the log scale.
}
\description{
Compute the likelihood of input data, optionally conditioned on some event(s).
}
\details{
This function computes the likelihood of input data, optionally conditioned
on some event(s). Queries may be partial, i.e. covering some but not all
features, in which case excluded variables will be marginalized out.

There are three methods for (optionally) encoding conditioning events via the
\code{evidence} argument. The first is to provide a partial sample, where
some but not all columns from the training data are present. The second is to
provide a data frame with three columns: \code{variable}, \code{relation},
and \code{value}. This supports inequalities via \code{relation}.
Alternatively, users may directly input a pre-calculated posterior
distribution over leaves, with columns \code{f_idx} and \code{wt}. This may
be preferable for complex constraints. See Examples.
}
\examples{
# Train ARF and estimate leaf parameters
arf <- adversarial_rf(iris)
psi <- forde(arf, iris)

# Estimate average log-likelihood
ll <- lik(psi, iris, arf = arf, log = TRUE)
mean(ll)

# Identical but slower
ll <- lik(psi, iris, log = TRUE)
mean(ll)

# Partial evidence query
lik(psi, query = iris[1, 1:3])

# Condition on Species = "setosa"
evi <- data.frame(Species = "setosa")
lik(psi, query = iris[1, 1:3], evidence = evi)

# Condition on Species = "setosa" and Petal.Width > 0.3
evi <- data.frame(Species = "setosa", 
                  Petal.Width = ">0.3")
lik(psi, query = iris[1, 1:3], evidence = evi)

\dontrun{
# Parallelization with doParallel
doParallel::registerDoParallel(cores = 4)

# ... or with doFuture
doFuture::registerDoFuture()
future::plan("multisession", workers = 4)
}

}
\references{
Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random
forests for density estimation and generative modeling. In \emph{Proceedings
of the 26th International Conference on Artificial Intelligence and
Statistics}, pp. 5357-5375.
}
\seealso{
\code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{forge}}, \code{\link{expct}}
}
