% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GRSxE.R
\name{GRSxE}
\alias{GRSxE}
\title{Testing gene-environment interactions}
\usage{
GRSxE(
  X,
  y,
  E,
  C = NULL,
  test.type = "bagging",
  B = 500,
  replace = TRUE,
  subsample = ifelse(replace, 1, 0.632),
  test.ind = sample(nrow(X), floor(nrow(X)/2)),
  grs.type = "rf",
  grs.args = list()
)
}
\arguments{
\item{X}{Matrix or data frame of genetic variables such as SNPs usually
coded as 0-1-2.}

\item{y}{Numeric vector of the outcome/phenotype. Binary outcomes such
as a disease status should be coded as 0-1 (control-case).}

\item{E}{Numeric vector of the environmental exposure.}

\item{C}{Optional data frame containing potentially confounding
variables to be adjusted for.}

\item{test.type}{Testing type. The standard setting is \code{"bagging"},
which employs its OOB (out-of-bag) prediction mechanism such that the full
data can be used for both training the GRS and testing the GxE interaction.
Alternatively, this can be set to \code{"holdout"}, which requires
splitting the available data into a training data set and test data set.
For that, \code{test.ind} needs to be set to the data indices used for
testing.}

\item{B}{The number of bagging iterations if \code{test.type = "bagging"} is
used. Also used as the number of trees grown in the random forest if
\code{grs.type = "rf"} is set.}

\item{replace}{Should sampling with or without replacement be performed?
Only used if \code{test.type = "bagging"} is set.}

\item{subsample}{Subsample fraction if \code{test.type = "bagging"} is used.}

\item{test.ind}{Vector of indices in the supplied data for testing the GxE
interaction. Only used if \code{test.type = "holdout"} is set.
The standard setting corresponds to a random 50:50 training-test split.}

\item{grs.type}{Type of GRS to be constructed. Either \code{"rf"} for a
random forest or \code{"elnet"} for an elastic net.}

\item{grs.args}{Optional list of arguments passed to the GRS fitting
procedure.}
}
\value{
An object of class \code{glm} is returned, in which \code{G:E}
  describes the GRSxE term.
}
\description{
Fitting and evaluating GRS (genetic risk scores) for testing the
presence of GxE (gene-environment) interactions.
}
\details{
The GRS is usually constructed through random forests, which take gene-gene
interactions into account and provide an OOB (out-of-bag) prediction
mechanism. Alternatively, a classical GRS construction approach can be
employed by fitting an elastic net.
In that case, bagging can be applied to fit multiple elastic net models so
that OOB predictions can be performed as well.

The advantage of OOB predictions is that they allow the GRS model to be
constructed on the full available data, while performing unbiased
predictions also on the full available data.
Thus, both the GRS construction and the GxE interaction testing can utilize
all observations.

If desired, sampling can be performed without replacement in contrast to
the classical bagging approach that utilizes bootstrap sampling.

Potentially confounding variables can also be supplied that will then be
adjusted for in the GxE interaction testing.

This function uses a GLM (generalized linear model) for modelling the
marginal genetic effect, marginal environmental effect, the GRSxE interaction
effect, and potential confounding effects.
The fitted GLM is returned, which can be, e.g., inspected via
\code{summary(...)} to retrieve the Wald test p-values for the individual
terms. The p-value corresponding to the \code{G:E} term is the p-value
for testing the presence of a GRSxE interaction.
}
\examples{
# Simulate toy genotype data: N individuals and n.snps SNPs coded as 0-1-2
set.seed(101299)
maf <- 0.25
n.snps <- 10
N <- 500
# Sampling probabilities of the genotypes 0, 1 and 2 derived from maf
geno.probs <- c((1-maf)^2, 1-(1-maf)^2-maf^2, maf^2)
genotypes <- sample(0:2, n.snps * N, replace = TRUE, prob = geno.probs)
X <- matrix(genotypes, ncol = n.snps)
colnames(X) <- paste0("SNP", seq_len(n.snps))

# Simulate the environmental exposure; negative draws are set to zero
exposure <- rnorm(N, 20, 10)
E <- replace(exposure, exposure < 0, 0)

# Genetic indicators that enter both simulated outcomes
snp1.ind <- X[, "SNP1"] != 0
snp23.ind <- X[, "SNP2"] != 0 & X[, "SNP3"] == 0

# Outcome WITH a GxE interaction:
# the effect of E depends on the SNP2/SNP3 indicator
y.GxE <- -0.75 + log(2) * snp1.ind +
  log(4) * E/20 * snp23.ind +
  rnorm(N, 0, 2)
# Fit the GRSxE model and inspect the Wald test for the G:E term
fit.GxE <- GRSxE(X, y.GxE, E)
summary(fit.GxE)

# Outcome WITHOUT a GxE interaction:
# E and the SNP2/SNP3 indicator only act additively
y.no.GxE <- -0.75 + log(2) * snp1.ind +
  log(4) * E/20 + log(4) * snp23.ind +
  rnorm(N, 0, 2)
# Fit the GRSxE model and inspect the Wald test for the G:E term
fit.no.GxE <- GRSxE(X, y.no.GxE, E)
summary(fit.no.GxE)
}
\references{
\itemize{
  \item Lau, M., Kress, S., Schikowski, T. & Schwender, H. (2023).
  Efficient gene--environment interaction testing through
  bootstrap aggregating. Scientific Reports 13:937.
  \doi{10.1038/s41598-023-28172-4}
  \item Lau, M., Wigmann, C., Kress, S., Schikowski, T. & Schwender, H. (2022).
  Evaluation of tree-based statistical learning methods for constructing
  genetic risk scores. BMC Bioinformatics 23:97.
  \doi{10.1186/s12859-022-04634-w}
  \item Breiman, L. (1996).
  Bagging predictors. Machine Learning 24:123--140.
  \doi{10.1007/BF00058655}
  \item Breiman, L. (2001).
  Random Forests. Machine Learning 45:5--32.
  \doi{10.1023/A:1010933404324}
  \item Friedman, J., Hastie, T. & Tibshirani, R. (2010).
  Regularization Paths for Generalized Linear Models via Coordinate Descent.
  Journal of Statistical Software 33(1):1--22.
  \doi{10.18637/jss.v033.i01}
}
}
