% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mcmcSAR.R
\name{mcmcSAR}
\alias{mcmcSAR}
\title{Bayesian Estimator of SAR model}
\usage{
mcmcSAR(
  formula,
  contextual,
  start,
  G0.obs,
  G0 = NULL,
  mlinks = list(),
  hyperparms = list(),
  ctrl.mcmc = list(),
  iteration = 2000L,
  data
)
}
\arguments{
\item{formula}{object of class \link[stats]{formula}: a symbolic description of the model. The \code{formula} should be as for example \code{y ~ x1 + x2 | x1 + x2}
where \code{y} is the endogenous vector, the listed variables before the pipe, \code{x1}, \code{x2} are the individual exogenous variables and
the listed variables after the pipe, \code{x1}, \code{x2} are the contextual observable variables. Other formulas may be
\code{y ~ x1 + x2} for the model without contextual effects, \code{y ~ -1 + x1 + x2 | x1 + x2} for the model
without intercept, or \code{ y ~ x1 + x2 | x2 + x3} to allow the contextual variables to be different from the individual variables.}

\item{contextual}{(optional) logical; if true, this means that all individual variables will be set as contextual variables. Setting
\code{formula} as \code{y ~ x1 + x2} and \code{contextual} as \code{TRUE} is equivalent to setting \code{formula} as \code{y ~ x1 + x2 | x1 + x2}.}

\item{start}{(optional) vector of starting values of the model parameters as \eqn{(\beta' ~ \gamma' ~ \alpha ~ \sigma^2)'}{(\beta'  \gamma'  \alpha  se^2)'},
where \eqn{\beta} is the individual variables parameter, \eqn{\gamma} is the contextual variables parameter, \eqn{\alpha} is the peer effect parameter
and \eqn{\sigma^2}{se^2} the variance of the error term. If \code{start} is missing, a Maximum Likelihood estimator will be used, where
the network matrix is that given through the argument \code{G0} (if provided) or generated from its distribution.}

\item{G0.obs}{list of matrices (or simply matrix if the list contains only one matrix) indicating the part of the network data which is observed. If the (i,j)-th element
of the m-th matrix is one, then the element at the same position in the network data will be considered as observed and will not be inferred in the MCMC. In contrast,
if the (i,j)-th element of the m-th matrix is zero, the element at the same position in the network data will be considered as a starting value of the missing link which will be inferred.
\code{G0.obs} can also take \code{"none"} when no part of the network data is observed (equivalent to the case where all the entries are zeros) and \code{"all"} when the network data is fully
observed (equivalent to the case where all the entries are ones).}

\item{G0}{list of sub-network matrices (or simply network matrix if there is only one sub-network). \code{G0} is made up of starting values for the entries with missing network data and observed values for the entries with
observed network data. \code{G0} is optional when \code{G0.obs = "none"}.}

\item{mlinks}{list specifying the network formation model (see Section Network formation model in Details).}

\item{hyperparms}{(optional) is a list of hyperparameters (see Section Hyperparameters in Details).}

\item{ctrl.mcmc}{list of MCMC controls (see Section MCMC control in Details).}

\item{iteration}{number of MCMC steps to be performed.}

\item{data}{optional data frame, list or environment (or object coercible by \link[base]{as.data.frame} to a data frame) containing the variables
in the model. If missing, the variables are taken from \code{environment(formula)}, typically the environment from which \code{mcmcSAR} is called.}
}
\value{
A list consisting of:
\item{n.group}{number of groups.}
\item{N}{vector of each group size.}
\item{time}{elapsed time to run the MCMC in seconds.}
\item{iteration}{number of MCMC steps performed.}
\item{posterior}{matrix (or list of matrices) containing the simulations.}
\item{hyperparms}{return value of \code{hyperparms}.}
\item{mlinks}{return value of \code{mlinks}.}
\item{accept.rate}{acceptance rates.}
\item{prop.net}{proportion of observed network data.}
\item{method.net}{network formation model specification.}
\item{start}{starting values.}
\item{formula}{input value of \code{formula} and \code{mlinks.formula}.}
\item{contextual}{input value of \code{contextual}.}
\item{ctrl.mcmc}{return value of \code{ctrl.mcmc}.}
}
\description{
\code{mcmcSAR} implements the Bayesian estimator of the linear-in-mean SAR model when only the linking probabilities are available or can be estimated.
}
\details{
\subsection{Outcome model}{

The model is given by
\deqn{\mathbf{y} = \mathbf{X}\beta + \mathbf{G}\mathbf{X}\gamma + \alpha \mathbf{G}\mathbf{y} + \epsilon,}{y = X\beta + GX\gamma + \alpha Gy + \epsilon,}
where \deqn{\epsilon \sim N(0, \sigma^2).}{\epsilon ~ N(0, se^2).}
The parameters to estimate in this model are the matrix \eqn{\mathbf{G}}{G}, the vectors \eqn{\beta} and \eqn{\gamma}, and the scalars \eqn{\alpha} and \eqn{\sigma}{se}.
Prior distributions are assumed on \eqn{\mathbf{A}}, the adjacency matrix in which \eqn{\mathbf{A}_{ij} = 1}{A[i,j] = 1} if i is  connected to j and
\eqn{\mathbf{A}_{ij} = 0}{A[i,j] = 0} otherwise, and on \eqn{\beta}, \eqn{\gamma}, \eqn{\alpha} and \eqn{\sigma^2}{se^2}.
\deqn{\mathbf{A}_{ij} \sim Bernoulli(\mathbf{P}_{ij})}{A[i,j] ~ Bernoulli(P[i,j])}
\deqn{(\beta' ~ \gamma')'|\sigma^2 \sim \mathcal{N}(\mu_{\theta}, \sigma^2\Sigma_{\theta})}{(\beta' \gamma')'|se^2 ~ N(mutheta, se^2*stheta)}
\deqn{\zeta = \log\left(\frac{\alpha}{1 - \alpha}\right) \sim \mathcal{N}(\mu_{\zeta}, \sigma_{\zeta}^2)}{\zeta = log(\alpha/(1 - \alpha)) ~ N(muzeta, szeta)}
\deqn{\sigma^2 \sim IG(\frac{a}{2}, \frac{b}{2})}{se^2 ~ IG(a/2, b/2)}
where \eqn{\mathbf{P}}{P} is the linking probability. The linking probability is a hyperparameter that can be set fixed or updated using a network formation model.
}

\subsection{Network formation model}{

The linking probability can be set fixed or updated using a network formation model. Information about how \eqn{\mathbf{P}}{P} should be handled in the MCMC can be set through the
argument \code{mlinks} which should be a list with named elements. Various specifications of network formation model are possible. The list assigned to \code{mlinks} should include
an element named \code{model}. The expected values of \code{model} are \code{"none"} (default value), \code{"logit"}, \code{"probit"}, and \code{"latent space"}.
\itemize{
\item \code{"none"} means that the network distribution \eqn{\mathbf{P}}{P} is set fixed throughout the MCMC,
\item \code{"probit"} or \code{"logit"} implies that the network distribution \eqn{\mathbf{P}}{P} will be updated using a Probit or Logit model,
\item \code{"latent space"} means that \eqn{\mathbf{P}}{P} will be updated following Breza et al. (2020).}
\subsection{Fixed network distribution}{

To set \eqn{\mathbf{P}}{P} fixed, \code{mlinks} could contain,
\itemize{
\item \code{dnetwork}, a list, where the m-th element is the matrix of
link probability in the m-th sub-network.
\item \code{model = "none"} (optional as \code{"none"} is the default value).
}
}

\subsection{Probit and Logit models}{

For the Probit and Logit specification as network formation model, the following elements could be declared in \code{mlinks}.
\itemize{
\item \code{model = "probit"} or \code{model = "logit"}.
\item \code{mlinks.formula} object of class \link[stats]{formula}: a symbolic description of the Logit or Probit model. The \code{formula} should only specify the explanatory variables, as for example \code{~ x1 + x2},
the variables \code{x1} and \code{x2} are the dyadic observable characteristics. Each variable should verify \code{length(x) == sum(N^2 - N)},
where \code{N} is a vector of the number of individuals in each sub-network. Indeed, \code{x} will be associated with the entries
\eqn{(1, 2)}; \eqn{(1, 3)}; \eqn{(1, 4)}; ...; \eqn{(2, 1)}; \eqn{(2, 3)}; \eqn{(2, 4)}; ... of the linking probability and
as so, in all the sub-networks. Functions \code{\link{mat.to.vec}} and \code{\link{vec.to.mat}} can be used to convert a list of dyadic variables in matrix form to a format that suits \code{mlinks.formula}.
\item \code{weights} (optional) is a vector of weights of observed entries. This is important to address the selection problem of observed entries. Default is a vector of ones.
\item \code{estimates} (optional when a part of the network is observed) is a list containing \code{rho}, a vector of the estimates of the Probit or Logit
parameters, and \code{var.rho} the covariance matrix of the estimator. These estimates can be automatically computed when a part of the network data is available.
In this case, \code{rho} and the unobserved part of the network are updated without using the observed part of the network. The latter is assumed non-stochastic in the MCMC.
In addition, if \code{G0.obs = "none"}, \code{estimates} should also include \code{N}, a vector of the number of individuals in each sub-network.
\item \code{prior} (optional) is a list containing \code{rho}, a vector of the prior beliefs on \code{rho}, and \code{var.rho} the prior covariance matrix of \code{rho}. This input
is relevant only when the observed part of the network is used to update \code{rho}, i.e. only when \code{estimates = NULL} (so, either \code{estimates} or \code{prior} should be \code{NULL}). \cr
To understand the difference between
\code{estimates} and \code{prior}, note that \code{estimates} includes initial estimates of \code{rho} and \code{var.rho}, meaning that the observed part of the network is not used in the MCMC
to update \code{rho}. In contrast, \code{prior} contains the prior beliefs of the user, and therefore, \code{rho} is updated using this prior and information from the observed part of the network.
In addition, if \code{G0.obs = "none"}, \code{prior} should also include \code{N}, a vector of the number of individuals in each sub-network.
\item \code{mlinks.data} optional data frame, list or environment (or object coercible by \link[base]{as.data.frame} to a data frame) containing the dyadic observable characteristics.
If missing, the variables will be taken from \code{environment(mlinks.formula)}, typically the environment from which \code{mcmcSAR} is called.
}
}

\subsection{Latent space models}{

The following elements could be declared in \code{mlinks}.
\itemize{
\item \code{model = "latent space"}.
\item \code{estimates} a list of objects of class \code{mcmcARD}, where the m-th element is Breza et al. (2020) estimator as returned by the function \code{\link{mcmcARD}}
in the m-th sub-network.
\item \code{mlinks.data} (required only when ARD are partially observed) is a list of matrices, where the m-th element is the variable matrix to use to compute distance between individuals (could be the list of traits) in the m-th sub-network.
The distances will be used to compute gregariousness and coordinates for individuals without ARD by k-nearest neighbors approach.
\item \code{obsARD} (required only when ARD are partially observed) is a list of logical vectors, where the i-th entry of the m-th vector indicates by \code{TRUE} or \code{FALSE} if  the i-th individual in the m-th
sub-network has ARD or not.
\item \code{mARD} (optional, default value is \verb{rep(1, M)}) is a vector indicating the number of neighbors to use in each sub-network.
\item \code{burninARD} (optional) set the burn-in to summarize the posterior distribution in \code{estimates}.
}
}

}

\subsection{Hyperparameters}{

All the hyperparameters can be defined through the argument \code{hyperparms} (a list) and should be named as follows.
\itemize{
\item \code{mutheta}, the prior mean of \eqn{(\beta' ~ \gamma')'|\sigma^2}{(\beta' \gamma')'|se^2}. The default value assumes that
the prior mean is zero.
\item \code{invstheta} as \eqn{\Sigma_{\theta}^{-1}}{inverse of `stheta`}. The default value is a diagonal matrix with 0.01 on the diagonal.
\item \code{muzeta}, the prior mean of \eqn{\zeta}. The default value is zero.
\item \code{invszeta}, the inverse of the prior variance of \eqn{\zeta} with default value equal to 2.
\item \code{a} and \code{b}, whose default values are 4.2 and 2.2, respectively. This means for example that the prior mean of \eqn{\sigma^2}{se^2} is 1.
}
Inverses are used for the prior variance through the argument \code{hyperparms} in order to allow non-informative priors. Setting the inverse of the prior
variance to 0 is equivalent to assuming a non-informative prior.
}

\subsection{MCMC control}{

During the MCMC, the jumping scales of \eqn{\alpha} and \eqn{\rho} are updated following Atchade and Rosenthal (2005) in order to target the acceptance rate to the \code{target} value. This
requires setting minimal and maximal jumping scales through the parameter \code{ctrl.mcmc}. The parameter \code{ctrl.mcmc} is a list which can contain the following named components.
\itemize{
\item{\code{target}}: the default value is \code{c("alpha" = 0.44, "rho" = 0.234)}.
\item{\code{jumpmin}}: the default value is \code{c("alpha" = 1e-5, "rho" = 1e-5)}.
\item{\code{jumpmax}}: the default value is \code{c("alpha" = 10, "rho" = 10)}.
\item{\code{print.level}}: an integer in \{0, 1, 2\} that indicates if the MCMC progression should be printed in the console.
If 0, the MCMC progression is not printed. If 1 (default value), the progression is printed and if 2,
the simulations from the posterior distribution are printed.
\item{\code{block.max}}: The maximal number of entries that can be updated simultaneously in \eqn{\mathbf{A}}{A}. It might be
more efficient to update simultaneously 2 or 3 entries (see Boucher and Houndetoungan, 2022).
}
If \code{block.max} > 1, several entries are randomly chosen from the same row and updated simultaneously. The number of entries chosen is randomly
chosen between 1 and \code{block.max}. In addition, the entries are not chosen in order. For example, on the row i, the entries (i, 5) and (i, 9) can be updated simultaneously,
then the entries (i, 1), (i, 3), (i, 8), and so on.
}
}
\examples{
# We assume that the network is fully observed
# See our vignette for examples where the network is partially observed
# Number of groups
M             <- 10
# size of each group
N             <- rep(20,M)
# individual effects
beta          <- c(2,1,1.5)
# contextual effects
gamma         <- c(5,-3)
# endogenous effects
alpha         <- 0.4
# std-dev errors
se            <- 1
# prior distribution
prior         <- runif(sum(N*(N-1)))
prior         <- vec.to.mat(prior, N, normalise = FALSE)
# covariates
X             <- cbind(rnorm(sum(N),0,5),rpois(sum(N),7))
# true network
G0            <- sim.network(prior)
# normalise
G0norm        <- norm.network(G0)
GX            <- peer.avg(G0norm, X)
# simulate the dependent variable using an external package
y             <- simSAR(~ X + GX, Glist = G0norm,
                        parms = c(alpha, beta, gamma), 
                        epsilon = rnorm(sum(N), sd = se))
y             <- y$y
# dataset
dataset       <- as.data.frame(cbind(y, X1 = X[,1], X2 = X[,2]))
out.none1     <- mcmcSAR(formula = y ~ X1 + X2, contextual = TRUE, G0.obs = "all",
                         G0 = G0, data = dataset, iteration = 3000)
summary(out.none1)
plot(out.none1)
plot(out.none1, plot.type = "dens")
}
\seealso{
\code{\link{smmSAR}}, \code{\link{sim.IV}}
}
