% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ergm.san.R
\name{san}
\alias{san}
\alias{san.default}
\alias{san.formula}
\alias{san.ergm_model}
\title{Generate networks with a given set of network statistics}
\usage{
san(object, ...)

\method{san}{formula}(
  object,
  response = NULL,
  reference = ~Bernoulli,
  constraints = ~.,
  target.stats = NULL,
  basis = NULL,
  output = c("network", "edgelist", "ergm_state"),
  only.last = TRUE,
  control = control.san(),
  verbose = FALSE,
  offset.coef = NULL,
  ...
)

\method{san}{ergm_model}(
  object,
  reference = ~Bernoulli,
  constraints = ~.,
  target.stats = NULL,
  basis = NULL,
  output = c("network", "edgelist", "ergm_state"),
  only.last = TRUE,
  control = control.san(),
  verbose = FALSE,
  offset.coef = NULL,
  ...
)
}
\arguments{
\item{object}{Either a \code{\link{formula}} or some other supported
representation of an ERGM, such as an \code{\link{ergm_model}} object.
\code{\link{formula}} should be of the form \code{y ~ <model terms>}, where
\code{y} is a network object or a matrix that can be coerced to a
\code{\link[network:network]{network}} object.  For the details on the possible \code{<model
  terms>}, see \code{\link{ergmTerm}}.  To create a
\code{\link[network:network]{network}} object in \R, use the
\code{network()} function, then add nodal attributes to it using
the \code{\%v\%} operator if necessary.}

\item{\dots}{Further arguments passed to other functions.}

\item{response}{Either a character string, a formula, or \code{NULL} (the default), to specify the response attributes and whether the ERGM is binary or valued. Interpreted as follows: \describe{

\item{\code{NULL}}{Model simple presence or absence, via a binary ERGM.}

\item{character string}{The name of the edge attribute whose value is to be modeled. Type of ERGM will be determined by whether the attribute is \code{\link{logical}} (\code{TRUE}/\code{FALSE}) for binary or \code{\link{numeric}} for valued.}

\item{a formula}{must be of the form \code{NAME~EXPR|TYPE} (with \code{|} being literal). \code{EXPR} is evaluated in the formula's environment with the network's edge attributes accessible as variables. The optional \code{NAME} specifies the name of the edge attribute into which the results should be stored, with the default being a concise version of \code{EXPR}. Normally, the type of ERGM is determined by whether the result of evaluating \code{EXPR} is logical or numeric, but the optional \code{TYPE} can be used to override by specifying a scalar of the type involved (e.g., \code{TRUE} for binary and \code{1} for valued).}

}}

\item{reference}{A one-sided formula specifying
the reference measure (\eqn{h(y)}) to be used.
See help for \link[=ergm-references]{ERGM reference measures} implemented in the
\strong{\link[=ergm-package]{ergm}} package.}

\item{constraints}{A formula specifying one or more constraints
on the support of the distribution of the networks being modeled. Multiple constraints
may be given, separated by \dQuote{+} and \dQuote{-} operators. See
\code{\link{ergmConstraint}} for the detailed explanation of
their semantics and also for an indexed list of the constraints visible to the \pkg{ergm} package.

The default is to have no constraints except those provided through
the \code{\link{ergmlhs}} API.

Together with the model terms in the formula and the reference measure, the constraints
define the distribution of networks being modeled.

It is also possible to specify a proposal function directly, either
by passing a string with the function's name (in which case
arguments to the proposal should be specified through the
\code{MCMC.prop.args} argument to the relevant control function), or
by giving it on the LHS of the hints formula passed to the \code{MCMC.prop}
argument to the control function. This will override
the one chosen automatically.

Note that not all possible combinations of constraints and reference
measures are supported. However, for relatively simple constraints
(i.e., those that simply permit or forbid specific dyads or sets of
dyads from changing), arbitrary combinations should be possible.}

\item{target.stats}{A vector of the same length as the number of non-offset statistics
implied by the formula. \matchnames{statistic}}

\item{basis}{If not NULL, a \code{network} object used to start the Markov
chain.  If NULL, this is taken to be the network named in the formula.}

\item{output}{Character, one of \code{"network"} (default),
\code{"edgelist"}, or \code{"ergm_state"}: determines the output
format. Partial matching is performed.}

\item{only.last}{if \code{TRUE}, only return the last network generated;
otherwise, return a \code{\link{network.list}} with a network for each
iteration.}

\item{control}{A list of control parameters for algorithm tuning,
typically constructed with \code{\link[=control.san]{control.san()}}. Its documentation
gives the list of recognized control parameters and their
meaning. The more generic utility \code{\link[=snctrl]{snctrl()}} (StatNet ConTRoL)
also provides argument completion for the available control
functions and limited argument name checking.}

\item{verbose}{A logical or an integer to control the amount of
progress and diagnostic information to be printed. \code{FALSE}/\code{0}
produces minimal output, with higher values producing more
detail. Note that very high values (5+) may significantly slow
down processing.}

\item{offset.coef}{A vector of offset coefficients; these must be passed in by the user.
Note that these should be the same set of coefficients one would pass to \code{ergm} via
its \code{offset.coef} argument.}

\item{formula}{By default, the \code{formula} is taken from the \code{ergm}
object.  If a different \code{formula} object is wanted, specify it here.}
}
\value{
A network or list of networks that hopefully have network
statistics close to the \code{target.stats} vector. No guarantees
are provided about their probability distribution. Additionally,
\code{\link[=attr]{attr()}}-style attributes \code{formula} and \code{stats} are included.
}
\description{
This function attempts to find a network or networks whose statistics match
those passed in via the \code{target.stats} vector.
}
\details{
The following description is an exegesis of section 4 of Krivitsky
et al. (2022).

Let \eqn{\mathbf{g}}{g} be a vector of target statistics for the
network we wish to construct. That is, we are given an arbitrary network
\eqn{\mathbf{y}^0 \in \mathcal{Y}}{y0 ∈ Y}, and we seek a network
\eqn{\mathbf{y} \in \mathcal{Y}}{y ∈ Y} such that
\eqn{\mathbf{g}(\mathbf{y}) \approx \mathbf{g}}{g(y) ≈ g} -- ideally equality is achieved,
but in practice we may have to settle for a close approximation. The
variant of simulated annealing is as follows.

The energy function is defined

\deqn{E_W (\mathbf{y}) = (\mathbf{g}(\mathbf{y}) - \mathbf{g})^\mathsf{T} W (\mathbf{g}(\mathbf{y}) - \mathbf{g}),}{E_W (y) = (g(y) - g)^T W (g(y) - g),}

with \eqn{W} a symmetric positive (barring multicollinearity in statistics)
definite matrix of weights. This function achieves 0 only if the target is
reached. A good choice of this matrix yields a more efficient search.

A standard simulated annealing loop is used, as described below, with some
modifications. In particular, we allow the user to specify a vector of
offsets \eqn{\eta}{η} to bias the annealing, with \eqn{\eta_k = 0}{η_k = 0} 
denoting no offset. Offsets can be used with SAN to forbid certain
statistics from ever increasing or decreasing. As with \code{\link[=ergm]{ergm()}}, offset
terms are specified using the \code{\link[=offset]{offset()}} decorator and their coefficients
specified with the \code{offset.coef} argument. By default, finite offsets are
ignored, but this can be overridden by setting the \code{\link[=control.san]{control.san()}}
argument \code{SAN.ignore.finite.offsets = FALSE}.

The number of simulated annealing runs is specified by the \code{SAN.maxit}
control parameter and the initial value of the temperature \eqn{T} is set
to \code{SAN.tau}. The value of \eqn{T} decreases linearly until \eqn{T = 0}
at the last run, which implies that all proposals that increase
\eqn{E_W (\mathbf{y})}{E_W(y)} are rejected. The weight matrix \eqn{W}
is initially set to \eqn{I_p / p}, where \eqn{I_p} is the identity matrix
of an appropriate dimension. For weight \eqn{W} and temperature \eqn{T},
the simulated annealing iteration proceeds as follows:
\enumerate{
\item Test if \eqn{E_W(\mathbf{y}) = 0}{E_W(y) = 0}. If so, then exit.
\item Generate a perturbed network \eqn{\mathbf{y^*}}{y*} from a proposal that
respects the model constraints. (This is typically the same proposal as
that used for MCMC.)
\item Store the quantity
\eqn{\mathbf{g}(\mathbf{y^*}) - \mathbf{g}(\mathbf{y})}{g(y*) - g(y)}
for later use.
\item Calculate acceptance probability

\deqn{\alpha = \exp[ - (E_W (\mathbf{y^*}) - E_W (\mathbf{y})) / T + \eta^\mathsf{T} (\mathbf{g}(\mathbf{y^*}) - \mathbf{g}(\mathbf{y}))].}{α = exp( - (E_W(y*) - E_W(y)) / T + η' (g(y*) - g(y)) ).}
 
(If \eqn{|\eta_k| = \infty}{|η_k| = ∞} and \eqn{g_k (\mathbf{y^*}) - g_k (\mathbf{y}) = 0}{g_k (y*) - g_k (y) = 0}, their product is defined to be 0.)
\item Replace \eqn{\mathbf{y}}{y} with \eqn{\mathbf{y^*}}{y*} with probability
\eqn{\min(1, \alpha)}{min(1, α)}.
}

After the specified number of iterations, \eqn{T} is updated as described
above, and \eqn{W} is recalculated by first computing a matrix \eqn{S}, the
sample covariance matrix of the proposed differences stored in Step 3
(i.e., whether or not they were rejected), then
\eqn{W = S^+ / tr(S^+)}{W = S+ / tr(S+)}, where \eqn{S^+}{S+} is the
Moore–Penrose pseudoinverse of \eqn{S} and \eqn{tr(S^+)}{tr(S+)} is the
trace of \eqn{S^+}{S+}. The differences in Step 3 closely reflect the
relative variances and correlations among the network statistics.

In Step 2, the many options for MCMC proposals can provide for effective
means of speeding the SAN algorithm's search for a viable network.
}
\section{Methods (by class)}{
\itemize{
\item \code{san(formula)}: Sufficient statistics are specified by a \code{\link{formula}}.

\item \code{san(ergm_model)}: A lower-level function that expects a pre-initialized \code{\link{ergm_model}}.

}}
\examples{
\donttest{
# initialize x to a random undirected network with 50 nodes and a density of 0.05
x <- network(50, density = 0.05, directed = FALSE)
 
# try to find a network on 50 nodes with 300 edges, 150 triangles,
# and 1250 4-cycles, starting from the network x
y <- san(x ~ edges + triangles + cycle(4), target.stats = c(300, 150, 1250))

# check results
summary(y ~ edges + triangles + cycle(4))

# initialize x to a random directed network with 50 nodes
x <- network(50)

# add vertex attributes
x \%v\% 'give' <- runif(50, 0, 1)
x \%v\% 'take' <- runif(50, 0, 1)

# try to find a set of 100 directed edges making the outward sum of
# 'give' and the inward sum of 'take' both equal to 62.5, so in
# edges (i,j) the node i tends to have above average 'give' and j
# tends to have above average 'take'
y <- san(x ~ edges + nodeocov('give') + nodeicov('take'), target.stats = c(100, 62.5, 62.5))

# check results
summary(y ~ edges + nodeocov('give') + nodeicov('take'))


# initialize x to a random undirected network with 50 nodes
x <- network(50, directed = FALSE)

# add a vertex attribute
x \%v\% 'popularity' <- runif(50, 0, 1)

# try to find a set of 100 edges making the total sum of
# popularity(i) and popularity(j) over all edges (i,j) equal to
# 125, so nodes with higher popularity are more likely to be
# connected to other nodes
y <- san(x ~ edges + nodecov('popularity'), target.stats = c(100, 125))
 
# check results
summary(y ~ edges + nodecov('popularity'))

# creates a network with denser "core" spreading out to sparser
# "periphery"
plot(y)
}
}
\references{
Krivitsky, P. N., Hunter, D. R., Morris, M., & Klumb, C. (2022).
ergm 4: Computational Improvements. arXiv preprint arXiv:2203.08198.
}
\keyword{models}
