% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simCategorical.R
\name{simCategorical}
\alias{simCategorical}
\title{Simulate categorical variables of population data}
\usage{
simCategorical(simPopObj, additional, method = c("multinom",
  "distribution", "ctree", "cforest", "ranger"), limit = NULL,
  censor = NULL, maxit = 500, MaxNWts = 1500, eps = NULL,
  nr_cpus = NULL, regModel = NULL, seed = 1, verbose = FALSE,
  by = "strata")
}
\arguments{
\item{simPopObj}{a \code{simPopObj} containing population and household
survey data as well as optionally margins in standardized format.}

\item{additional}{a character vector specifying additional categorical
variables available in the sample object of \code{simPopObj} that should be
simulated for the population data.}

\item{method}{a character string specifying the method to be used for
simulating the additional categorical variables. Accepted values are
\code{"multinom"} (estimation of the conditional probabilities using
multinomial log-linear models and random draws from the resulting
distributions), \code{"distribution"} (random draws from the observed
conditional distributions of their multivariate realizations),
\code{"ctree"} (classification trees),
\code{"cforest"} (random forest, implementation in package party) and
\code{"ranger"} (random forest, implementation in package ranger).}

\item{limit}{if \code{method} is \code{"multinom"}, this can be used to
account for structural zeros. If only one additional variable is requested,
a named list of lists should be supplied. The names of the list components
specify the predictor variables for which to limit the possible outcomes of
the response. For each predictor, a list containing the possible outcomes of
the response for each category of the predictor can be supplied. The
probabilities of other outcomes conditional on combinations that contain the
specified categories of the supplied predictors are set to 0. If more than
one additional variable is requested, such a list of lists can be supplied
for each variable as a component of yet another list, with the component
names specifying the respective variables.}

\item{censor}{if \code{method} is \code{"multinom"}, this can be used to
account for structural zeros. If only one additional variable is requested,
a named list of lists or \code{data.frame}s should be supplied. The names of
the list components specify the categories that should be censored. For each
of these categories, a list or \code{data.frame} containing levels of the
predictor variables can be supplied. The probability of the specified
categories is set to 0 for the respective predictor levels. If more than one
additional variable is requested, such a list of lists or \code{data.frame}s
can be supplied for each variable as a component of yet another list, with
the component names specifying the respective variables.}

\item{maxit, MaxNWts}{control parameters to be passed to
\code{\link[nnet]{multinom}} and \code{\link[nnet]{nnet}}. See the help file
for \code{\link[nnet]{nnet}}.}

\item{eps}{a small positive numeric value, or \code{NULL} (the default). In
the former case and if \code{method} is \code{"multinom"}, estimated
probabilities smaller than this are assumed to result from structural zeros
and are set to exactly 0.}

\item{nr_cpus}{if specified, an integer number defining the number of cpus
that should be used for parallel processing.}

\item{regModel}{allows the user to specify the variables or the model used
when simulating additional categorical variables. The following choices are
available if different from NULL.  \itemize{ \item 'basic': only the basic
household variables (generated with \code{\link{simStructure}}) are used.
\item 'available': all available variables (that are common in the sample and
the synthetic population such as previously generated variables) excluding
id-variables, strata variables and household sizes are used for the
modelling. This parameter should be used with care because all factors are
automatically used as factors internally.  \item formula-object: users may also
specify a specific formula (class 'formula') that will be used. Checks are
performed that all required variables are available.  } If method
'distribution' is used, it is only possible to specify a vector of length
one containing one of the choices described above.  If parameter 'regModel'
is NULL, only basic household variables are used in any case.}

\item{seed}{optional; an integer value to be used as the seed of the random
number generator, or an integer vector containing the state of the random
number generator to be restored.}

\item{verbose}{set to TRUE if additional print output should be shown.}

\item{by}{a character string naming the variable by which the data are split for the estimation. Defaults to the strata variable.}
}
\value{
An object of class \code{\linkS4class{simPopObj}} containing survey
data as well as the simulated population data including the categorical
variables specified by argument \code{additional}.
}
\description{
Simulate categorical variables of population data. The household structure
of the population data needs to be simulated beforehand.
}
\details{
The number of cpus is selected automatically in the following manner. The
number of cpus used is equal to the number of strata. However, if the number
of available cpus is less than the number of strata, the number of available
cpus - 1 is used by default. This should be the best strategy, but the user
can also override this decision using argument \code{nr_cpus}.
}
\note{
The basic household structure needs to be simulated beforehand with
the function \code{\link{simStructure}}.
}
\examples{
data(eusilcS) # load sample data
\dontrun{
## approx. 20 seconds computation time
inp <- specifyInput(data=eusilcS, hhid="db030", hhsize="hsize", strata="db040", weight="db090")
## in the following, nr_cpus are selected automatically
simPop <- simStructure(data=inp, method="direct", basicHHvars=c("age", "rb090"))
simPop <- simCategorical(simPop, additional=c("pl030", "pb220a"), method="multinom", nr_cpus=1)
simPop
}
}
\references{
B. Meindl, M. Templ, A. Kowarik, O. Dupriez (2017) Simulation of Synthetic Populations for Survey Data Considering Auxiliary
Information. \emph{Journal of Statistical Software}, \strong{79} (10), 1--38. \doi{10.18637/jss.v079.i10}

A. Alfons, M. Templ (2011) Simulation of close-to-reality population data for household surveys with application to EU-SILC.
\emph{Statistical Methods & Applications}, \strong{20} (3), 383--407. \doi{10.1007/s10260-011-0163-2}
}
\seealso{
\code{\link{simStructure}}, \code{\link{simRelation}},
\code{\link{simContinuous}}, \code{\link{simComponents}}
}
\author{
Bernhard Meindl, Andreas Alfons, Stefan Kraft, Alexander Kowarik, Matthias Templ
}
\keyword{datagen}
