% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fit_dat.R
\name{fit_dat}
\alias{fit_dat}
\title{Fit a Penalized Generalized Mixed Model via Monte Carlo Expectation Conditional 
Minimization (MCECM)

\code{fit_dat} is used to fit a penalized generalized mixed model
via Monte Carlo Expectation Conditional Minimization (MCECM) for 
a single tuning parameter combination and is called within
\code{glmmPen} or \code{glmm} (cannot be called directly by user)}
\usage{
fit_dat(
  dat,
  lambda0 = 0,
  lambda1 = 0,
  conv_EM = 0.001,
  conv_CD = 1e-04,
  family = "binomial",
  offset_fit = NULL,
  trace = 0,
  penalty = c("MCP", "SCAD", "lasso"),
  alpha = 1,
  gamma_penalty = switch(penalty[1], SCAD = 4, 3),
  group_X = 0:(ncol(dat$X) - 1),
  nMC_burnin = 250,
  nMC = 250,
  nMC_max = 5000,
  t = 2,
  mcc = 2,
  u_init = NULL,
  coef_old = NULL,
  ufull_describe = NULL,
  maxitEM = 50,
  maxit_CD = 250,
  M = 10^4,
  sampler = c("stan", "random_walk", "independence"),
  adapt_RW_options = adaptControl(),
  covar = c("unstructured", "independent"),
  var_start = 1,
  logLik_calc = FALSE,
  checks_complete = FALSE,
  ranef_keep = rep(1, times = (ncol(dat$Z)/nlevels(dat$group))),
  conv_type = 1,
  progress = TRUE
)
}
\arguments{
\item{dat}{a list object specifying y (response vector), X (model matrix of all covariates), 
Z (model matrix for the random effects), and group (numeric factor vector whose value indicates 
the study, batch, or other group identity to which an observation belongs)}

\item{lambda0}{a non-negative numeric penalty parameter for the fixed effects parameters}

\item{lambda1}{a non-negative numeric penalty parameter for the (grouped) random effects
covariance parameters}

\item{conv_EM}{a non-negative numeric convergence criterion for the convergence of the 
EM algorithm. Default is 0.001. 
EM algorithm is considered to have converged if the average Euclidean 
distance between the current coefficient estimates and the coefficient estimates from 
\code{t} EM iterations back is less than \code{conv_EM} \code{mcc} times in a row.
See \code{t} and \code{mcc} for more details.}

\item{conv_CD}{a non-negative numeric convergence criterion for the convergence of the 
grouped coordinate descent loop within the M step of the EM algorithm. Default is 0.0001.}

\item{family}{a description of the error distribution and link function to be used in the model. 
Currently, the \code{glmmPen} algorithm allows the binomial, gaussian, and poisson families
with canonical links only.}

\item{offset_fit}{This can be used to specify an a priori known component to be included in the 
linear predictor during fitting. This should be \code{NULL} or a numeric vector of length equal to the 
number of cases.}

\item{trace}{an integer specifying print output to include as function runs. Default value is 0. 
See Details for more information about output provided when trace = 0, 1, or 2.}

\item{penalty}{character describing the type of penalty to use in the variable selection procedure.
Options include 'MCP', 'SCAD', and 'lasso'. Default is MCP penalty. If the random effect covariance
matrix is "unstructured", then a group MCP, group SCAD, or group Lasso penalty is used on the 
random effects coefficients.}

\item{alpha}{Tuning parameter for the Mnet estimator which controls the relative contributions 
from the MCP/SCAD/lasso penalty and the ridge, or L2, penalty. \code{alpha=1} is equivalent to 
the MCP/SCAD/lasso penalty, while \code{alpha=0} is equivalent to ridge regression. However,
\code{alpha=0} is not supported; \code{alpha} may be arbitrarily small, but not exactly zero}

\item{gamma_penalty}{The tuning parameter of the MCP and SCAD penalties. Not used by Lasso penalty.
Default is 4.0 for SCAD and 3.0 for MCP.}

\item{group_X}{vector describing the grouping of the covariates in the model matrix.}

\item{nMC_burnin}{positive integer specifying the number of posterior samples to use as
burnin for each E step in the EM algorithm. If set to \code{NULL}, the algorithm inputs
the following defaults: Default 250 when the number of random effects 
predictors is less than or equal to 10; default 100 otherwise. Function will not allow \code{nMC_burnin}
to be less than 100.}

\item{nMC}{a positive integer for the initial number of Monte Carlo draws. See the \code{nMC_start}
argument in \code{\link{optimControl}} for more details.}

\item{nMC_max}{a positive integer for the maximum number of allowed Monte Carlo draws used
in each step of the EM algorithm. If set to \code{NULL}, the algorithm inputs the following 
defaults: When the number of random effect predictors is 10 or less, 
Default is set to 5000 when no selection is performed and 2500 when selection is performed.
Default is set to 1000 when the number of random effect predictors is greater than 10.}

\item{t}{the convergence criteria is based on the average Euclidean distance between 
the most recent coefficient estimates and the coefficient estimates from t EM iterations back.
Positive integer, default equals 2.}

\item{mcc}{the number of times the convergence criteria must be met before the algorithm is
seen as having converged (mcc for 'meet condition counter'). Default set to 2. Value restricted 
to be no less than 2.}

\item{u_init}{matrix giving values to initialize samples from the posterior. If 
Binomial or Poisson families, only need a single row to initialize samples from
the posterior; if Gaussian family, multiple rows needed to initialize the estimate
of the residual error (needed for the E-step). Columns correspond to the 
columns of the Z random effect model matrix.}

\item{coef_old}{vector giving values to initialize the coefficients (both fixed
and random effects)}

\item{ufull_describe}{output from \code{bigmemory::describe} (which returns a list 
of the information needed to attach to a big.matrix object) applied to the
big.matrix of posterior samples from the 'full' model. The big.matrix 
described by the object is used to calculate the BIC-ICQ value for the model.}

\item{maxitEM}{a positive integer for the maximum number of allowed EM iterations. 
If set to \code{NULL}, then the algorithm inputs the following defaults:
Default equals 50 for the Binomial and Poisson families, 100 for the Gaussian family.}

\item{maxit_CD}{a positive integer for the maximum number of allowed iterations for the
coordinate descent algorithms used within the M-step of each EM iteration. Default equals 250.}

\item{M}{positive integer specifying the number of posterior samples to use within the 
Pajor log-likelihood calculation. Default is 10^4; minimum allowed value is 5000.}

\item{sampler}{character string specifying whether the posterior samples of the random effects
should be drawn using Stan (default, from package rstan) or the Metropolis-within-Gibbs procedure 
incorporating an adaptive random walk sampler ("random_walk") or an
independence sampler ("independence"). If using the random walk sampler, see \code{\link{adaptControl}}
for some additional control structure parameters.}

\item{adapt_RW_options}{a list of class "adaptControl" from function \code{\link{adaptControl}} 
containing the control parameters for the adaptive random walk Metropolis-within-Gibbs procedure. 
Ignored if \code{\link{optimControl}} parameter \code{sampler} is set to "stan" (default) or "independence".}

\item{covar}{character string specifying whether the covariance matrix should be unstructured
("unstructured") or diagonal with no covariances between variables ("independent").
Default is set to \code{NULL}. If \code{covar} is set to \code{NULL} and the number of random effects
predictors (not including the intercept) is 
greater than or equal to 10 (i.e. high dimensional), then the algorithm automatically assumes an 
independent covariance structure and \code{covar} is set to "independent". Otherwise if \code{covar}
is set to \code{NULL} and the number of random effects predictors is less than 10, then the
algorithm automatically assumes an unstructured covariance structure and \code{covar} is set to "unstructured".}

\item{var_start}{either the character string "recommend" or a positive number specifying the 
starting values to initialize the variance of the covariance matrix. Default "recommend" first
fits a simple model with a fixed and random intercept only using a Laplace approximation. The 
random intercept variance estimate from this model is then multiplied by 2 and used as the 
starting variance.}

\item{logLik_calc}{logical value specifying if the log likelihood (and log-likelihood based 
calculations BIC, BICh, and BICNgrp) should be calculated for all of the models in the selection procedure. 
If BIC-ICQ is used for selection, the log-likelihood is not needed for each model. 
However, if users are interested
in comparing the best models from BIC-ICQ and other BIC-type selection criteria, setting
\code{logLik_calc} to \code{TRUE} will calculate these other quantities for all of the models.}

\item{checks_complete}{logical value indicating whether the function has been called within
\code{glmm} or \code{glmmPen} or whether the function has been called by itself. 
Used for package testing purposes (user cannot directly call \code{fit_dat}). If true,
performs additional checks on the input data. If false, assumes data input checks have 
already been performed.}

\item{ranef_keep}{vector of 0s and 1s indicating which random effects should 
be considered as non-zero at the start of the algorithm. For each random effect,
1 indicates the random effect should be considered non-zero at start of algorithm,
0 indicates otherwise. The first element for the random intercept should always be 1.}

\item{conv_type}{integer specifying which type of convergence criteria to use. Default 1 specifies
using the average Euclidean distance, and 2 specifies using relative change in the Q-function
estimate. For now, all calls to \code{fit_dat} within the \code{glmmPen} framework
restrict this convergence type to be the average Euclidean distance. However,
we keep this argument in case we decide to allow multiple convergence type options in
future versions of the package.}

\item{progress}{a logical value indicating if additional output should be given showing the
progress of the fit procedure. If \code{TRUE}, such output includes iteration-level information
for the fit procedure (iteration number EM_iter,
number of MCMC draws nMC, average Euclidean distance between current coefficients and coefficients
from t--defined in \code{\link{optimControl}}--iterations back EM_conv, 
and number of non-zero fixed and random effects
including the intercept). Additionally, \code{progress = TRUE}
gives some other information regarding the progress of the variable selection 
procedure, including the model selection criteria and log-likelihood estimates
for each model fit.
Default is \code{TRUE}.}
}
\value{
a list with the following elements:
\item{coef}{a numeric vector of coefficients of fixed effects estimates and 
non-zero estimates of the lower-triangular Cholesky decomposition of the random effects
covariance matrix (in vector form)}
\item{sigma}{random effects covariance matrix}
\item{lambda0, lambda1}{the penalty parameters input into the function}
\item{covgroup}{Organization of how random effects coefficients are grouped.}
\item{J}{a sparse matrix that transforms the non-zero elements of the lower-triangular Cholesky 
decomposition of the random effects covariance matrix into a vector. For unstructured
covariance matrices, the dimension is q^2 x (q(q+1)/2) (where q = number of random effects).
For independent covariance matrices, q^2 x q.}
\item{ll}{estimate of the log likelihood, calculated using the Pajor method}
\item{BICh}{the hybrid BIC estimate described in Delattre, Lavielle, and Poursat (2014)}
\item{BIC}{Regular BIC estimate}
\item{BICNgrps}{BIC estimate with N = number of groups in penalty term instead of N = number
of total observations.}
\item{BICq}{BIC-ICQ estimate}
\item{u}{a matrix of the Monte Carlo draws. Organization of columns: first by random effect variable,
then by group within variable (i.e. Var1:Grp1 Var1:Grp2 ... Var1:GrpK Var2:Grp1 ... Varq:GrpK)}
\item{gibbs_accept_rate}{a matrix of the ending Gibbs acceptance rates for each variable (columns)
and each group (rows) when the sampler is either "random_walk" or "independence"}
\item{proposal_SD}{a matrix of the ending proposal standard deviations (used in the adaptive
random walk version of the Metropolis-within-Gibbs sampling) for each variable (columns) and
each group (rows)}
}
\description{
Fit a Penalized Generalized Mixed Model via Monte Carlo Expectation Conditional 
Minimization (MCECM)

\code{fit_dat} is used to fit a penalized generalized mixed model
via Monte Carlo Expectation Conditional Minimization (MCECM) for 
a single tuning parameter combination and is called within
\code{glmmPen} or \code{glmm} (cannot be called directly by user)
}
