% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cv-select.R
\name{cv.function}
\alias{cv.function}
\alias{selectStepAIC}
\alias{selectTrans}
\alias{selectTransStepAIC}
\alias{selectModelList}
\alias{compareFolds}
\alias{coef.cvSelect}
\alias{cvInfo.cvSelect}
\title{Cross-Validate a Model-Selection Procedure}
\usage{
\method{cv}{`function`}(
  model,
  data,
  criterion = mse,
  k = 10L,
  reps = 1L,
  seed = NULL,
  working.model = NULL,
  y.expression = NULL,
  confint = n >= 400L,
  level = 0.95,
  details = k <= 10L,
  save.model = FALSE,
  ncores = 1L,
  ...
)

selectStepAIC(
  data,
  indices,
  model,
  criterion = mse,
  AIC = TRUE,
  details = TRUE,
  save.model = FALSE,
  ...
)

selectTrans(
  data,
  indices,
  details = TRUE,
  save.model = FALSE,
  model,
  criterion = mse,
  predictors,
  response,
  family = c("bcPower", "bcnPower", "yjPower", "basicPower"),
  family.y = c("bcPower", "bcnPower", "yjPower", "basicPower"),
  rounded = TRUE,
  ...
)

selectTransStepAIC(
  data,
  indices,
  details = TRUE,
  save.model = FALSE,
  model,
  criterion = mse,
  predictors,
  response,
  family = c("bcPower", "bcnPower", "yjPower", "basicPower"),
  family.y = c("bcPower", "bcnPower", "yjPower", "basicPower"),
  rounded = TRUE,
  AIC = TRUE,
  ...
)

selectModelList(
  data,
  indices,
  model,
  criterion = mse,
  k = 10L,
  k.meta = k,
  details = k <= 10L,
  save.model = FALSE,
  seed = FALSE,
  quietly = TRUE,
  ...
)

compareFolds(object, digits = 3, ...)

\method{coef}{cvSelect}(object, average, NAs = 0, ...)

\method{cvInfo}{cvSelect}(
  object,
  what = c("CV criterion", "adjusted CV criterion", "full CV criterion", "confint", "SE",
    "k", "seed", "method", "criterion name", "selected model"),
  ...
)
}
\arguments{
\item{model}{a regression model object fit to data, or for the
\code{cv()} \code{"function"} method, a model-selection procedure function
(see Details).}

\item{data}{full data frame for model selection.}

\item{criterion}{a CV criterion ("cost" or lack-of-fit) function.}

\item{k}{perform k-fold cross-validation (default is 10); \code{k}
may be a number or \code{"loo"} or \code{"n"} for n-fold (leave-one-out)
cross-validation.}

\item{reps}{number of times to replicate k-fold CV (default is \code{1}).}

\item{seed}{for R's random-number generator; not used for n-fold cross-validation.
If not explicitly set, a seed is randomly generated and saved to make the results
reproducible. In some cases, for internal use only, \code{seed} is set to
\code{FALSE} to suppress automatically setting the seed.}

\item{working.model}{a regression model object fit to data, typically
to begin a model-selection process; for use with \code{selectModelList()},
a list of competing models created by \code{\link{models}()}.}

\item{y.expression}{normally the response variable is found from the
\code{model} or \code{working.model} argument; but if, for a particular selection procedure, the
\code{model} or \code{working.model} argument is absent, or if the response can't be inferred from the
model, the response can be specified by an expression, such as \code{expression(log(income))},
to be evaluated within the data set provided by the \code{data} argument.}

\item{confint}{if \code{TRUE} (the default if the number of cases is 400
or greater), compute a confidence interval for the bias-corrected CV
criterion, if the criterion is the average of casewise components.}

\item{level}{confidence level (default \code{0.95}).}

\item{details}{if \code{TRUE}, save detailed information about the value of the
CV criterion for the cases in each fold and the regression coefficients
(and possibly other information)
with that fold deleted; default is \code{TRUE} if \code{k} is 10 or smaller,
\code{FALSE} otherwise.}

\item{save.model}{save the model that's selected using the \emph{full} data set
(default, \code{FALSE}).}

\item{ncores}{number of cores to use for parallel computations
(default is \code{1}, i.e., computations aren't done in parallel).}

\item{...}{for \code{cvSelect()} and the \code{cv()} \code{"function"} method,
arguments to be passed to \code{procedure()};
for \code{selectStepAIC()} and \code{selectTransStepAIC()},
arguments to be passed to \code{stepAIC()}.}

\item{indices}{indices of cases in data defining the current fold.}

\item{AIC}{if \code{TRUE} (the default) use the AIC as the
model-selection criterion; if \code{FALSE}, use the BIC.
The \code{k} argument to \code{\link[MASS]{stepAIC}()}
is set accordingly (note that this is distinct from the number of
folds \code{k}).}

\item{predictors}{character vector of names of the predictors in the model
to transform; if missing, no predictors will be transformed.}

\item{response}{name of the response variable; if missing, the response
won't be transformed.}

\item{family}{transformation family for the predictors, one of
\code{"bcPower", "bcnPower", "yjPower", "basicPower"},
with \code{"bcPower"} as the default. These are the names of transformation
functions in the \pkg{car} package; see \code{\link[car]{bcPower}()}.}

\item{family.y}{transformation family for the response,
with \code{"bcPower"} as the default.}

\item{rounded}{if \code{TRUE} (the default) use nicely rounded versions
of the estimated transformation parameters (see \code{\link[car]{bcPower}()}).}

\item{k.meta}{the number of folds for meta CV; defaults
to the value of \code{k}; may be specified as \code{"loo"} or
\code{"n"} as well as an integer.}

\item{quietly}{if \code{TRUE} (the default), simple messages (for example about the
value to which the random-number generator seed is set), but not warnings or
errors, are suppressed.}

\item{object}{an object of class \code{"cvSelect"}.}

\item{digits}{significant digits for printing coefficients
(default \code{3}).}

\item{average}{if supplied, a function, such as \code{mean} or \code{median},
to use in averaging estimates across folds; if missing, the
estimates for each fold are returned.}

\item{NAs}{values to substitute for \code{NA}s in calculating
averaged estimates; the default, \code{0}, is appropriate, e.g.,
for regression coefficients; the value \code{1} might be appropriate
for power-transformation estimates.}

\item{what}{the information to extract from a \code{"cvSelect"} object,
one of: \code{"CV criterion"}, \code{"adjusted CV criterion"},
\code{"full CV criterion"} (the CV criterion applied to the model fit to the
full data set), \code{"SE"} (the standard error of the adjusted CV criterion),
\code{"confint"} (confidence interval for the adjusted CV criterion),
\code{"k"} (the number of folds), \code{"seed"} (the seed employed for
R's random-number generator), \code{"method"} (the computational method
employed, e.g., for a \code{"lm"} model object), \code{"criterion name"}
(the CV criterion employed), or \code{"selected model"} (the model object
for the model that was selected); not all of these elements may be present, in
which case \code{cvInfo()} would return \code{NULL}.}
}
\value{
An object of class \code{"cvSelect"},
inheriting from class \code{"cv"}, with the CV criterion
(\code{"CV crit"}), the bias-adjusted CV criterion (\code{"adj CV crit"}),
the criterion for the model applied to the full data (\code{"full crit"}),
the confidence interval and level for the bias-adjusted CV criterion (\code{"confint"}),
the number of folds (\code{"k"}), the seed for R's random-number
generator (\code{"seed"}), and (optionally) a list of coefficients
(or, in the case of \code{selectTrans()}, estimated transformation
parameters, and in the case of \code{selectTransStepAIC()}, both regression coefficients
and transformation parameters) for the selected models
for each fold (\code{"coefficients"}).
If \code{reps} > \code{1}, then an object of class \code{c("cvSelectList", "cvList")} is returned,
which is literally a list of \code{c("cvSelect", "cv")} objects.
}
\description{
The \code{cv()} \code{"function"} method
is a general function to cross-validate a model-selection procedure,
such as the following:
\code{selectStepAIC()} is a procedure that applies the \code{\link[MASS]{stepAIC}()}
model-selection function in the \pkg{MASS} package; \code{selectTrans()} is a procedure
for selecting predictor and response transformations in regression, which
uses the \code{\link[car]{powerTransform}()} function in the
\pkg{car} package; \code{selectTransStepAIC()} combines predictor and response
transformations with predictor selection; and \code{selectModelList()}
uses cross-validation to select a model from a list of models created by
\code{\link{models}()} and employs (meta) cross-validation to assess the predictive
accuracy of this procedure.
}
\details{
The model-selection function supplied as the \code{procedure} (for \code{cvSelect()})
or \code{model} (for \code{cv()}) argument
should accept the following arguments:
\describe{
\item{\code{data}}{set to the \code{data} argument to \code{cvSelect()} or \code{cv()}.}
\item{\code{indices}}{the indices of the rows of \code{data} defining the current fold; if missing,
the model-selection procedure is applied to the full \code{data}.}
\item{other arguments}{to be passed via \code{...}
from \code{cvSelect()} or \code{cv()}.}
}
\code{procedure()} or \code{model()} should return a list with the following
named elements: \code{fit.i}, the vector of predicted values for the cases in
the current fold computed from the model omitting these cases;
\code{crit.all.i}, the CV criterion computed for all of the cases using
the model omitting the current fold; and (optionally) \code{coefficients},
parameter estimates from the model computed omitting the current fold.

When the \code{indices} argument is missing, \code{procedure()} returns the cross-validation criterion for all of the cases based on
the model fit to all of the cases.

For examples of model-selection functions for the \code{procedure}
argument, see the code for \code{selectStepAIC()},
\code{selectTrans()}, and \code{selectTransStepAIC()}.

For additional information, see the "Cross-validating model selection"
vignette (\code{vignette("cv-select", package="cv")})
and the "Extending the cv package" vignette
(\code{vignette("cv-extend", package="cv")}).
}
\section{Functions}{
\itemize{
\item \code{cv(`function`)}: \code{cv()} method for applying a
model-selection (or specification) procedure.

\item \code{selectStepAIC()}: select a regression model using the
\code{\link[MASS]{stepAIC}()} function in the \pkg{MASS} package.

\item \code{selectTrans()}: select transformations of the predictors and response
using \code{\link[car]{powerTransform}()} in the \pkg{car} package.

\item \code{selectTransStepAIC()}: select transformations of the predictors and response,
and then select predictors.

\item \code{selectModelList()}: select a model using (meta) CV.

\item \code{compareFolds()}: print the coefficients from the selected models
for the several folds.

\item \code{coef(cvSelect)}: extract the coefficients from the selected models
for the several folds and possibly average them.

}}
\examples{
if (requireNamespace("ISLR2", quietly=TRUE)){
withAutoprint({
data("Auto", package="ISLR2")
m.auto <- lm(mpg ~ . - name - origin, data=Auto)
cv(selectStepAIC, Auto, seed=123, working.model=m.auto)
cv(selectStepAIC, Auto, seed=123, working.model=m.auto,
         AIC=FALSE, k=5, reps=3) # via BIC
})
} else {
cat("\n install the 'ISLR2' package to run these examples\n")
}
if (requireNamespace("carData", quietly=TRUE)){
withAutoprint({
data("Prestige", package="carData")
m.pres <- lm(prestige ~ income + education + women,
             data=Prestige)
cvt <- cv(selectTrans, data=Prestige, working.model=m.pres, seed=123,
          predictors=c("income", "education", "women"),
          response="prestige", family="yjPower")
cvt
compareFolds(cvt)
coef(cvt, average=median, NAs=1) # NAs not really needed here
cv(m.pres, seed=123)
})
} else {
cat("install the 'carData' package to run these examples\n")
}
if (requireNamespace("ISLR2", quietly=TRUE)){
withAutoprint({
Auto$year <- as.factor(Auto$year)
Auto$origin <- factor(Auto$origin,
                      labels=c("America", "Europe", "Japan"))
rownames(Auto) <- make.names(Auto$name, unique=TRUE)
Auto$name <- NULL
m.auto <- lm(mpg ~ . , data=Auto)
cvs <- cv(selectTransStepAIC, data=Auto, seed=76692, working.model=m.auto,
          criterion=medAbsErr,
          predictors=c("cylinders", "displacement", "horsepower",
                       "weight", "acceleration"),
          response="mpg", AIC=FALSE)
cvs
compareFolds(cvs)
})
}
data("Duncan", package="carData")
m1 <- lm(prestige ~ income + education, data=Duncan)
m2 <- lm(prestige ~ income + education + type, data=Duncan)
m3 <- lm(prestige ~ (income + education)*type, data=Duncan)
summary(cv.sel <- cv(selectModelList, data=Duncan, seed=5963,
                     working.model=models(m1, m2, m3),
                     save.model=TRUE)) # meta CV
cvInfo(cv.sel, "selected model")

}
\seealso{
\code{\link[MASS]{stepAIC}}, \code{\link[car]{bcPower}},
\code{\link[car]{powerTransform}}, \code{\link{cv}}.
}
