% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partial.R
\name{partial}
\alias{partial}
\alias{partial.default}
\alias{partial.model_fit}
\title{Partial Dependence Functions}
\usage{
partial(object, ...)

\method{partial}{default}(
  object,
  pred.var,
  pred.grid,
  pred.fun = NULL,
  grid.resolution = NULL,
  ice = FALSE,
  center = FALSE,
  approx = FALSE,
  quantiles = FALSE,
  probs = 1:9/10,
  trim.outliers = FALSE,
  type = c("auto", "regression", "classification"),
  inv.link = NULL,
  which.class = 1L,
  prob = FALSE,
  recursive = TRUE,
  plot = FALSE,
  plot.engine = c("lattice", "ggplot2"),
  smooth = FALSE,
  rug = FALSE,
  chull = FALSE,
  levelplot = TRUE,
  contour = FALSE,
  contour.color = "white",
  alpha = 1,
  train,
  cats = NULL,
  check.class = TRUE,
  progress = FALSE,
  parallel = FALSE,
  paropts = NULL,
  ...
)

\method{partial}{model_fit}(object, ...)
}
\arguments{
\item{object}{A fitted model object of appropriate class (e.g., \code{"gbm"},
\code{"lm"}, \code{"randomForest"}, \code{"train"}, etc.).}

\item{...}{Additional optional arguments to be passed onto
\code{\link[stats]{predict}}.}

\item{pred.var}{Character vector giving the names of the predictor variables
of interest. For reasons of computation/interpretation, this should include
no more than three variables.}

\item{pred.grid}{Data frame containing the joint values of interest for the
variables listed in \code{pred.var}.}

\item{pred.fun}{Optional prediction function that requires two arguments:
\code{object} and \code{newdata}. If specified, then the function must return
a single prediction or a vector of predictions (i.e., not a matrix or data
frame). Default is \code{NULL}.}

\item{grid.resolution}{Integer giving the number of equally spaced points to
use for the continuous variables listed in \code{pred.var} when
\code{pred.grid} is not supplied. If left \code{NULL}, it will default to
the minimum of \code{51} and the number of unique data points for each
of the continuous independent variables listed in \code{pred.var}.}

\item{ice}{Logical indicating whether or not to compute individual
conditional expectation (ICE) curves. Default is \code{FALSE}. See
Goldstein et al. (2015) for details.}

\item{center}{Logical indicating whether or not to produce centered ICE
curves (c-ICE curves). Only used when \code{ice = TRUE}. Default is
\code{FALSE}. See Goldstein et al. (2015) for details.}

\item{approx}{Logical indicating whether or not to compute a faster, but
approximate, marginal effect plot (similar in spirit to the
\strong{plotmo} package). If \code{TRUE}, then \code{partial()} will compute
predictions across the predictors specified in \code{pred.var} while holding
the other predictors constant (a "poor man's partial dependence" function as
Stephen Milborrow, the author of \strong{plotmo}, puts it).
Default is \code{FALSE}. Note this works with \code{ice = TRUE} as well.
WARNING: This option is currently experimental. Use at your own risk. It is
possible (and arguably safer) to do this manually by passing a specific
"exemplar" observation to the \code{train} argument and specifying
\code{pred.grid} manually.}

\item{quantiles}{Logical indicating whether or not to use the sample
quantiles of the continuous predictors listed in \code{pred.var}. If
\code{quantiles = TRUE} and \code{grid.resolution = NULL} the sample
quantiles will be used to generate the grid of joint values for which the
partial dependence is computed. Default is \code{FALSE}.}

\item{probs}{Numeric vector of probabilities with values in [0,1]. (Values up
to 2e-14 outside that range are accepted and moved to the nearby endpoint.)
Default is \code{1:9/10} which corresponds to the deciles of the predictor
variables. These specify which quantiles to use for the continuous predictors
listed in \code{pred.var} when \code{quantiles = TRUE}.}

\item{trim.outliers}{Logical indicating whether or not to trim off outliers
from the continuous predictors listed in \code{pred.var} (using the simple
boxplot method) before generating the grid of joint values for which the
partial dependence is computed. Default is \code{FALSE}.}

\item{type}{Character string specifying the type of supervised learning.
Current options are \code{"auto"}, \code{"regression"} or
\code{"classification"}. If \code{type = "auto"} then \code{partial} will try
to extract the necessary information from \code{object}.}

\item{inv.link}{Function specifying the transformation to be applied to the
predictions before the partial dependence function is computed
(experimental). Default is \code{NULL} (i.e., no transformation). This option
is intended to be used for models that allow for non-Gaussian response
variables (e.g., counts). For these models, predictions are not typically
returned on the original response scale by default. For example, Poisson GBMs
typically return predictions on the log scale. In this case setting
\code{inv.link = exp} will return the partial dependence function on the
response (i.e., raw count) scale.}

\item{which.class}{Integer specifying which column of the matrix of predicted
probabilities to use as the "focus" class. Default is to use the first class.
Only used for classification problems (i.e., when
\code{type = "classification"}).}

\item{prob}{Logical indicating whether or not partial dependence for
classification problems should be returned on the probability scale, rather
than the centered logit. If \code{FALSE}, the partial dependence function is
on a scale similar to the logit. Default is \code{FALSE}.}

\item{recursive}{Logical indicating whether or not to use the weighted tree
traversal method described in Friedman (2001). This only applies to objects
that inherit from class \code{"gbm"}. Default is \code{TRUE} which is much
faster than the exact brute force approach used for all other models. (Based
on the C++ code behind \code{\link[gbm]{plot.gbm}}.)}

\item{plot}{Logical indicating whether to return a data frame containing the
partial dependence values (\code{FALSE}) or plot the partial dependence
function directly (\code{TRUE}). Default is \code{FALSE}. See
\code{\link{plotPartial}} for plotting details.}

\item{plot.engine}{Character string specifying which plotting engine to use
whenever \code{plot = TRUE}. Options include \code{"lattice"} (default) or
\code{"ggplot2"}.}

\item{smooth}{Logical indicating whether or not to overlay a LOESS smooth.
Default is \code{FALSE}.}

\item{rug}{Logical indicating whether or not to include a rug display on the
predictor axes. The tick marks indicate the min/max and deciles of the
predictor distributions. This helps reduce the risk of interpreting the
partial dependence plot outside the region of the data (i.e., extrapolating).
Only used when \code{plot = TRUE}. Default is \code{FALSE}.}

\item{chull}{Logical indicating whether or not to restrict the values of the
first two variables in \code{pred.var} to lie within the convex hull of their
training values; this affects \code{pred.grid}. This helps reduce the risk of
interpreting the partial dependence plot outside the region of the data
(i.e., extrapolating). Default is \code{FALSE}.}

\item{levelplot}{Logical indicating whether or not to use a false color level
plot (\code{TRUE}) or a 3-D surface (\code{FALSE}). Default is \code{TRUE}.}

\item{contour}{Logical indicating whether or not to add contour lines to the
level plot. Only used when \code{levelplot = TRUE}. Default is \code{FALSE}.}

\item{contour.color}{Character string specifying the color to use for the
contour lines when \code{contour = TRUE}. Default is \code{"white"}.}

\item{alpha}{Numeric value in \code{[0, 1]} specifying the opacity alpha
(most useful when plotting ICE/c-ICE curves). Default is 1 (i.e., no
transparency). In fact, this option only affects ICE/c-ICE curves and level
plots.}

\item{train}{An optional data frame, matrix, or sparse matrix containing the
original training data. This may be required depending on the class of
\code{object}. For objects that do not store a copy of the original training
data, this argument is required. For reasons discussed below, it is good
practice to always specify this argument.}

\item{cats}{Character vector indicating which columns of \code{train} should
be treated as categorical variables. Only used when \code{train} inherits
from class \code{"matrix"} or \code{"dgCMatrix"}.}

\item{check.class}{Logical indicating whether or not to make sure each column
in \code{pred.grid} has the correct class, levels, etc. Default is
\code{TRUE}.}

\item{progress}{Logical indicating whether or not to display a text-based
progress bar. Default is \code{FALSE}.}

\item{parallel}{Logical indicating whether or not to run \code{partial} in
parallel using a backend provided by the \code{foreach} package. Default is
\code{FALSE}.}

\item{paropts}{List containing additional options to be passed onto
\code{\link[foreach]{foreach}} when \code{parallel = TRUE}.}
}
\value{
By default, \code{partial} returns an object of class
\code{c("data.frame", "partial")}. If \code{ice = TRUE} and
\code{center = FALSE} then an object of class \code{c("data.frame", "ice")}
is returned. If \code{ice = TRUE} and \code{center = TRUE} then an object of
class \code{c("data.frame", "cice")} is returned. These three classes
determine the behavior of the \code{plotPartial} function which is
automatically called whenever \code{plot = TRUE}. Specifically, when
\code{plot = TRUE}, a \code{"trellis"} object is returned (see
\code{\link[lattice]{lattice}} for details); the \code{"trellis"} object will
also include an additional attribute, \code{"partial.data"}, containing the
data displayed in the plot.
}
\description{
Compute partial dependence functions (i.e., marginal effects) for various
model fitting objects.
}
\note{
In some cases it is difficult for \code{partial} to extract the original
training data from \code{object}. In these cases an error message is
displayed requesting the user to supply the training data via the
\code{train} argument in the call to \code{partial}. In most cases where
\code{partial} can extract the required training data from \code{object},
it is taken from the same environment in which \code{partial} is called.
Therefore, it is important to not change the training data used to construct
\code{object} before calling \code{partial}. This problem is completely
avoided when the training data are passed to the \code{train} argument in the
call to \code{partial}.

It is recommended to call \code{partial} with \code{plot = FALSE} and store
the results. This allows for more flexible plotting, and the user will not
have to waste time calling \code{partial} again if the default plot is not
sufficient.

It is possible to retrieve the last printed \code{"trellis"} object, such as
those produced by \code{plotPartial}, using \code{trellis.last.object()}.

If \code{ice = TRUE} or the prediction function given to \code{pred.fun}
returns a prediction for each observation in \code{newdata}, then the result
will be a curve for each observation. These are called individual conditional
expectation (ICE) curves; see Goldstein et al. (2015) and
\code{\link[ICEbox]{ice}} for details.
}
\examples{
\dontrun{
#
# Regression example (requires randomForest package to run)
#

# Fit a random forest to the boston housing data
library(randomForest)
data (boston)  # load the boston housing data
set.seed(101)  # for reproducibility
boston.rf <- randomForest(cmedv ~ ., data = boston)

# Using randomForest's partialPlot function
partialPlot(boston.rf, pred.data = boston, x.var = "lstat")

# Using pdp's partial function
head(partial(boston.rf, pred.var = "lstat"))  # returns a data frame
partial(boston.rf, pred.var = "lstat", plot = TRUE, rug = TRUE)

# The partial function allows for multiple predictors
partial(boston.rf, pred.var = c("lstat", "rm"), grid.resolution = 40,
        plot = TRUE, chull = TRUE, progress = TRUE)

# The plotPartial function offers more flexible plotting
pd <- partial(boston.rf, pred.var = c("lstat", "rm"), grid.resolution = 40)
plotPartial(pd, levelplot = FALSE, zlab = "cmedv", drape = TRUE,
            colorkey = FALSE, screen = list(z = -20, x = -60))

# The autoplot function can be used to produce graphics based on ggplot2
library(ggplot2)
autoplot(pd, contour = TRUE, legend.title = "Partial\ndependence")

#
# Individual conditional expectation (ICE) curves
#

# Use partial to obtain ICE/c-ICE curves
rm.ice <- partial(boston.rf, pred.var = "rm", ice = TRUE)
plotPartial(rm.ice, rug = TRUE, train = boston, alpha = 0.2)
autoplot(rm.ice, center = TRUE, alpha = 0.2, rug = TRUE, train = boston)

#
# Classification example (requires randomForest package to run)
#

# Fit a random forest to the Pima Indians diabetes data
data (pima)  # load the Pima Indians diabetes data
set.seed(102)  # for reproducibility
pima.rf <- randomForest(diabetes ~ ., data = pima, na.action = na.omit)

# Partial dependence of positive test result on glucose (default logit scale)
partial(pima.rf, pred.var = "glucose", plot = TRUE, chull = TRUE,
        progress = TRUE)

# Partial dependence of positive test result on glucose (probability scale)
partial(pima.rf, pred.var = "glucose", prob = TRUE, plot = TRUE,
        chull = TRUE, progress = TRUE)
}
}
\references{
J. H. Friedman. Greedy function approximation: A gradient boosting machine.
\emph{Annals of Statistics}, \bold{29}: 1189-1232, 2001.

Goldstein, A., Kapelner, A., Bleich, J., and Pitkin, E. Peeking Inside the
Black Box: Visualizing Statistical Learning With Plots of Individual
Conditional Expectation. \emph{Journal of Computational and Graphical
Statistics}, \bold{24}(1): 44-65, 2015.
}
