% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prob-pr_auc.R
\name{pr_auc}
\alias{pr_auc}
\alias{pr_auc.data.frame}
\alias{pr_auc_vec}
\title{Area under the precision recall curve}
\usage{
pr_auc(data, ...)

\method{pr_auc}{data.frame}(data, truth, ..., estimator = NULL, na_rm = TRUE)

pr_auc_vec(truth, estimate, estimator = NULL, na_rm = TRUE, ...)
}
\arguments{
\item{data}{A \code{data.frame} containing the \code{truth} column and the
class probability columns selected through \code{...}.}

\item{...}{A set of unquoted column names or one or more
\code{dplyr} selector functions to choose which variables contain the
class probabilities. If \code{truth} is binary, only 1 column should be selected.
Otherwise, there should be as many columns as factor levels of \code{truth}.}

\item{truth}{The column identifier for the true class results
(that is a \code{factor}). This should be an unquoted column name although
this argument is passed by expression and supports
\link[rlang:quasiquotation]{quasiquotation} (you can unquote column
names). For \verb{_vec()} functions, a \code{factor} vector.}

\item{estimator}{One of \code{"binary"}, \code{"macro"}, or \code{"macro_weighted"} to
specify the type of averaging to be done. \code{"binary"} is only relevant for
the two class case. The other two are general methods for calculating
multiclass metrics. The default will automatically choose \code{"binary"} or
\code{"macro"} based on \code{truth}.}

\item{na_rm}{A \code{logical} value indicating whether \code{NA}
values should be stripped before the computation proceeds.}

\item{estimate}{If \code{truth} is binary, a numeric vector of class probabilities
corresponding to the "relevant" class. Otherwise, a matrix with as many
columns as factor levels of \code{truth}. \emph{It is assumed that these are in the
same order as the levels of \code{truth}.}}
}
\value{
A \code{tibble} with columns \code{.metric}, \code{.estimator},
and \code{.estimate} and 1 row of values.

For grouped data frames, the number of rows returned will be the same as
the number of groups.

For \code{pr_auc_vec()}, a single \code{numeric} value (or \code{NA}).
}
\description{
\code{pr_auc()} is a metric that computes the area under the precision
recall curve. See \code{\link[=pr_curve]{pr_curve()}} for the full curve.
}
\section{Multiclass}{


Macro and macro-weighted averaging are available for this metric.
The default is to select macro averaging if a \code{truth} factor with more
than 2 levels is provided. Otherwise, a standard binary calculation is done.
See \code{vignette("multiclass", "yardstick")} for more information.
}

\section{Relevant Level}{


There is no common convention on which factor level should
automatically be considered the "event" or "positive" result.
In \code{yardstick}, the default is to use the \emph{first} level. This
behavior is controlled by a global option called \code{yardstick.event_first},
which is set to \code{TRUE} when the package is loaded. It can be changed
to \code{FALSE} if the \emph{last} level of the factor is considered the
level of interest by running: \code{options(yardstick.event_first = FALSE)}.
For multiclass extensions involving one-vs-all
comparisons (such as macro averaging), this option is ignored and
the "one" level is always the relevant result.
}

\examples{
# ---------------------------------------------------------------------------
# Two class example

# `truth` is a 2 level factor. The first level is `"Class1"`, which is the
# "event of interest" by default in yardstick. See the Relevant Level
# section above.
data(two_class_example)

# Binary metrics using class probabilities take a factor `truth` column,
# and a single class probability column containing the probabilities of
# the event of interest. Here, since `"Class1"` is the first level of
# `truth`, it is the event of interest and we pass in probabilities for it.
pr_auc(two_class_example, truth, Class1)

# ---------------------------------------------------------------------------
# Multiclass example

# `obs` is a 4 level factor. The first level is `"VF"`, which is the
# "event of interest" by default in yardstick. See the Relevant Level
# section above.
data(hpc_cv)

# You can use the col1:colN tidyselect syntax
library(dplyr)
hpc_cv \%>\%
  filter(Resample == "Fold01") \%>\%
  pr_auc(obs, VF:L)

# Change the first level of `obs` from `"VF"` to `"M"` to alter the
# event of interest. The class probability columns should be supplied
# in the same order as the levels.
hpc_cv \%>\%
  filter(Resample == "Fold01") \%>\%
  mutate(obs = relevel(obs, "M")) \%>\%
  pr_auc(obs, M, VF:L)

# Groups are respected
hpc_cv \%>\%
  group_by(Resample) \%>\%
  pr_auc(obs, VF:L)

# Weighted macro averaging
hpc_cv \%>\%
  group_by(Resample) \%>\%
  pr_auc(obs, VF:L, estimator = "macro_weighted")

# Vector version
# Supply a matrix of class probabilities
fold1 <- hpc_cv \%>\%
  filter(Resample == "Fold01")

pr_auc_vec(
   truth = fold1$obs,
   matrix(
     c(fold1$VF, fold1$F, fold1$M, fold1$L),
     ncol = 4
   )
)

}
\seealso{
\code{\link[=pr_curve]{pr_curve()}} for computing the full precision recall curve.

Other class probability metrics: 
\code{\link{average_precision}()},
\code{\link{gain_capture}()},
\code{\link{mn_log_loss}()},
\code{\link{roc_auc}()},
\code{\link{roc_aunp}()},
\code{\link{roc_aunu}()}
}
\author{
Max Kuhn
}
\concept{class probability metrics}
