% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MCBoost.R
\name{MCBoost}
\alias{MCBoost}
\title{Multi-Calibration Boosting}
\description{
Implements Multi-Calibration Boosting by Hebert-Johnson et al. (2018) and
Multi-Accuracy Boosting by Kim et al. (2019) for the multi-calibration of a
machine learning model's prediction.
Multi-Calibration works best in scenarios where the underlying data & labels are unbiased
but a bias is introduced within the algorithm's fitting procedure. This is often the case,
e.g. when an algorithm fits a majority population while ignoring or under-fitting minority
populations.\cr
Expects initial models that fit binary outcomes or continuous outcomes with
predictions that are in (or scaled to) the 0-1 range.
The method defaults to \verb{Multi-Accuracy Boosting} as described in Kim et al. (2019).
In order to obtain behaviour as described in Hebert-Johnson et al. (2018) set
\code{multiplicative=FALSE} and \code{num_buckets} to 10.
For additional details, please refer to the relevant publications:
\itemize{
\item{Hebert-Johnson et al., 2018. Multicalibration: Calibration for the (Computationally-Identifiable) Masses.
Proceedings of the 35th International Conference on Machine Learning, PMLR 80:1939-1948.
\url{https://proceedings.mlr.press/v80/hebert-johnson18a.html}.}{}
\item{Kim et al., 2019. Multiaccuracy: Black-Box Post-Processing for Fairness in Classification.
Proceedings of the 2019 AAAI/ACM Conference on AI, Ethics, and Society (AIES '19).
Association for Computing Machinery, New York, NY, USA, 247–254.
\url{https://dl.acm.org/doi/10.1145/3306618.3314287}.}{}
}
}
\examples{
# See vignette for more examples.
# Instantiate the object
\dontrun{
mc = MCBoost$new()
# Run multi-calibration on training dataset.
mc$multicalibrate(iris[1:100, 1:4], factor(sample(c("A", "B"), 100, TRUE)))
# Predict on test set
mc$predict_probs(iris[101:150, 1:4])
# Get auditor effect
mc$auditor_effect(iris[101:150, 1:4])
}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{max_iter}}{\code{\link{integer}} \cr
The maximum number of iterations of the multi-calibration/multi-accuracy method.}

\item{\code{alpha}}{\code{\link{numeric}} \cr
Accuracy parameter that determines the stopping condition.}

\item{\code{eta}}{\code{\link{numeric}} \cr
Parameter for multiplicative weight update (step size).}

\item{\code{num_buckets}}{\code{\link{integer}} \cr
The number of buckets to split into in addition to using the whole sample.}

\item{\code{bucket_strategy}}{\code{\link{character}} \cr
Currently only supports "simple", even split along probabilities.
Only relevant for \code{num_buckets} > 1.}

\item{\code{rebucket}}{\code{\link{logical}} \cr
Should buckets be re-calculated at each iteration?}

\item{\code{eval_fulldata}}{\code{\link{logical}} \cr
Should auditor be evaluated on the full data?}

\item{\code{partition}}{\code{\link{logical}} \cr
True/False flag for whether to split up predictions by their "partition"
(e.g., predictions less than 0.5 and predictions greater than 0.5).}

\item{\code{multiplicative}}{\code{\link{logical}} \cr
Specifies the strategy for updating the weights (multiplicative weight vs additive).}

\item{\code{iter_sampling}}{\code{\link{character}} \cr
Specifies the strategy to sample the validation data for each iteration.}

\item{\code{auditor_fitter}}{\code{\link{AuditorFitter}} \cr
Specifies the type of model used to fit the residuals.}

\item{\code{predictor}}{\code{\link{function}} \cr
Initial predictor function.}

\item{\code{iter_models}}{\code{\link{list}} \cr
Cumulative list of fitted models.}

\item{\code{iter_partitions}}{\code{\link{list}} \cr
Cumulative list of data partitions for models.}

\item{\code{iter_corr}}{\code{\link{list}} \cr
Auditor correlation in each iteration.}

\item{\code{auditor_effects}}{\code{\link{list}} \cr
Auditor effect in each iteration.}

\item{\code{bucket_strategies}}{\code{\link{character}} \cr
Possible bucket_strategies.}

\item{\code{weight_degree}}{\code{\link{integer}} \cr
Weighting degree for low-degree multi-calibration.}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-MCBoost-new}{\code{MCBoost$new()}}
\item \href{#method-MCBoost-multicalibrate}{\code{MCBoost$multicalibrate()}}
\item \href{#method-MCBoost-predict_probs}{\code{MCBoost$predict_probs()}}
\item \href{#method-MCBoost-auditor_effect}{\code{MCBoost$auditor_effect()}}
\item \href{#method-MCBoost-print}{\code{MCBoost$print()}}
\item \href{#method-MCBoost-clone}{\code{MCBoost$clone()}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-new"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-new}{}}}
\subsection{Method \code{new()}}{
Initialize a multi-calibration instance.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$new(
  max_iter = 5,
  alpha = 1e-04,
  eta = 1,
  num_buckets = 2,
  partition = ifelse(num_buckets > 1, TRUE, FALSE),
  bucket_strategy = "simple",
  rebucket = FALSE,
  eval_fulldata = FALSE,
  multiplicative = TRUE,
  auditor_fitter = NULL,
  subpops = NULL,
  default_model_class = ConstantPredictor,
  init_predictor = NULL,
  iter_sampling = "none",
  weight_degree = 1L
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{max_iter}}{\code{\link{integer}} \cr
The maximum number of iterations of the multi-calibration/multi-accuracy method.
Default \code{5L}.}

\item{\code{alpha}}{\code{\link{numeric}} \cr
Accuracy parameter that determines the stopping condition. Default \code{1e-4}.}

\item{\code{eta}}{\code{\link{numeric}} \cr
Parameter for multiplicative weight update (step size). Default \code{1.0}.}

\item{\code{num_buckets}}{\code{\link{integer}} \cr
The number of buckets to split into in addition to using the whole sample. Default \code{2L}.}

\item{\code{partition}}{\code{\link{logical}} \cr
True/False flag for whether to split up predictions by their "partition"
(e.g., predictions less than 0.5 and predictions greater than 0.5).
Defaults to \code{TRUE} if \code{num_buckets} > 1 (multi-accuracy boosting) and to \code{FALSE} otherwise.}

\item{\code{bucket_strategy}}{\code{\link{character}} \cr
Currently only supports "simple", even split along probabilities.
Only taken into account for \code{num_buckets} > 1.}

\item{\code{rebucket}}{\code{\link{logical}} \cr
Should buckets be re-done at each iteration? Default \code{FALSE}.}

\item{\code{eval_fulldata}}{\code{\link{logical}} \cr
Should the auditor be evaluated on the full data or on the respective bucket for determining
the stopping criterion? Default \code{FALSE}, auditor is only evaluated on the bucket.
Setting \code{eval_fulldata = TRUE} keeps the implementation closer to the algorithm proposed
in the corresponding multi-accuracy paper (Kim et al., 2019), where auditor effects are
computed across the full sample.}

\item{\code{multiplicative}}{\code{\link{logical}} \cr
Specifies the strategy for updating the weights (multiplicative weight vs additive).
Defaults to \code{TRUE} (multi-accuracy boosting). Set to \code{FALSE} for multi-calibration.}

\item{\code{auditor_fitter}}{\code{\link{AuditorFitter}}|\code{\link{character}}|\code{\link[mlr3:Learner]{mlr3::Learner}} \cr
Specifies the type of model used to fit the
residuals. The default is \code{\link{RidgeAuditorFitter}}.
Can be a \code{character}, the name of a \code{\link{AuditorFitter}}, a \code{\link[mlr3:Learner]{mlr3::Learner}} that is then
auto-converted into a \code{\link{LearnerAuditorFitter}} or a custom \code{\link{AuditorFitter}}.}

\item{\code{subpops}}{\code{\link{list}} \cr
Specifies a collection of characteristic attributes
and the values they take to define subpopulations
e.g. list(age = c('20-29','30-39','40+'), nJobs = c(0,1,2,'3+'), ...).}

\item{\code{default_model_class}}{\code{Predictor} \cr
The class of the model that should be used as the init predictor model if
\code{init_predictor} is not specified. Defaults to \code{ConstantPredictor} which
predicts a constant value.}

\item{\code{init_predictor}}{\code{\link{function}}|\code{\link[mlr3:Learner]{mlr3::Learner}} \cr
The initial predictor function to use (i.e., if the user has a pretrained model).
If a \code{mlr3} \code{Learner} is passed, it will be autoconverted using \code{mlr3_init_predictor}.
This requires the \code{\link[mlr3:Learner]{mlr3::Learner}} to be trained.}

\item{\code{iter_sampling}}{\code{\link{character}} \cr
How to sample the validation data for each iteration?
Can be \code{bootstrap}, \code{split} or \code{none}.\cr
"split" splits the data into \code{max_iter} parts and validates on each sample in each iteration.\cr
"bootstrap" uses a new bootstrap sample in each iteration.\cr
"none" uses the same dataset in each iteration.}

\item{\code{weight_degree}}{\code{\link{integer}} \cr
Weighting degree for low-degree multi-calibration. Initialized to 1, which applies constant weighting with 1.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-multicalibrate"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-multicalibrate}{}}}
\subsection{Method \code{multicalibrate()}}{
Run multi-calibration.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$multicalibrate(data, labels, predictor_args = NULL, audit = FALSE, ...)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{data}}{\code{\link{data.table}}\cr
Features.}

\item{\code{labels}}{\code{\link{numeric}}\cr
One-hot encoded labels (of same length as data).}

\item{\code{predictor_args}}{\code{\link{any}} \cr
Arguments passed on to \code{init_predictor}. Defaults to \code{NULL}.}

\item{\code{audit}}{\code{\link{logical}} \cr
Perform auditing? Initialized to \code{FALSE}.}

\item{\code{...}}{\code{\link{any}} \cr
Params passed on to other methods.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
\code{NULL}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-predict_probs"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-predict_probs}{}}}
\subsection{Method \code{predict_probs()}}{
Predict a dataset with multi-calibrated predictions.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$predict_probs(x, t = Inf, predictor_args = NULL, audit = FALSE, ...)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{x}}{\code{\link{data.table}} \cr
Prediction data.}

\item{\code{t}}{\code{\link{integer}} \cr
Number of multi-calibration steps to predict. Default: \code{Inf} (all).}

\item{\code{predictor_args}}{\code{\link{any}} \cr
Arguments passed on to \code{init_predictor}. Defaults to \code{NULL}.}

\item{\code{audit}}{\code{\link{logical}} \cr
Should audit weights be stored? Default \code{FALSE}.}

\item{\code{...}}{\code{\link{any}} \cr
Params passed on to the residual prediction model's predict method.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
\code{\link{numeric}}\cr
Numeric vector of multi-calibrated predictions.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-auditor_effect"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-auditor_effect}{}}}
\subsection{Method \code{auditor_effect()}}{
Compute the auditor effect for each instance, i.e. the cumulative
absolute predictions of the auditor. It indicates "how much"
each observation was affected by multi-calibration on average across iterations.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$auditor_effect(
  x,
  aggregate = TRUE,
  t = Inf,
  predictor_args = NULL,
  ...
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{x}}{\code{\link{data.table}} \cr
Prediction data.}

\item{\code{aggregate}}{\code{\link{logical}} \cr
Should the auditor effect be aggregated across iterations? Defaults to \code{TRUE}.}

\item{\code{t}}{\code{\link{integer}} \cr
Number of multi-calibration steps to predict. Defaults to \code{Inf} (all).}

\item{\code{predictor_args}}{\code{\link{any}} \cr
Arguments passed on to \code{init_predictor}. Defaults to \code{NULL}.}

\item{\code{...}}{\code{\link{any}} \cr
Params passed on to the residual prediction model's predict method.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
\code{\link{numeric}} \cr
Numeric vector of auditor effects for each row in \code{x}.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-print"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-print}{}}}
\subsection{Method \code{print()}}{
Prints information about multi-calibration.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$print(...)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{...}}{\code{any}\cr
Not used.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MCBoost-clone"></a>}}
\if{latex}{\out{\hypertarget{method-MCBoost-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoost$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
