% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PipeOpRegrAvg.R
\name{mlr_pipeops_regravg}
\alias{mlr_pipeops_regravg}
\alias{PipeOpRegrAvg}
\title{Weighted Prediction Averaging}
\format{
\code{\link[R6:R6Class]{R6Class}} inheriting from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}
\description{
Perform (weighted) prediction averaging from regression \code{\link[mlr3:Prediction]{Prediction}}s by connecting
\code{\link{PipeOpRegrAvg}} to multiple \code{\link{PipeOpLearner}} outputs.

The resulting \code{"response"} prediction is a weighted average of the incoming \code{"response"} predictions.
Aggregation of \code{"se"} predictions is controlled by the \code{se_aggr} parameter (see below). When \code{"se"} is not requested
or \code{se_aggr = "none"}, \code{"se"} is dropped.
}
\section{\code{"se"} Aggregation}{


Let there be \code{K} incoming predictions with weights \code{w} (sum to 1). For a given row \code{j}, denote
per-model means \code{mu_i[j]} and, if available, per-model standard errors \code{se_i[j]}.
Define

\if{html}{\out{<div class="sourceCode">}}\preformatted{mu_bar[j]      = sum_i w[i] * mu_i[j]
var_between[j] = sum_i w[i] * (mu_i[j] - mu_bar[j])^2 # weighted var of means
var_within[j]  = sum_i w[i] * se_i[j]^2               # weighted mean of SE^2s
}\if{html}{\out{</div>}}

The following aggregation methods are available:
\itemize{
\item \strong{\code{se_aggr = "predictive"}} -- \emph{Within + Between (mixture/predictive SD)}

\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j] + var_between[j])
}\if{html}{\out{</div>}}

\strong{Interpretation.} Treats each incoming \code{se_i} as that model's predictive SD at the point (or, if the learner
reports SE of the conditional mean---as many \code{mlr3} regression learners do---then as that mean-SE). The returned \code{se}
is the SD of the \emph{mixture ensemble} under weighted averaging: it increases when base models disagree (epistemic spread)
and when individual models are uncertain (aleatoric spread).
\strong{Notes.} If \code{se_i} represents \emph{mean} SE (common in \code{predict.lm(se.fit=TRUE)}-style learners), the result
aggregates those mean-SEs and still adds model disagreement correctly, but it will \emph{underestimate} a true predictive SD
that would additionally include irreducible noise. Requires \code{"se"} to be present from \strong{all} inputs.
\item \strong{\code{se_aggr = "mean"}} -- \emph{SE of the weighted average of means under equicorrelation}

With a correlation parameter \code{se_aggr_rho = rho}, assume
\code{Cov(mu_i_hat, mu_k_hat) = rho * se_i * se_k} for all pairs of distinct models \code{i != k}. Then

\if{html}{\out{<div class="sourceCode">}}\preformatted{# components:
a[j] = sum_i (w[i]^2 * se_i[j]^2)
b[j] = (sum_i w[i] * se_i[j])^2
var_mean[j] = (1 - rho) * a[j] + rho * b[j]
se[j] = sqrt(var_mean[j])
}\if{html}{\out{</div>}}

\strong{Interpretation.} Returns the \emph{standard error of the averaged estimator} \verb{sum_i w[i] * mu_i}, not a predictive SD.
Use when you specifically care about uncertainty of the averaged mean itself.
\strong{Notes.} \code{rho} is clamped to the PSD range \verb{[-1/(K-1), 1]} for \code{K > 1}. Typical settings:
\code{rho = 0} (assume independence; often optimistic for CV/bagging) and \code{rho = 1} (perfect correlation; conservative and
equal to the weighted arithmetic mean of SEs). Requires \code{"se"} from \strong{all} inputs.
\item \strong{\code{se_aggr = "within"}} -- \emph{Within-model component only}

\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j])
}\if{html}{\out{</div>}}

\strong{Interpretation.} Aggregates only the average per-model uncertainty and \strong{ignores} disagreement between models.
Useful as a diagnostic of the aleatoric component; not a full ensemble uncertainty.
\strong{Notes.} Typically \emph{underestimates} the uncertainty of the ensemble prediction when models disagree.
Requires \code{"se"} from \strong{all} inputs.
\item \strong{\code{se_aggr = "between"}} -- \emph{Between-model component only (works without \code{"se"})}

\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_between[j])
}\if{html}{\out{</div>}}

\strong{Interpretation.} Captures only the spread of the base means (epistemic/model disagreement).
\strong{Notes.} This is the only method that returns an \code{"se"} without using incoming \code{"se"} values. It is a \emph{lower bound} on a full predictive SD,
because it omits within-model noise.
\item \strong{\code{se_aggr = "none"}} -- \emph{Do not return \code{"se"}}

\code{"se"} is dropped from the output prediction.
}

\strong{Relationships and edge cases.} For any row, \code{se("predictive") >= max(se("within"), se("between"))}.
With a single input (\code{K = 1}), \code{"predictive"} and \code{"within"} return the input \code{"se"} unchanged (as does \code{"mean"}, since \code{a[j] = b[j]} in that case), while \code{"between"} returns \code{0}.
Methods \code{"predictive"}, \code{"mean"}, and \code{"within"} require all inputs to provide \code{"se"}; otherwise aggregation fails with an error.

Weights can be set as a parameter; if none are provided, equal weights
are used for all predictions.
}

\section{Construction}{


\if{html}{\out{<div class="sourceCode">}}\preformatted{PipeOpRegrAvg$new(innum = 0, collect_multiplicity = FALSE, id = "regravg", param_vals = list())
}\if{html}{\out{</div>}}
\itemize{
\item \code{innum} :: \code{numeric(1)}\cr
Determines the number of input channels.
If \code{innum} is 0 (default), a vararg input channel is created that can take an arbitrary number of inputs.
\item \code{collect_multiplicity} :: \code{logical(1)}\cr
If \code{TRUE}, the input is a \code{\link{Multiplicity}} collecting channel. This means that a single
\code{\link{Multiplicity}} input is accepted instead of multiple normal inputs, and its members are aggregated. This requires \code{innum} to be 0.
Default is \code{FALSE}.
\item \code{id} :: \code{character(1)}\cr
Identifier of the resulting object, default \code{"regravg"}.
\item \code{param_vals} :: named \code{list}\cr
List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}.
}
}

\section{Input and Output Channels}{

Input and output channels are inherited from \code{\link{PipeOpEnsemble}}. Instead of a \code{\link[mlr3:Prediction]{Prediction}}, a \code{\link[mlr3:PredictionRegr]{PredictionRegr}}
is used as input and output during prediction.
}

\section{State}{

The \verb{$state} is left empty (\code{list()}).
}

\section{Parameters}{

The parameters are the parameters inherited from the \code{\link{PipeOpEnsemble}}, as well as:
\itemize{
\item \code{se_aggr} :: \code{character(1)}\cr
Controls how incoming \code{"se"} values are aggregated into an ensemble \code{"se"}. One of
\code{"predictive"}, \code{"mean"}, \code{"within"}, \code{"between"}, \code{"none"}. See the section on \code{"se"} aggregation above for definitions and interpretation.
\item \code{se_aggr_rho} :: \code{numeric(1)}\cr
Equicorrelation parameter used only for \code{se_aggr = "mean"}. Interpreted as the common correlation between
per-model mean estimators. Recommended range \verb{[0, 1]}; values are clamped to \verb{[-1/(K-1), 1]} for validity.
}
}

\section{Internals}{

Inherits from \code{\link{PipeOpEnsemble}} by implementing the \code{private$weighted_avg_predictions()} method.
}

\section{Fields}{

Only fields inherited from \code{\link{PipeOp}}.
}

\section{Methods}{

Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}

\examples{
\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")

# Simple Bagging for Regression
gr = ppl("greplicate",
  po("subsample") \%>>\%
  po("learner", lrn("regr.rpart")),
  n = 5
) \%>>\%
  po("regravg")

resample(tsk("mtcars"), GraphLearner$new(gr), rsmp("holdout"))
\dontshow{\}) # examplesIf}
}
\seealso{
\url{https://mlr-org.com/pipeops.html}

Other PipeOps: 
\code{\link{PipeOp}},
\code{\link{PipeOpEncodePL}},
\code{\link{PipeOpEnsemble}},
\code{\link{PipeOpImpute}},
\code{\link{PipeOpTargetTrafo}},
\code{\link{PipeOpTaskPreproc}},
\code{\link{PipeOpTaskPreprocSimple}},
\code{\link{mlr_pipeops}},
\code{\link{mlr_pipeops_adas}},
\code{\link{mlr_pipeops_blsmote}},
\code{\link{mlr_pipeops_boxcox}},
\code{\link{mlr_pipeops_branch}},
\code{\link{mlr_pipeops_chunk}},
\code{\link{mlr_pipeops_classbalancing}},
\code{\link{mlr_pipeops_classifavg}},
\code{\link{mlr_pipeops_classweights}},
\code{\link{mlr_pipeops_colapply}},
\code{\link{mlr_pipeops_collapsefactors}},
\code{\link{mlr_pipeops_colroles}},
\code{\link{mlr_pipeops_copy}},
\code{\link{mlr_pipeops_datefeatures}},
\code{\link{mlr_pipeops_decode}},
\code{\link{mlr_pipeops_encode}},
\code{\link{mlr_pipeops_encodeimpact}},
\code{\link{mlr_pipeops_encodelmer}},
\code{\link{mlr_pipeops_encodeplquantiles}},
\code{\link{mlr_pipeops_encodepltree}},
\code{\link{mlr_pipeops_featureunion}},
\code{\link{mlr_pipeops_filter}},
\code{\link{mlr_pipeops_fixfactors}},
\code{\link{mlr_pipeops_histbin}},
\code{\link{mlr_pipeops_ica}},
\code{\link{mlr_pipeops_imputeconstant}},
\code{\link{mlr_pipeops_imputehist}},
\code{\link{mlr_pipeops_imputelearner}},
\code{\link{mlr_pipeops_imputemean}},
\code{\link{mlr_pipeops_imputemedian}},
\code{\link{mlr_pipeops_imputemode}},
\code{\link{mlr_pipeops_imputeoor}},
\code{\link{mlr_pipeops_imputesample}},
\code{\link{mlr_pipeops_info}},
\code{\link{mlr_pipeops_isomap}},
\code{\link{mlr_pipeops_kernelpca}},
\code{\link{mlr_pipeops_learner}},
\code{\link{mlr_pipeops_learner_pi_cvplus}},
\code{\link{mlr_pipeops_learner_quantiles}},
\code{\link{mlr_pipeops_missind}},
\code{\link{mlr_pipeops_modelmatrix}},
\code{\link{mlr_pipeops_multiplicityexply}},
\code{\link{mlr_pipeops_multiplicityimply}},
\code{\link{mlr_pipeops_mutate}},
\code{\link{mlr_pipeops_nearmiss}},
\code{\link{mlr_pipeops_nmf}},
\code{\link{mlr_pipeops_nop}},
\code{\link{mlr_pipeops_ovrsplit}},
\code{\link{mlr_pipeops_ovrunite}},
\code{\link{mlr_pipeops_pca}},
\code{\link{mlr_pipeops_proxy}},
\code{\link{mlr_pipeops_quantilebin}},
\code{\link{mlr_pipeops_randomprojection}},
\code{\link{mlr_pipeops_randomresponse}},
\code{\link{mlr_pipeops_removeconstants}},
\code{\link{mlr_pipeops_renamecolumns}},
\code{\link{mlr_pipeops_replicate}},
\code{\link{mlr_pipeops_rowapply}},
\code{\link{mlr_pipeops_scale}},
\code{\link{mlr_pipeops_scalemaxabs}},
\code{\link{mlr_pipeops_scalerange}},
\code{\link{mlr_pipeops_select}},
\code{\link{mlr_pipeops_smote}},
\code{\link{mlr_pipeops_smotenc}},
\code{\link{mlr_pipeops_spatialsign}},
\code{\link{mlr_pipeops_subsample}},
\code{\link{mlr_pipeops_targetinvert}},
\code{\link{mlr_pipeops_targetmutate}},
\code{\link{mlr_pipeops_targettrafoscalerange}},
\code{\link{mlr_pipeops_textvectorizer}},
\code{\link{mlr_pipeops_threshold}},
\code{\link{mlr_pipeops_tomek}},
\code{\link{mlr_pipeops_tunethreshold}},
\code{\link{mlr_pipeops_unbranch}},
\code{\link{mlr_pipeops_updatetarget}},
\code{\link{mlr_pipeops_vtreat}},
\code{\link{mlr_pipeops_yeojohnson}}

Other Multiplicity PipeOps: 
\code{\link{Multiplicity}()},
\code{\link{PipeOpEnsemble}},
\code{\link{mlr_pipeops_classifavg}},
\code{\link{mlr_pipeops_featureunion}},
\code{\link{mlr_pipeops_multiplicityexply}},
\code{\link{mlr_pipeops_multiplicityimply}},
\code{\link{mlr_pipeops_ovrsplit}},
\code{\link{mlr_pipeops_ovrunite}},
\code{\link{mlr_pipeops_replicate}}

Other Ensembles: 
\code{\link{PipeOpEnsemble}},
\code{\link{mlr_learners_avg}},
\code{\link{mlr_pipeops_classifavg}},
\code{\link{mlr_pipeops_ovrunite}}
}
\concept{Ensembles}
\concept{Multiplicity PipeOps}
\concept{PipeOps}
