% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simple_ensemble.R
\name{simple_ensemble}
\alias{simple_ensemble}
\title{Compute ensemble model outputs by summarizing component model outputs for
each combination of model task, output type, and output type id. Supported
output types include \code{mean}, \code{median}, \code{quantile}, \code{cdf}, and \code{pmf}.}
\usage{
simple_ensemble(
  model_out_tbl,
  weights = NULL,
  weights_col_name = "weight",
  agg_fun = mean,
  agg_args = list(),
  model_id = "hub-ensemble",
  task_id_cols = NULL
)
}
\arguments{
\item{model_out_tbl}{an object of class \code{model_out_tbl} with component
model outputs (e.g., predictions).}

\item{weights}{an optional \code{data.frame} with component model weights. If
provided, it should have a column named \code{model_id} and a column containing
model weights. Optionally, it may contain additional columns corresponding
to task id variables, \code{output_type}, or \code{output_type_id}, if weights are
specific to values of those variables. The default is \code{NULL}, in which case
an equally-weighted ensemble is calculated. The \code{weights} data frame should be prevalidated.}

\item{weights_col_name}{\code{character} string naming the column in \code{weights}
with model weights. Defaults to \code{"weight"}.}

\item{agg_fun}{a function or character string name of a function to use for
aggregating component model outputs into the ensemble outputs. See the
details for more information.}

\item{agg_args}{a named list of any additional arguments that will be passed
to \code{agg_fun}.}

\item{model_id}{\code{character} string with the identifier to use for the
ensemble model.}

\item{task_id_cols}{\code{character} vector with names of columns in
\code{model_out_tbl} that specify modeling tasks. Defaults to \code{NULL}, in which
case all columns in \code{model_out_tbl} other than \code{"model_id"}, \code{"output_type"},
\code{"output_type_id"}, and \code{"value"} are used as task ids.}
}
\value{
a \code{model_out_tbl} object of ensemble predictions. Note that
any additional columns in the input \code{model_out_tbl} are dropped.
}
\description{
Compute ensemble model outputs by summarizing component model outputs for
each combination of model task, output type, and output type id. Supported
output types include \code{mean}, \code{median}, \code{quantile}, \code{cdf}, and \code{pmf}.
}
\details{
The default for \code{agg_fun} is \code{mean}, in which case the ensemble's
output is the average of the component model outputs within each group
defined by a combination of values in the task id columns, output type, and
output type id. The provided \code{agg_fun} should have an argument \code{x} for the
vector of numeric values to summarize, and for weighted methods, an
argument \code{w} with a numeric vector of weights. If it is desired to use an
aggregation function that does not accept these arguments, a wrapper
would need to be written. For weighted methods, \code{agg_fun = "mean"} and
\code{agg_fun = "median"} are translated to use \code{matrixStats::weightedMean} and
\code{matrixStats::weightedMedian} respectively. For \code{matrixStats::weightedMedian},
the argument \code{interpolate} is automatically set to \code{FALSE} to circumvent a
calculation issue that results in invalid distributions.
}
\examples{
# Calculate a weighted median in two ways
data(model_outputs)
data(fweights)

weighted_median1 <- simple_ensemble(model_outputs, weights = fweights,
                                    agg_fun = stats::median)
weighted_median2 <- simple_ensemble(model_outputs, weights = fweights,
                                     agg_fun = matrixStats::weightedMedian)
all.equal(weighted_median1, weighted_median2)

}
