% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Task.R
\docType{data}
\name{Task}
\alias{Task}
\title{Task Class}
\format{\link[R6:R6Class]{R6::R6Class} object.}
\description{
This is the abstract base class for task objects like \link{TaskClassif} and \link{TaskRegr}.

Tasks serve two purposes:
\enumerate{
\item Tasks wrap a \link{DataBackend}, an object to transparently interface different data storage types.
\item Tasks store meta-information, such as the role of the individual columns in the \link{DataBackend}.
For example, for a classification task a single column must be marked as target column, and others as features.
}

Predefined (toy) tasks are stored in the \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} \link{mlr_tasks},
e.g. \code{\link[=mlr_tasks_iris]{iris}} or \code{\link[=mlr_tasks_boston_housing]{boston_housing}}.
}
\section{Construction}{

Note: This object is typically constructed via a derived class, e.g. \link{TaskClassif} or \link{TaskRegr}.\preformatted{t = Task$new(id, task_type, backend)
}
\itemize{
\item \code{id} :: \code{character(1)}\cr
Identifier for the task.
\item \code{task_type} :: \code{character(1)}\cr
Set in the classes which inherit from this class.
Must be an element of \link[=mlr_reflections]{mlr_reflections$task_types$type}.
\item \code{backend} :: \link{DataBackend}\cr
Either a \link{DataBackend}, or any object which is convertible to a DataBackend with \code{as_data_backend()}.
E.g., a \code{data.frame()} will be converted to a \link{DataBackendDataTable}.
}
}

\section{Fields}{

\itemize{
\item \code{backend} :: \link{DataBackend}.
\item \code{col_info} :: \code{\link[data.table:data.table]{data.table::data.table()}}\cr
Table with 3 columns:
\itemize{
\item \code{"id"} stores the name of the column.
\item \code{"type"} holds the storage type of the variable, e.g. \code{integer}, \code{numeric} or \code{character}.
\item \code{"levels"} stores a vector of distinct values (levels) for factor and character variables.
}
\item \code{col_roles} :: named \code{list()}\cr
Each column (feature) can have an arbitrary number of the following roles:
\itemize{
\item \code{"feature"}: Regular feature used in the model fitting process.
\item \code{"target"}: Target variable.
\item \code{"label"}: Observation labels. May be used in plots.
\item \code{"order"}: Data returned by \code{$data()} is ordered by this column (or these columns).
\item \code{"groups"}: During resampling, observations with the same value of the variable with role "groups"
are marked as "belonging together". They will be exclusively assigned to be either in the training set
or in the test set for each resampling iteration. Only up to one column may have this role.
\item \code{"weights"}: Observation weights. Only up to one column may have this role.
}

\code{col_roles} keeps track of the roles with a named list of vectors of feature names.
To alter the roles, use \code{t$set_col_role()}.
\item \code{row_roles} :: named \code{list()}\cr
Each row (observation) can have an arbitrary number of roles in the learning task:
\itemize{
\item \code{"use"}: Use in train / predict / resampling.
\item \code{"validation"}: Hold the observations back unless explicitly requested.
Validation sets are not yet completely integrated into the package.
}

\code{row_roles} keeps track of the roles with a named list of vectors of row ids.
To alter the roles, use \code{t$set_row_role()}.
\item \code{feature_names} :: \code{character()}\cr
Returns all column names with \code{role == "feature"}.
\item \code{feature_types} :: \code{\link[data.table:data.table]{data.table::data.table()}}\cr
Returns a table with columns \code{id} and \code{type} where \code{id} are the column names of "active" features of the task
and \code{type} is the storage type.
\item \code{hash} :: \code{character(1)}\cr
Hash (unique identifier) for this object.
\item \code{id} :: \code{character(1)}\cr
Identifier of the Task.
\item \code{ncol} :: \code{integer(1)}\cr
Returns the total number of cols with role "target" or "feature".
\item \code{nrow} :: \code{integer(1)}\cr
Returns the total number of rows with role "use".
\item \code{row_ids} :: (\code{integer()} | \code{character()})\cr
Returns the row ids of the \link{DataBackend} for observations with role "use".
\item \code{target_names} :: \code{character()}\cr
Returns all column names with role "target".
\item \code{task_type} :: \code{character(1)}\cr
Stores the type of the \link{Task}.
\item \code{properties} :: \code{character()}\cr
Set of task properties. Possible properties are stored in
\link[=mlr_reflections]{mlr_reflections$task_properties}.
The following properties are currently standardized and understood by tasks in \CRANpkg{mlr3}:
\itemize{
\item \code{"weights"}: The task comes with observation weights.
\item \code{"groups"}: The task comes with grouping/blocking information.
}
\item \code{groups} :: \code{\link[data.table:data.table]{data.table::data.table()}}\cr
If the task has a designated column role "groups", table with two columns:
\code{row_id} (\code{integer()} | \code{character()}) and the grouping variable \code{group} (\code{vector()}).
Returns \code{NULL} if there is no grouping column.
\item \code{weights} :: \code{\link[data.table:data.table]{data.table::data.table()}}\cr
If the task has a designated column role "weights", table with two columns:
\code{row_id} (\code{integer()} | \code{character()}) and the observation weights \code{weight} (\code{numeric()}).
Returns \code{NULL} if there is no weight column.
}
}

\section{Methods}{

\itemize{
\item \code{data(rows = NULL, cols = NULL, data_format = NULL)}\cr
(\code{integer()} | \code{character()}, \code{character()}, \code{character(1)}) -> \code{any}\cr
Returns a slice of the data from the \link{DataBackend} in the data format specified by \code{data_format}
(depending on the \link{DataBackend}, but usually a \code{\link[data.table:data.table]{data.table::data.table()}}).

Rows are additionally subsetted to only contain observations with role "use", and
columns are filtered to only contain columns with role "target" or "feature".
If invalid \code{rows} or \code{cols} are specified, an exception is raised.
\item \code{formula(rhs = ".")}\cr
\code{character()} -> \code{\link[stats:formula]{stats::formula()}}\cr
Constructs a \code{\link[stats:formula]{stats::formula()}}, e.g. \code{[target] ~ [feature_1] + [feature_2] + ... + [feature_k]}, using
the features provided in argument \code{rhs} (defaults to all columns with role \code{"feature"}, symbolized by \code{"."}).
\item \code{levels(cols = NULL)}\cr
\code{character()} -> named \code{list()}\cr
Returns the distinct values for columns referenced in \code{cols} with storage type "character", "factor" or "ordered".
Argument \code{cols} defaults to all such columns with role \code{"target"} or \code{"feature"}.

Note that this function ignores the row roles, it returns all levels available in the \link{DataBackend}.
To update the stored level information, e.g. after filtering a task, call \code{$droplevels()}.
\item \code{droplevels(cols = NULL)}\cr
\code{character()} -> \code{self}\cr
Updates the cache of stored factor levels, removing all levels not present in the current set of active rows.
\code{cols} defaults to all columns with storage type "character", "factor", or "ordered".
\item \code{missings(cols = NULL)}\cr
\code{character()} -> named \code{integer()}\cr
Returns the number of missing observations for columns referenced in \code{cols}.
Considers only active rows with row role \code{"use"}.
Argument \code{cols} defaults to all columns with role "target" or "feature".
\item \code{head(n = 6)}\cr
\code{integer()} -> \code{\link[data.table:data.table]{data.table::data.table()}}\cr
Get the first \code{n} observations with role \code{"use"}.
\item \code{set_col_role(cols, new_roles, exclusive = TRUE)}\cr
(\code{character()}, \code{character()}, \code{logical(1)}) -> \code{self}\cr
Adds the roles \code{new_roles} to columns referred to by \code{cols}.
If \code{exclusive} is \code{TRUE}, the referenced columns will be removed from all other roles.
\item \code{set_row_role(rows, new_roles, exclusive = TRUE)}\cr
(\code{integer()} | \code{character()}, \code{character()}, \code{logical(1)}) -> \code{self}\cr
Adds the roles \code{new_roles} to rows referred to by \code{rows}.
If \code{exclusive} is \code{TRUE}, the referenced rows will be removed from all other roles.
\item \code{filter(rows)}\cr
(\code{integer()} | \code{character()}) -> \code{self}\cr
Subsets the task, reducing it to only keep the rows specified in \code{rows}.

This operation mutates the task in-place.
See the section on task mutators for more information.
\item \code{select(cols)}\cr
\code{character()} -> \code{self}\cr
Subsets the task, reducing it to only keep the features specified in \code{cols}.
Note that you cannot deselect the target column, for obvious reasons.

This operation mutates the task in-place.
See the section on task mutators for more information.
\item \code{cbind(data)}\cr
\code{data.frame()} -> \code{self}\cr
Adds additional columns to the \link{DataBackend}.
The row ids must be provided as column in \code{data} (with column name matching the primary key name of the \link{DataBackend}).
If this column is missing, it is assumed that the rows are exactly in the order of \code{t$row_ids}.
In case of name clashes of column names in \code{data} and \link{DataBackend}, columns in \code{data} have higher precedence
and virtually overwrite the columns in the \link{DataBackend}.

This operation mutates the task in-place.
See the section on task mutators for more information.
\item \code{rbind(data)}\cr
\code{data.frame()} -> \code{self}\cr
Adds additional rows to the \link{DataBackend}.
The new row ids must be provided as column in \code{data}.
If this column is missing, new row ids are constructed automatically.
In case of name clashes of row ids, rows in \code{data} have higher precedence
and virtually overwrite the rows in the \link{DataBackend}.

This operation mutates the task in-place.
See the section on task mutators for more information.
\item \code{rename(from, to)}\cr
(\code{character()}, \code{character()}) -> \code{self}\cr
Renames columns by mapping column names in \code{from} to new column names in \code{to}.

This operation mutates the task in-place.
See the section on task mutators for more information.
}
}

\section{S3 methods}{

\itemize{
\item \code{as.data.table(t)}\cr
\link{Task} -> \code{\link[data.table:data.table]{data.table::data.table()}}\cr
Returns the complete data as \code{\link[data.table:data.table]{data.table::data.table()}}.
}
}

\section{Task mutators}{

The following methods change the task in-place:
\itemize{
\item \code{set_row_role()} and \code{set_col_role()} alter the row or column information in \code{row_roles} or \code{col_roles}, respectively.
This provides a different "view" on the data without altering the data itself.
\item \code{filter()} and \code{select()} subset the set of active rows or features in \code{row_roles} or \code{col_roles}, respectively.
This provides a different "view" on the data without altering the data itself.
\item \code{rbind()} and \code{cbind()} change the task in-place by binding rows or columns to the data, but without modifying the original \link{DataBackend}.
Instead, the methods first create a new \link{DataBackendDataTable} from the provided new data, and then
merge both backends into an abstract \link{DataBackend} which combines the results on-demand.
\item \code{rename()} wraps the \link{DataBackend} of the Task in an additional \link{DataBackend} which deals with the renaming. Also updates \code{col_roles} and \code{col_info}.
}
}

\examples{
# We use the derived class TaskClassif here,
# as class Task is not intended for direct use
task = TaskClassif$new("iris", iris, target = "Species")

task$nrow
task$ncol
task$feature_names
task$formula()

# Remove "Petal.Length"
task$set_col_role("Petal.Length", character())

# Remove "Petal.Width", alternative way
task$select(setdiff(task$feature_names, "Petal.Width"))

task$feature_names

# Add new column "foo"
task$cbind(data.frame(foo = 1:150))
task$head()
}
\seealso{
Other Task: \code{\link{TaskClassif}},
  \code{\link{TaskRegr}}, \code{\link{TaskSupervised}},
  \code{\link{mlr_tasks}}
}
\concept{Task}
\keyword{datasets}
