% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bf_map.R
\name{bf_map}
\alias{bf_map}
\title{Map variables to a bitflag}
\usage{
bf_map(protocol, data, registry, ..., name = NULL, na.val = NULL)
}
\arguments{
\item{protocol}{\code{\link[=character]{character(1)}}\cr the protocol based on which
the flag should be determined, see Details.}

\item{data}{the object to build bit flags for.}

\item{registry}{\code{\link[=registry]{registry(1)}}\cr an already defined bitfield
registry.}

\item{...}{the protocol-specific arguments for building a bit flag, see
Details.}

\item{name}{\code{\link[=character]{character(1)}}\cr optional flag-name.}

\item{na.val}{value, of the same encoding type as the flag, that must be
supplied if the test for this flag results in \code{NA} values.}
}
\value{
an (updated) object of class 'registry' with the additional flag
defined here.
}
\description{
This function maps values from a dataset to bit flags that can be encoded
into a bitfield.
}
\details{
\code{protocol} can either be the name of an internal item (see
\code{\link{bf_pcl}}), a newly built local protocol
(\code{\link{bf_protocol}}) or one that has been imported from the bitfield
community standards repository on GitHub (\code{\link{bf_standards}}). Any
\code{protocol} has specific arguments, typically at least the name of the
column containing the values to test (\code{x}). To make this function as
general as possible, all of these arguments are specified via the
\code{...} argument of \code{bf_map}. Internal
protocols are: \itemize{
\item \code{na} (x): test whether a variable contains \code{NA}-values
(\emph{boolean}).
\item \code{nan} (x): test whether a variable contains \code{NaN}-values
(\emph{boolean}).
\item \code{inf} (x): test whether a variable contains \code{Inf}-values
(\emph{boolean}).
\item \code{identical} (x, y): element-wise test whether values are
identical across two variables (\emph{boolean}).
\item \code{range} (x, min, max): test whether the values are within a
given range (\emph{boolean}).
\item \code{matches} (x, set): test whether the values match a given set
(\emph{boolean}).
\item \code{grepl} (x, pattern): test whether the values match a given
pattern (\emph{boolean}).
\item \code{category} (x): test whether the values are part of a set of
given categories (\emph{enumeration}).
\item \code{case} (...): test whether values are part of given cases
(\emph{enumeration}).
\item \code{nChar} (x): count the number of characters of the values
(\emph{unsigned integer}).
\item \code{nInt} (x): count the number of integer digits of the values
(\emph{unsigned integer}).
\item \code{nDec} (x): count the decimal digits of the variable values
(\emph{unsigned integer}).
\item \code{integer} (x, ...): encode values as integer bit-sequence.
Accepts raw integer data directly, or numeric data with
auto-scaling when \code{range}, \code{fields}, or \code{decimals}
are provided. With \code{range = c(min, max)} and
\code{fields = list(significand = n)}, values are linearly mapped
from \code{[min, max]} to \code{[0, 2^n - 1]} during encoding and
back during decoding. The scaling parameters are stored in
provenance for transparent round-trips (\emph{signed integer}).
\item \code{numeric} (x, ...): encode the numeric value as floating-point
bit-sequence (see \code{\link{.makeEncoding}} for details on the
\code{...} argument) (\emph{floating-point}).
}
}
\section{Notes}{
 Console output from R classes (such as tibble) often rounds
or truncates decimal places, even for ordinary numeric vectors. Internally,
R stores numeric values as double-precision floating-point numbers (64
bits, with 52 bits for the significand), providing approximately 16
significant decimal digits (\eqn{\log_{10}(2^{52}) \approx 15.65}{log10(2^52) = 15.65}). If a bit flag
appears inconsistent with the displayed values, verify the full precision
using \code{sprintf("\%.16f", values)}. Using more than 16 digits will show
additional figures, but these are artifacts of binary-to-decimal conversion
and carry no meaningful information.
}

\examples{
# first, set up the registry
reg <- bf_registry(name = "testBF", description = "test bitfield",
                   template = bf_tbl)

# then, put the test for NA values together; bf_map() returns the updated
# registry, so the result is assigned back to reg after every call
reg <- bf_map(protocol = "na", data = bf_tbl, registry = reg,
              x = year)

# all the other protocols...
# boolean encoding
# (na.val is the value to use where the test itself results in NA)
reg <- bf_map(protocol = "nan", data = bf_tbl, registry = reg,
              x = y)
reg <- bf_map(protocol = "inf", data = bf_tbl, registry = reg,
              x = y)
reg <- bf_map(protocol = "identical", data = bf_tbl, registry = reg,
              x = x, y = y, na.val = FALSE)
reg <- bf_map(protocol = "range", data = bf_tbl, registry = reg,
              x = yield, min = 10.4, max = 11)
reg <- bf_map(protocol = "matches", data = bf_tbl, registry = reg,
              x = commodity, set = c("soybean", "honey"), na.val = FALSE)
reg <- bf_map(protocol = "grepl", data = bf_tbl, registry = reg,
              x = year, pattern = ".*r", na.val = FALSE)

# enumeration encoding
# (for the case protocol, the cases are passed as unnamed conditions)
reg <- bf_map(protocol = "category", data = bf_tbl, registry = reg,
              x = commodity, na.val = 0)
reg <- bf_map(protocol = "case", data = bf_tbl, registry = reg, na.val = 4,
              yield >= 11, yield < 11 & yield > 9, yield < 9 & commodity == "maize")

# integer encoding
# (nChar, nInt and nDec count characters, integer digits and decimal digits;
# the integer protocol encodes the values themselves)
reg <- bf_map(protocol = "nChar", data = bf_tbl, registry = reg,
              x = commodity, na.val = 0)
reg <- bf_map(protocol = "nInt", data = bf_tbl, registry = reg,
              x = yield)
reg <- bf_map(protocol = "nDec", data = bf_tbl, registry = reg,
              x = yield)
reg <- bf_map(protocol = "integer", data = bf_tbl, registry = reg,
              x = as.integer(year), na.val = 0L)

# integer encoding with auto-scaling (numeric data mapped to integer range)
# with range = c(0, 3.1) and significand = 5, the values are linearly mapped
# from [0, 3.1] to [0, 2^5 - 1]; this example uses its own registry (reg2)
dat <- data.frame(density = c(0.5, 1.2, 2.8, 0.0, 3.1))
reg2 <- bf_registry(name = "scaledBF", description = "auto-scaled",
                    template = dat)
reg2 <- bf_map(protocol = "integer", data = dat, registry = reg2,
               x = density, range = c(0, 3.1),
               fields = list(significand = 5), na.val = 0L)

# floating-point encoding
reg <- bf_map(protocol = "numeric", data = bf_tbl, registry = reg,
              x = yield, decimals = 2)

# finally, take a look at the registry
reg

# alternatively, a raster (built with the terra package)
# the raster gets two layers, commodity and yield, on a 3 x 3 grid
library(terra)
bf_rst <- rast(nrows = 3, ncols = 3, vals = bf_tbl$commodity, names = "commodity")
bf_rst$yield <- rast(nrows = 3, ncols = 3, vals = bf_tbl$yield)

# a fresh registry is created with the raster as template; reg is overwritten
reg <- bf_registry(name = "testBF", description = "raster bitfield",
                   template = bf_rst)

# the layer names are used for x in the same way as column names above
reg <- bf_map(protocol = "na", data = bf_rst, registry = reg,
              x = commodity)

reg <- bf_map(protocol = "range", data = bf_rst, registry = reg,
              x = yield, min = 5, max = 11)

reg <- bf_map(protocol = "category", data = bf_rst, registry = reg,
              x = commodity, na.val = 0)

# print the registry built for the raster
reg

}
