% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/flag_faunabr.R
\name{flag_faunabr}
\alias{flag_faunabr}
\title{Identify records outside natural ranges according to Fauna do Brasil}
\usage{
flag_faunabr(
  data_dir,
  occ,
  species = "species",
  long = "decimalLongitude",
  lat = "decimalLatitude",
  origin = NULL,
  by_state = TRUE,
  buffer_state = 20,
  by_country = TRUE,
  buffer_country = 20,
  keep_columns = TRUE,
  spat_state = NULL,
  spat_country = NULL,
  progress_bar = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{data_dir}{(character) \strong{Required}. Directory path where the
\code{faunabr} data is saved.}

\item{occ}{(data.frame or data.table) a data frame containing the occurrence
records to be flagged. Must contain columns for species, longitude, and
latitude.}

\item{species}{(character) the name of the column in \code{occ} that contains the
species scientific names. Default is \code{"species"}.}

\item{long}{(character) the name of the column in \code{occ} that contains the
longitude values. Default is \code{"decimalLongitude"}.}

\item{lat}{(character) the name of the column in \code{occ} that contains the
latitude values. Default is \code{"decimalLatitude"}.}

\item{origin}{(character) filter the \code{faunabr} data by origin type
before checking (\code{"native"}, \code{"cryptogenic"}, or \code{"exotic"}). Default is
\code{NULL} (no filtering).}

\item{by_state}{(logical) if \code{TRUE}, flags records based on their distance
to known Brazilian state distributions. Default is \code{TRUE}.}

\item{buffer_state}{(numeric) buffer distance (in kilometers) to be applied
around the known state distribution boundaries. Records within this distance
are considered valid. Default is 20 km.}

\item{by_country}{(logical) if \code{TRUE}, flags records based on their distance
to country distributions. Default is \code{TRUE}.}

\item{buffer_country}{(numeric) buffer distance (in kilometers) to be applied
around the country boundaries. Records within this distance are considered
valid. Default is 20 km.}

\item{keep_columns}{(logical) if \code{TRUE}, the returned data frame contains
all original columns from \code{occ}. If \code{FALSE}, it returns only the key columns
and the flag. Default is \code{TRUE}.}

\item{spat_state}{(SpatVector) a SpatVector of the Brazilian states. By
default, it uses the SpatVector provided by \code{geobr::read_state()}. It can
be another SpatVector, but the structure must be identical to
'faunabr::states', with a column called "abbrev_state" identifying the state
codes.}

\item{spat_country}{(SpatVector) a SpatVector of the world countries. By
default, it uses the SpatVector provided by
\code{rnaturalearth::ne_countries()}. It can be another SpatVector, but the
structure must be identical to 'faunabr::world_fauna', with a column called
"country_code" identifying the country codes.}

\item{progress_bar}{(logical) whether to display a progress bar during
processing. If \code{TRUE}, the 'pbapply' package must be installed. Default is
\code{FALSE}.}

\item{verbose}{(logical) if \code{TRUE}, prints messages about the progress and
the number of species being checked. Default is \code{FALSE}.}
}
\value{
A \code{data.frame} that is the original \code{occ} data frame
augmented with a new column named \code{faunabr_flag}. This column is
logical (\code{TRUE}/\code{FALSE}) indicating whether the record falls
within the expected distribution (plus buffer) based on the \code{faunabr}
data. Records for species not found in the \code{faunabr} data will have
\code{NA} in the \code{faunabr_flag} column.
}
\description{
Flags (validates) occurrence records based on known distribution data
from the Catálogo Taxonômico da Fauna do Brasil (faunabr). This function
checks if an occurrence point for a given species falls within its documented
distribution, allowing for user-defined buffers around Brazilian states
or the entire country. Records are flagged as valid (\code{TRUE}) if they fall
within the specified range for the distribution information available in the
\code{faunabr} data.
}
\examples{
# Load example data
data("occurrences", package = "RuHere")
# Get only occurrences from Azure Jay
occ <- occurrences[occurrences$species == "Cyanocorax caeruleus", ]
# Set folder where distributional datasets were saved
# Here, just a sample provided in the package
# You must run 'faunabr_here()' beforehand to download the necessary data files for your species
dataset_dir <- system.file("extdata/datasets", package = "RuHere")
# Flag records using faunabr specialist information
occ_fauna <- flag_faunabr(data_dir = dataset_dir, occ = occ)
}
