% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sample_strata.R
\name{sample_strata}
\alias{sample_strata}
\title{Select Sampling Units based on Stratified Random Sampling}
\usage{
sample_strata(
  data,
  strata,
  id,
  already_sampled = NULL,
  design_data,
  design_strata = "strata",
  n_allocated = "n_to_sample"
)
}
\arguments{
\item{data}{A data frame or matrix with one row for each
sampling unit in the population, one column specifying each
unit's stratum, and one column with a unique identifier for each
unit.}

\item{strata}{a character string specifying the name of the column
in \code{data} which indicates stratum membership.}

\item{id}{a character string specifying the name of the column
in \code{data} that uniquely identifies each unit.}

\item{already_sampled}{a character string specifying the name of the
column in \code{data} which indicates (1/0 or Y/N) whether a
unit has already been sampled in a prior wave. Defaults to \code{NULL},
which means that no units have been sampled yet.}

\item{design_data}{a dataframe or matrix with one row for each stratum
that subdivides the population, one column specifying the
stratum name, and one column indicating the number of samples
allocated to each stratum.}

\item{design_strata}{a character string specifying the name of the
column in \code{design_data} that contains the stratum levels.
Defaults to "strata".}

\item{n_allocated}{a character string specifying the name of the
column in \code{design_data} that indicates the n allocated to each
stratum. Defaults to "n_to_sample".}
}
\value{
returns \code{data} as a dataframe with a new column named
"sample_indicator" containing a binary (1/0) indicator of
whether each unit should be sampled.
}
\description{
Requires two dataframes or matrices: \code{data}, with a column
\code{strata} which specifies stratum membership for each unit in
the population, and a second dataframe \code{design_data} with one
row per stratum, a column \code{design_strata} that
indicates the unique levels of \code{strata} in \code{data}, and
a column \code{n_allocated} that specifies the
number to be sampled from each stratum.
\code{sample_strata} selects the units to sample by
selecting a random sample of the desired size within each
stratum. The second dataframe can be the output of \code{allocate_wave()}
or \code{optimum_allocation()}.
}
\examples{
# Fix the random seed so the units selected below are reproducible
set.seed(1)

# Define a design dataframe: one row per stratum, with the number of
# units to sample from each stratum
design <- data.frame(
  strata = c("setosa", "virginica", "versicolor"),
  n_to_sample = c(5, 5, 5)
)

# Make sure there is an id column that uniquely identifies each unit
iris$id <- seq_len(nrow(iris))

# Run: returns iris with a new "sample_indicator" column
sample_strata(
  data = iris, strata = "Species", id = "id",
  design_data = design, design_strata = "strata", n_allocated = "n_to_sample"
)

# If some units had already been sampled in a prior wave, flag them in a
# 1/0 indicator column and pass that column's name as already_sampled
iris$already_sampled <- rbinom(nrow(iris), 1, 0.25)

sample_strata(
  data = iris, strata = "Species", id = "id",
  already_sampled = "already_sampled",
  design_data = design, design_strata = "strata", n_allocated = "n_to_sample"
)
}
