% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sia.R
\name{sia_data}
\alias{sia_data}
\title{Download SIA Outpatient Production Microdata}
\usage{
sia_data(
  year,
  type = "PA",
  month = NULL,
  vars = NULL,
  uf = NULL,
  procedure = NULL,
  diagnosis = NULL,
  parse = TRUE,
  col_types = NULL,
  cache = TRUE,
  cache_dir = NULL,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{year}{Integer. Year(s) of the data. Required.}

\item{type}{Character. File type to download. Default: \code{"PA"}
(outpatient production). See \code{\link{sia_info}()} for all 13 types.}

\item{month}{Integer. Month(s) of the data (1-12). If \code{NULL} (default),
downloads all 12 months. Example: \code{1} (January), \code{1:6}
(first half of the year).}

\item{vars}{Character vector. Variables to keep. If NULL (default),
returns all available variables. Use \code{\link{sia_variables}()} to see
available variables.}

\item{uf}{Character. Two-letter state (UF) abbreviation(s) to download.
If \code{NULL} (default), downloads all 27 federative units (the 26 states
and the Federal District).
Example: \code{"SP"}, \code{c("SP", "RJ")}.}

\item{procedure}{Character. SIGTAP procedure code pattern(s) to filter by
(\code{PA_PROC_ID}). Supports partial matching (prefix).
If NULL (default), returns all procedures.
Example: \code{"0301"} (consultations).}

\item{diagnosis}{Character. CID-10 code pattern(s) to filter by principal
diagnosis (\code{PA_CIDPRI}). Supports partial matching (prefix).
If NULL (default), returns all diagnoses.
Example: \code{"J"} (respiratory diseases).}

\item{parse}{Logical. If TRUE (default), converts columns to
appropriate types (integer, double, Date) based on the variable
metadata. Use \code{\link{sia_variables}()} to see the target type for each
variable. Set to FALSE for backward-compatible all-character output.}

\item{col_types}{Named list. Override the default type for specific
columns. Names are column names, values are type strings:
\code{"character"}, \code{"integer"}, \code{"double"},
\code{"date_dmy"}, \code{"date_ymd"}, \code{"date_ym"}, \code{"date"}.
Example: \code{list(PA_VALAPR = "character")} to keep \code{PA_VALAPR} as character.}

\item{cache}{Logical. If TRUE (default), caches downloaded data for
faster future access.}

\item{cache_dir}{Character. Directory for caching. Default:
\code{tools::R_user_dir("healthbR", "cache")}.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default: FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
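A tibble with outpatient production microdata. Includes columns
\code{year}, \code{month}, and \code{uf_source} to identify the source
when multiple years/months/states are combined. If \code{lazy = TRUE},
a lazy query object is returned instead of a tibble; call
\code{dplyr::collect()} to materialize the result.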
}
\description{
Downloads and returns outpatient production microdata from DATASUS FTP.
Each row represents one outpatient production record.
Data is organized monthly -- one .dbc file per type, state (UF), and month.
}
\details{
Data is downloaded from DATASUS FTP as .dbc files (one per type/state/month).
The .dbc files are decompressed internally using vendored C code from the
blast library. No external dependencies are required.

SIA data is monthly, so downloading an entire year for all states requires
324 files (27 UFs x 12 months) per type. Use \code{uf} and \code{month}
to limit downloads.

The SIA has 13 file types. The default \code{"PA"} (outpatient production)
is the most commonly used. Use \code{\link{sia_info}()} to see all types.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# all outpatient production in Acre, January 2022
ac_jan <- sia_data(year = 2022, month = 1, uf = "AC")

# filter by procedure code
consult <- sia_data(year = 2022, month = 1, uf = "AC",
                    procedure = "0301")

# filter by diagnosis (CID-10)
resp <- sia_data(year = 2022, month = 1, uf = "AC",
                 diagnosis = "J")

# only key variables
sia_data(year = 2022, month = 1, uf = "AC",
         vars = c("PA_PROC_ID", "PA_CIDPRI", "PA_SEXO",
                  "PA_IDADE", "PA_VALAPR"))

# different file type (APAC Medicamentos)
med <- sia_data(year = 2022, month = 1, uf = "AC", type = "AM")
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link{sia_info}()} for file type descriptions,
\code{\link{censo_populacao}()} for population denominators.

Other sia: 
\code{\link{sia_cache_status}()},
\code{\link{sia_clear_cache}()},
\code{\link{sia_dictionary}()},
\code{\link{sia_info}()},
\code{\link{sia_variables}()},
\code{\link{sia_years}()}
}
\concept{sia}
