% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/episode_group.R
\name{episode_group}
\alias{episode_group}
\alias{fixed_episodes}
\alias{rolling_episodes}
\title{Episode grouping for record deduplication and case assignment}
\usage{
episode_group(df, sn = NULL, strata = NULL, date, case_length,
  episode_type = "fixed", episode_unit = "days", episodes_max = Inf,
  recurrence_length = NULL, rolls_max = Inf, data_source = NULL,
  custom_sort = NULL, from_last = FALSE, overlap_method = c("across",
  "inbetween", "aligns_start", "aligns_end", "chain"),
  bi_direction = FALSE, group_stats = FALSE, display = TRUE,
  deduplicate = FALSE, to_s4 = FALSE)

fixed_episodes(date, sn = NULL, strata = NULL, case_length,
  episode_unit = "days", episodes_max = Inf, data_source = NULL,
  custom_sort = NULL, from_last = FALSE, overlap_method = c("across",
  "inbetween", "aligns_start", "aligns_end", "chain"),
  bi_direction = FALSE, group_stats = FALSE, display = TRUE,
  deduplicate = FALSE, x, to_s4 = FALSE)

rolling_episodes(date, sn = NULL, strata = NULL, case_length,
  recurrence_length = NULL, episode_unit = "days",
  episodes_max = Inf, rolls_max = Inf, data_source = NULL,
  custom_sort = NULL, from_last = FALSE, overlap_method = c("across",
  "inbetween", "aligns_start", "aligns_end", "chain"),
  bi_direction = FALSE, group_stats = FALSE, display = TRUE,
  deduplicate = FALSE, x, to_s4 = FALSE)
}
\arguments{
\item{df}{\code{data.frame}. One or more datasets appended together.}

\item{sn}{Unique numerical record identifier. Optional.}

\item{strata}{Subsets of the dataset within which episode grouping will be done separately. \code{\link{episode_group}} supports the use of multiple columns supplied as column names. \code{\link{record_group}} can be used to create the \code{strata}.}

\item{date}{Date (\code{date}, \code{datetime} or \code{numeric}) or period (\code{\link{number_line}}) of events.}

\item{case_length}{Period after a \code{"Case (C)"} within which another record from the same \code{strata} is considered a \code{"Duplicate (D)"} record.}

\item{episode_type}{\code{"fixed"} or \code{"rolling"}.}

\item{episode_unit}{Time units as supported by lubridate's \code{\link[lubridate]{duration}} function.}

\item{episodes_max}{Maximum number of times to group episodes within each \code{strata}.}

\item{recurrence_length}{Period after the last record (\code{"Case (C)"}, \code{"Duplicate (D)"} or \code{"Recurrent (R)"}) of an episode within which another record from the same \code{strata} is considered a \code{"Recurrent (R)"} record. If \code{recurrence_length} is not supplied, \code{case_length} is used as the \code{recurrence_length}.}

\item{rolls_max}{Maximum number of times an event can reoccur within an episode. Only used if \code{episode_type} is \code{"rolling"}.}

\item{data_source}{Unique dataset identifier. Useful when the dataset contains data from multiple sources. \code{\link{episode_group}} supports the use of multiple columns supplied as column names.}

\item{custom_sort}{If supplied, \code{"Case (C)"} assignment will be done with preference to this sort order. Useful in specifying that episode grouping begins at particular records regardless of chronological order. \code{\link{episode_group}} supports the use of multiple columns supplied as column names.}

\item{from_last}{If \code{TRUE}, episode grouping will be backwards in time - starting at the most recent record and proceeding to the earliest. If \code{FALSE}, it'll be forward in time - starting at the earliest record and proceeding to the most recent one.}

\item{overlap_method}{A set of ways for grouped intervals to overlap. Options are: \code{"across"}, \code{"aligns_start"}, \code{"aligns_end"}, \code{"inbetween"}, \code{"chain"}. See \code{\link{overlap}} functions.}

\item{bi_direction}{If \code{FALSE}, \code{"Duplicate (D)"} records will be those within the \code{case_length} period, before or after the \code{"Case (C)"} as determined by \code{from_last}. If \code{TRUE}, \code{"Duplicate (D)"} records will be those within the same period before and after the \code{"Case (C)"}.}

\item{group_stats}{If \code{TRUE}, the output will include additional columns with useful stats for each episode group.}

\item{display}{If \code{TRUE}, status messages are printed on screen.}

\item{deduplicate}{If \code{TRUE}, \code{"Duplicate (D)"} records are excluded from the output.}

\item{to_s4}{If \code{TRUE}, changes the returned output to an \code{\link[=epid-class]{epid}} object.}

\item{x}{Record date or interval. Deprecated. Please use \code{date}.}
}
\value{
\code{data.frame} (\code{\link[=epid-class]{epid}} objects if \code{to_s4} is \code{TRUE})

\itemize{
\item \code{sn} - unique record identifier as provided
\item \code{epid | .Data} - unique episode identifier
\item \code{case_nm} - record type with regard to case assignment
\item \code{epid_dataset} - data sources in each episode
\item \code{epid_interval} - episode start and end dates. A \code{\link{number_line}} object.
\item \code{epid_length} - difference between episode start and end dates (\code{difftime}). If possible, it's in the same unit as \code{episode_unit}; otherwise, a difference in days is returned.
\item \code{epid_total} - number of records in each episode
}

\code{epid} objects will be the default output in the next release.
}
\description{
Group records into chronological episodes
}
\details{
Episode grouping begins at a reference record (\code{"Case (C)"}) and proceeds forward or backward in time depending on \code{from_last}.
If \code{custom_sort} is used, episode grouping can be forced to begin at certain records before proceeding forward or backwards in time.
The maximum duration of a \code{"fixed"} episode is the \code{case_length}, while the maximum duration of a \code{"rolling"} episode is the
\code{case_length} plus all recurrence periods. A recurrence period is a fixed period (\code{recurrence_length}) after the last record of an episode. Records within this period are taken as \code{"Recurrent (R)"} records of the initial \code{"Case (C)"}.

When a \code{data_source} identifier is included,
\code{epid_dataset} is included in the output. This lists the source of every record in each episode.

\code{fixed_episodes()} and \code{rolling_episodes()} are wrapper functions of \code{episode_group()}.
They are convenient alternatives with the same functionalities.
}
\examples{
library(dplyr)
library(lubridate)

#1. Fixed episodes
data(infections); infections
db_1 <- infections
# 16-day (difference of 15 days) episodes beginning from the earliest record
db_1$fd <- fixed_episodes(db_1$date, case_length = 15, to_s4 = TRUE, display = FALSE)
# 16-hour (difference of 15 hours) episodes beginning from the earliest record
db_1$fh <- fixed_episodes(db_1$date, case_length = 15,
episode_unit = "hours", to_s4 = TRUE, display = FALSE)
db_1

#2. Rolling episodes
# Case length and recurrence periods of 16 days
db_1$rd_a <- rolling_episodes(db_1$date, case_length = 15, to_s4 = TRUE, display = FALSE)
# Case length of 16 days and recurrence periods of 11 days
db_1$rd_b <- rolling_episodes(db_1$date, case_length = 15,
recurrence_length = 10, to_s4 = TRUE, display = FALSE)
# Case length of 16 days and 2 recurrence periods of 11 days
db_1$rd_c <- rolling_episodes(db_1$date, case_length = 15,
recurrence_length = 10, rolls_max = 2, to_s4 = TRUE, display = FALSE)
db_1

# 3. Stratified episode grouping
db_3 <- infections

db_3$patient_id <- c(rep("PID 1",8), rep("PID 2",3))
# One 16-day episode per patient
db_3$epids_p <- fixed_episodes(date=db_3$date, strata = db_3$patient_id,
case_length = 15, episodes_max = 1, to_s4 = TRUE, display = FALSE)
db_3

# 4. Case assignment
db_4 <- infections

## 4.1 Chronological order
db_4$forward_time <- fixed_episodes(db_4$date, case_length = 1,
episode_unit = "month", to_s4 = TRUE, display = FALSE)
db_4$backward_time <- fixed_episodes(db_4$date, case_length = 1,
episode_unit = "month", from_last = TRUE, to_s4 = TRUE, display = FALSE)
db_4

## 4.2 User defined order
db_4b <- infections
db_4b
# RTI > UTI, or RTI > BSI
db_4b$ord1 <- ifelse(db_4b$infection =="RTI",0,1)
# UTI > BSI > RTI
db_4b$ord2 <- factor(db_4b$infection, levels = c("UTI","BSI","RTI"))

db_4b$epids_1 <- fixed_episodes(db_4b$date, case_length = 15,
custom_sort = db_4b$ord1, to_s4 = TRUE, display = FALSE)
db_4b$epids_2 <- fixed_episodes(db_4b$date, case_length = 15,
custom_sort = db_4b$ord2, to_s4 = TRUE, display = FALSE)
db_4b$epids_2b <- fixed_episodes(db_4b$date, case_length = 15,
custom_sort = db_4b$ord2, bi_direction = TRUE, to_s4 = TRUE, display = FALSE)
db_4b

#5. Interval grouping
data(hospital_admissions)

hospital_admissions$admin_period <- number_line(hospital_admissions$admin_dt,
hospital_admissions$discharge_dt)
admissions <- hospital_admissions[c("admin_period","epi_len")]
admissions

# Episodes of overlapping periods of admission
admissions$epi_0 <- fixed_episodes(date=admissions$admin_period, case_length = 0,
group_stats = TRUE, to_s4=TRUE)
admissions

# Overlapping periods of admission separated by 1 month
admissions$epi_1 <- fixed_episodes(date=admissions$admin_period, case_length = 1,
episode_unit = "months", group_stats = TRUE, to_s4 = TRUE, display = FALSE)
admissions

# Episodes of chained admission periods, and those with aligned end periods
admissions$epi_0b <- fixed_episodes(date=admissions$admin_period, case_length = 0,
overlap_method = c("chain","aligns_end"), group_stats = TRUE, to_s4 = TRUE, display = FALSE)
admissions["epi_0b"]


# Note - episode_group() takes column names not actual values
db_5 <- infections

db_5$recur <- 20
db_5$epids_f <- episode_group(db_5, date=date, episode_type = "fixed",
case_length = epi_len, to_s4 = TRUE, display = FALSE)
db_5$epids_r <- episode_group(db_5, date=date, episode_type = "rolling",
case_length = epi_len, recurrence_length = recur, to_s4 = TRUE, display = FALSE)
db_5

}
\seealso{
\code{\link{record_group}}, \code{\link{overlap}} and \code{\link{number_line}}
}
