% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ldgeno.R
\name{ldest}
\alias{ldest}
\title{Pairwise LD estimation in polyploids.}
\usage{
ldest(
  ga,
  gb,
  K,
  se = TRUE,
  type = c("hap", "comp"),
  model = c("norm", "flex"),
  pen = ifelse(type == "hap", 2, 1)
)
}
\arguments{
\item{ga}{One of two possible inputs:
\enumerate{
\item{A vector of counts, containing the genotypes for each
individual at the first locus. When \code{type = "comp"},
the vector of genotypes may be continuous (e.g. the
posterior mean genotype).}
\item{A matrix of genotype log-likelihoods at the first locus.
The rows index the individuals and the columns index
the genotypes. That is \code{ga[i, j]} is the genotype
likelihood of individual \code{i} for genotype \code{j-1}.}
}}

\item{gb}{One of two possible inputs:
\enumerate{
\item{A vector of counts, containing the genotypes for each
individual at the second locus. When \code{type = "comp"},
the vector of genotypes may be continuous (e.g. the
posterior mean genotype).}
\item{A matrix of genotype log-likelihoods at the second locus.
The rows index the individuals and the columns index
the genotypes. That is \code{gb[i, j]} is the genotype
likelihood of individual \code{i} for genotype \code{j-1}.}
}}

\item{K}{The ploidy of the species. Assumed to be the same for all
individuals.}

\item{se}{A logical. Should we calculate standard errors (\code{TRUE}) or
not (\code{FALSE}). Calculating standard errors can be really slow
when \code{type = "comp"}, \code{model = "flex"}, and when using
genotype likelihoods. Otherwise, standard error calculations
should be pretty fast.}

\item{type}{The type of LD to calculate. The available types are
haplotypic LD (\code{type = "hap"}) or composite LD
(\code{type = "comp"}). Haplotypic LD is only appropriate for
autopolyploids when the individuals are in Hardy-Weinberg
equilibrium (HWE). The composite
measures of LD are always applicable, and consistently estimate the
usual measures of LD when HWE is fulfilled in autopolyploids.
However, when HWE is not fulfilled, interpreting the
composite measures of LD could be a little tricky.}

\item{model}{When \code{type = "comp"} and using genotype likelihoods,
should we use the proportional
bivariate normal model to estimate the genotype distribution
(\code{model = "norm"}), or the general categorical distribution
(\code{model = "flex"})? Defaults to \code{"norm"}.}

\item{pen}{The penalty to be applied to the likelihood. You can think about
this as the prior sample size. Should be greater than 1. Does not
apply if \code{model = "norm"}, \code{type = "comp"}, and using
genotype likelihoods. Also does not apply when \code{type = "comp"}
and using genotypes.}
}
\value{
A vector with some or all of the following elements:
\describe{
\item{\code{D}}{The estimate of the LD coefficient.}
\item{\code{D_se}}{The standard error of the estimate of
the LD coefficient.}
\item{\code{r2}}{The estimate of the squared Pearson correlation.}
\item{\code{r2_se}}{The standard error of the estimate of the
squared Pearson correlation.}
\item{\code{r}}{The estimate of the Pearson correlation.}
\item{\code{r_se}}{The standard error of the estimate of the
Pearson correlation.}
\item{\code{Dprime}}{The estimate of the standardized LD
coefficient. When \code{type} = "comp", this corresponds
to the standardization where we fix allele frequencies.}
\item{\code{Dprime_se}}{The standard error of \code{Dprime}.}
\item{\code{Dprimeg}}{The estimate of the standardized LD
coefficient. This corresponds to the standardization where
we fix genotype frequencies.}
\item{\code{Dprimeg_se}}{The standard error of \code{Dprimeg}.}
\item{\code{z}}{The Fisher-z transformation of \code{r}.}
\item{\code{z_se}}{The standard error of the Fisher-z
transformation of \code{r}.}
\item{\code{p_ab}}{The estimated haplotype frequency of ab.
Only returned if estimating the haplotypic LD.}
\item{\code{p_Ab}}{The estimated haplotype frequency of Ab.
Only returned if estimating the haplotypic LD.}
\item{\code{p_aB}}{The estimated haplotype frequency of aB.
Only returned if estimating the haplotypic LD.}
\item{\code{p_AB}}{The estimated haplotype frequency of AB.
Only returned if estimating the haplotypic LD.}
\item{\code{q_ij}}{The estimated frequency of genotype i at locus 1
and genotype j at locus 2. Only returned if estimating the
composite LD.}
\item{\code{n}}{The number of individuals used to estimate pairwise LD.}
}
}
\description{
Estimates either haplotypic or composite measures of LD using either
genotypes are genotype likelihoods via maximum likelihood.
The usual measures of LD are estimated (D, D', and r) along with
the Fisher-z transformation of r (called "z"). All estimates
are returned with standard errors. See Gerard (2021) for details.
}
\section{Haplotypic LD}{


This section describes the methods used when \code{type = "hap"} is
selected.

Haplotypic LD measures the association
between two loci on the same haplotype. When haplotypes are known, estimating
haplotypic LD is simple using just the haplotypic frequencies.

When haplotypes are not known, we can still estimate haplotypic frequencies
using the genotypes or genotype likelihoods
\emph{in autopolyploids as long as Hardy-Weinberg equilibrium (HWE) is satisfied}. We do
this via maximum likelihood using gradient ascent. Gradient ascent is
performed over the unconstrained parameterization of the 3-simplex from
Betancourt (2012). The estimated haplotype frequencies are then used to
estimate haplotypic LD.

Standard errors are provided using standard maximum likelihood theory.
In brief, the Hessian matrix of the log-likelihood is calculated at
the MLE's of the haplotype frequencies. The negative inverse of this
Hessian matrix is approximately the covariance matrix of the MLE's of the
haplotype frequencies. Since all haplotypic LD measures are functions
of the haplotype frequencies, we use the delta-method to obtain
the standard errors for each LD estimate.

A Dirichlet(2,2,2,2) prior is placed over the frequencies of
haplotypes 00, 01, 10, and 11. This corresponds to the "add two" rule
of Agresti (1998). You can change this prior via the \code{pen} argument.

When you either do not have autopolyploids or when HWE is \emph{not}
satisfied, then the estimates using \code{type = "hap"}
are nonsensical. However, the composite measures of LD are still
applicable (see below).
}

\section{Composite LD}{


This section describes the methods used when \code{type = "comp"} is
selected.

When HWE is not satisfied, haplotype frequencies are not estimable. However,
measures of association between two loci are still estimable. These
associations may be caused by LD either on the same haplotype or between
different haplotypes. Cockerham and Weir (1977) thus called such measures
"composite" measures of LD.

When the genotypes are known, these composite measures have simple
correspondences to well-known statistical measures of association.
D is the covariance of genotypes between loci divided by the ploidy.
r is the Pearson correlation of genotypes. D' is D divided by a
term that involves only mean genotypes.

When genotypes are not known, we estimate the joint genotype frequencies
and use these to estimate the composite measures of LD using
genotype likelihoods. The distribution of genotypes is assumed to
either follow a proportional bivariate normal model (by default) or
a general categorical model.

These estimates of composite measures of LD estimate the haplotypic
measures of LD when HWE is fulfilled, but are still applicable when HWE
is not fulfilled.

When genotypes are known, standard errors are calculated using standard
moment-based approaches. When genotypes are not known, standard
errors are calculated using standard maximum likelihood theory,
same as for the haplotypic LD estimates (see above), or using
a bootstrap.
}

\examples{
set.seed(1)
n <- 100 # sample size
K <- 6 # ploidy

## generate some fake genotypes when LD = 0.
ga <- stats::rbinom(n = n, size = K, prob = 0.5)
gb <- stats::rbinom(n = n, size = K, prob = 0.5)
head(ga)
head(gb)

## generate some fake genotype likelihoods when LD = 0.
gamat <- t(sapply(ga, stats::dnorm, x = 0:K, sd = 1, log = TRUE))
gbmat <- t(sapply(gb, stats::dnorm, x = 0:K, sd = 1, log = TRUE))
head(gamat)
head(gbmat)

## Haplotypic LD with genotypes
ldout1 <- ldest(ga = ga,
                gb = gb,
                K = K,
                type = "hap")
head(ldout1)

## Haplotypic LD with genotype likelihoods
ldout2 <- ldest(ga = gamat,
                gb = gbmat,
                K = K,
                type = "hap")
head(ldout2)

## Composite LD with genotypes
ldout3 <- ldest(ga = ga,
                gb = gb,
                K = K,
                type = "comp")
head(ldout3)

## Composite LD with genotype likelihoods and normal model
ldout4 <- ldest(ga = gamat,
                gb = gbmat,
                K = K,
                type = "comp",
                model = "norm")
head(ldout4)

## Composite LD with genotype likelihoods and general categorical model
ldout5 <- ldest(ga = gamat,
                gb = gbmat,
                K = K,
                type = "comp",
                model = "flex",
                se = FALSE)
head(ldout5)

ldout1[["D"]]
ldout2[["D"]]
ldout3[["D"]]
ldout4[["D"]]
ldout5[["D"]]

}
\references{
\itemize{
\item{Agresti, Alan, and Brent A. Coull. "Approximate is better than
"exact" for interval estimation of binomial proportions."
\emph{The American Statistician} 52, no. 2 (1998): 119-126.
\doi{10.1080/00031305.1998.10480550}}
\item{Betancourt, Michael. "Cruising the simplex: Hamiltonian Monte
Carlo and the Dirichlet distribution." In
\emph{AIP Conference Proceedings 31st}, vol. 1443, no. 1,
pp. 157-164. American Institute of Physics, 2012.
\doi{10.1063/1.3703631}}
\item{Cockerham, C. Clark, and B. S. Weir. "Digenic descent measures
for finite populations." \emph{Genetics Research} 30, no. 2 (1977):
121-147. \doi{10.1017/S0016672300017547}}
\item{Gerard, David. "Pairwise Linkage Disequilibrium Estimation
for Polyploids." \emph{Molecular Ecology Resources}.
Accepted Author Manuscript. (2021)
\doi{10.1111/1755-0998.13349}}
}
}
\seealso{
\describe{
\item{\code{\link{ldfast}()}}{Fast, moment-based approach to LD estimation
that also accounts for genotype uncertainty.}
\item{\code{\link{mldest}()}}{For calculating pairwise LD among all
pairs of a collection of SNPs.}
\item{\code{\link{sldest}()}}{For calculating pairwise LD along a
sliding window of SNPs.}
\item{\code{\link{ldest_hap}()}}{For the function that directly estimates
haplotypic LD when HWE is fulfilled.}
\item{\code{\link{ldest_comp}()}}{For the function that directly
estimates composite LD.}
}
}
\author{
David Gerard
}
