% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gt_pca_autoSVD.R
\name{gt_pca_autoSVD}
\alias{gt_pca_autoSVD}
\title{PCA controlling for LD for \code{gen_tibble} objects}
\usage{
gt_pca_autoSVD(
  x,
  k = 10,
  fun_scaling = bigsnpr::snp_scaleBinom(),
  thr_r2 = 0.2,
  use_positions = TRUE,
  size = 100/thr_r2,
  roll_size = 50,
  int_min_size = 20,
  alpha_tukey = 0.05,
  min_mac = 10,
  max_iter = 5,
  n_cores = 1,
  verbose = TRUE,
  total_var = TRUE
)
}
\arguments{
\item{x}{a \code{gen_tbl} object}

\item{k}{Number of singular vectors/values to compute. Default is \code{10}.
\strong{This algorithm should be used to compute a few singular vectors/values.}}

\item{fun_scaling}{Usually this can be left unset, as it defaults to
\code{\link[bigsnpr:snp_scaleBinom]{bigsnpr::snp_scaleBinom()}}, which is the appropriate function for
biallelic SNPs. Alternatively, it is possible to use a custom function (see
\code{\link[bigsnpr:snp_autoSVD]{bigsnpr::snp_autoSVD()}} for details).}

\item{thr_r2}{Threshold over the squared correlation between two SNPs.
Default is \code{0.2}. Use \code{NA} if you want to skip the clumping step.}

\item{use_positions}{a boolean on whether the position is used to define
\code{size}, or whether the size should be in number of SNPs. Default is \code{TRUE}.}

\item{size}{For one SNP, window size around this SNP to compute correlations.
Default is \code{100 / thr_r2} for clumping (0.2 -> 500; 0.1 -> 1000; 0.5 -> 200).
If \code{use_positions = FALSE}, this is a window in number of SNPs;
otherwise (\code{use_positions = TRUE}, the default) it is a window in kb
(genetic distance). Using the positions is recommended when they are
available.}

\item{roll_size}{Radius of rolling windows to smooth log-p-values. Default is
\code{50}.}

\item{int_min_size}{Minimum number of consecutive outlier SNPs in order to be
reported as long-range LD region. Default is \code{20}.}

\item{alpha_tukey}{Default is \code{0.05}. The type-I error rate in outlier
detection (that is further corrected for multiple testing).}

\item{min_mac}{Minimum minor allele count (MAC) for variants to be included.
Default is \code{10}.}

\item{max_iter}{Maximum number of iterations of outlier detection. Default is
\code{5}.}

\item{n_cores}{Number of cores used. Default doesn't use parallelism. You may
use \code{\link[bigstatsr:reexports]{bigstatsr::nb_cores()}}.}

\item{verbose}{Output some information on the iterations? Default is \code{TRUE}.}

\item{total_var}{a boolean indicating whether to compute the total variance
of the matrix. Default is \code{TRUE}. Using \code{FALSE} will speed up computation,
but the total variance will not be stored in the output (and thus it will
not be possible to assign a proportion of variance explained to the
components).}
}
\value{
a \code{gt_pca} object, which is a subclass of \code{big_SVD}; this is a
named S3 list with elements:
\itemize{
\item \code{d}, the eigenvalues (singular values, i.e. as variances),
\item \code{u}, the scores for each sample on each component
(the left singular vectors),
\item \code{v}, the loadings (the right singular vectors),
\item \code{center}, the centering vector,
\item \code{scale}, the scaling vector,
\item \code{method}, a string defining the method (in this case 'autoSVD'),
\item \code{call}, the call that generated the object,
\item \code{loci}, the loci used after long-range LD removal.
}
}
\description{
This function performs Principal Component Analysis on a \code{gen_tibble}, using
a fast truncated SVD with initial pruning and then iterative removal of
long-range LD regions. This function is a wrapper for
\code{\link[bigsnpr:snp_autoSVD]{bigsnpr::snp_autoSVD()}}.
}
\details{
Using \code{gt_pca_autoSVD()} requires a reasonably large dataset, as the
function iteratively removes regions of long-range LD. If you encounter the
error 'Error in rollmean(): Parameter 'size' is too large.', then
\code{roll_size} exceeds the number of variants on at least one of your
chromosomes. Try reducing \code{roll_size} to avoid this error.

Note: rather than accessing the elements of the returned object directly, it
is better to use \code{tidy} and \code{augment}. See \code{\link{gt_pca_tidiers}}.
}
\examples{
\dontshow{if (all(rlang::is_installed(c("RhpcBLASctl", "data.table")))) withAutoprint(\{ # examplesIf}
\dontshow{
data.table::setDTthreads(2)
RhpcBLASctl::blas_set_num_threads(2)
RhpcBLASctl::omp_set_num_threads(2)
}
# Create a gen_tibble of lobster genotypes from the lobster.bed file shipped
# with tidypopgen; the backing file is written to a temporary location
bed_file <-
  system.file("extdata", "lobster", "lobster.bed", package = "tidypopgen")
lobsters <- gen_tibble(bed_file,
  backingfile = tempfile("lobsters"),
  quiet = TRUE
)

# Remove monomorphic loci (keep only loci with MAF > 0) and impute using the
# mode method
lobsters <- lobsters \%>\% select_loci_if(loci_maf(genotypes) > 0)
lobsters <- gt_impute_simple(lobsters, method = "mode")

# Assign every locus to the same chromosome.
# NOTE(review): presumably done so that roll_size does not exceed the number
# of variants on any single chromosome (see Details) - confirm
show_loci(lobsters)$chromosome <- "1"

# Create PCA object, including total variance
# roll_size is lowered from its default of 50 to 20
# NOTE(review): presumably because this example dataset is small - confirm
gt_pca_autoSVD(lobsters,
  k = 10,
  roll_size = 20,
  total_var = TRUE
)
# Change number of components and exclude total variance
# (with total_var = FALSE the total variance is not stored in the output)
gt_pca_autoSVD(lobsters,
  k = 5,
  roll_size = 20,
  total_var = FALSE
)
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[bigsnpr:snp_autoSVD]{bigsnpr::snp_autoSVD()}}, which this function wraps.
}
