% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate_phenodata.R
\name{generate_phenodata}
\alias{generate_phenodata}
\alias{generate_phenodata_1_simple}
\alias{generate_phenodata_1}
\alias{generate_phenodata_2_bvn}
\alias{generate_phenodata_2_copula}
\title{Functions to generate phenotype data.}
\usage{
generate_phenodata_1_simple(genodata = NULL, type = "quantitative", b = 0,
  a = c(0, 0.5, 0.5))

generate_phenodata_1(genodata = NULL, type = "quantitative", b = 0.6,
  a = c(0, 0.5, 0.5), MAF_cutoff = 1, prop_causal = 0.1,
  direction = "a")

generate_phenodata_2_bvn(genodata = NULL, tau = NULL, b1 = 0, b2 = 0,
  a1 = c(0, 0.5, 0.5), a2 = c(0, 0.5, 0.5))

generate_phenodata_2_copula(genodata = NULL, phi = NULL, tau = 0.5,
  b1 = 0.6, b2 = 0.6, a1 = c(0, 0.5, 0.5), a2 = c(0, 0.5, 0.5),
  MAF_cutoff = 1, prop_causal = 0.1, direction = "a")
}
\arguments{
\item{genodata}{Numeric input vector or dataframe containing the genetic
variant(s) in columns. Must be in allelic coding 0, 1, 2.}

\item{type}{String with value \code{"quantitative"} or \code{"binary"}
specifying whether normally-distributed or binary phenotypes
are generated.}

\item{b}{Numeric value or vector specifying the genetic effect size(s) of
the provided SNVs (\code{genodata}) in the data generation.}

\item{a}{Numeric vector specifying the effect sizes of the covariates \eqn{X_1}{X1}, \eqn{X_2}{X2}
in the data generation.}

\item{MAF_cutoff}{Numeric value specifying a minor allele frequency cutoff to
determine among which SNVs the causal SNVs are
sampled for the phenotype generation.}

\item{prop_causal}{Numeric value specifying the desired proportion of causal SNVs
among all SNVs.}

\item{direction}{String with value \code{"a"}, \code{"b"}, or \code{"c"}
specifying whether all causal SNVs have a positive effect on
the phenotypes (\code{"a"}), 20\% of the causal SNVs have a
negative effect and 80\% a positive effect on the phenotypes
(\code{"b"}), or 50\% of the causal SNVs have a negative
effect and 50\% a positive effect on the phenotypes (\code{"c"}).}

\item{tau}{Numeric value specifying Kendall's tau, which determines the
dependence between the two generated phenotypes.}

\item{b1}{Numeric value or vector specifying the genetic effect size(s) of
the provided SNVs (\code{genodata}) on the first phenotype
in the data generation.}

\item{b2}{Numeric value or vector specifying the genetic effect size(s) of
the provided SNVs (\code{genodata}) on the second phenotype
in the data generation.}

\item{a1}{Numeric vector specifying the effect sizes of the covariates \eqn{X_1}{X1}, \eqn{X_2}{X2}
on the first phenotype in the data generation.}

\item{a2}{Numeric vector specifying the effect sizes of the covariates \eqn{X_1}{X1}, \eqn{X_2}{X2}
on the second phenotype in the data generation.}

\item{phi}{Numeric value specifying the parameter \eqn{\phi}{phi} for
the dependence between the two generated phenotypes.}
}
\value{
A dataframe containing n observations of the phenotype Y or phenotypes
        \eqn{Y_1}{Y1}, \eqn{Y_2}{Y2} and of the covariates \eqn{X_1}{X1}, \eqn{X_2}{X2}.
}
\description{
Functions to generate standard normal or binary phenotypes based on provided genetic
data, for specified effect sizes.
The functions \code{\link{generate_phenodata_1_simple}} and
\code{\link{generate_phenodata_1}} generate one phenotype Y conditional on
single nucleotide variants (SNVs) and two covariates.
\code{\link{generate_phenodata_2_bvn}} as well as \code{\link{generate_phenodata_2_copula}}
generate two phenotypes \eqn{Y_1}{Y1}, \eqn{Y_2}{Y2}, with dependence given by Kendall's tau, conditional on
the provided SNVs and two covariates.
}
\details{
In more detail, the function \code{\link{generate_phenodata_1_simple}}
generates a quantitative or binary phenotype Y with n observations,
conditional on the specified SNVs with given effect sizes and conditional
on one binary and one standard normally-distributed covariate with
specified effect sizes. n is given through the provided SNVs.

\code{\link{generate_phenodata_1}} provides an extension of
\code{\link{generate_phenodata_1_simple}} and additionally allows the user to select
the proportion of causal SNVs, a minor allele frequency cutoff on the
causal SNVs, and varying effect directions. n is given through the
provided SNVs.

The function \code{\link{generate_phenodata_2_bvn}} generates
two quantitative phenotypes \eqn{Y_1}{Y1}, \eqn{Y_2}{Y2} conditional on one binary and one
standard normally-distributed covariate \eqn{X_1}{X1}, \eqn{X_2}{X2} from the bivariate
normal distribution so that they have dependence \eqn{\tau}{tau} given
by Kendall's \code{tau}.

The function \code{\link{generate_phenodata_2_copula}} generates
two quantitative phenotypes \eqn{Y_1}{Y1}, \eqn{Y_2}{Y2} conditional on one binary and one
standard normally-distributed covariate \eqn{X_1}{X1}, \eqn{X_2}{X2} from the Clayton copula
so that \eqn{Y_1}{Y1}, \eqn{Y_2}{Y2} are marginally normally distributed and have dependence
Kendall's tau specified by \code{tau} or \code{phi}, using the function
\code{\link{generate_clayton_copula}}.

The genetic effect sizes are the specified numeric values \code{b} and
\code{b1, b2}, respectively, in the functions \code{\link{generate_phenodata_1_simple}}
and \code{\link{generate_phenodata_2_bvn}}. In
\code{\link{generate_phenodata_1}} and \code{\link{generate_phenodata_2_copula}},
the genetic effect sizes are computed by multiplying \code{b} or \code{b1, b2},
respectively, with the absolute value of the log10-transformed
minor allele frequencies, so that rarer variants have larger effect sizes.
}
\examples{

# Generate genetic data:
# (called with n_SNV = 20 and n_ind = 1000; presumably 20 SNVs in columns
# for 1000 individuals. compute_MAF() is then applied to the result.)
genodata <- generate_genodata(n_SNV = 20, n_ind = 1000)
compute_MAF(genodata)

# Generate different phenotype data:

# --- generate_phenodata_1_simple: one phenotype ---
# Single SNV (first column of genodata), quantitative phenotype, b = 0.
phenodata1 <- generate_phenodata_1_simple(genodata = genodata[,1],
                                          type = "quantitative", b = 0)
# Same single SNV, now with effect size b = 2.
phenodata2 <- generate_phenodata_1_simple(genodata = genodata[,1],
                                          type = "quantitative", b = 2)
# All SNVs in genodata, with a single value b = 2.
phenodata3 <- generate_phenodata_1_simple(genodata = genodata,
                                          type = "quantitative", b = 2)
# All SNVs, with b given as a vector of 20 values (0.1, 0.2, ..., 2).
phenodata4 <- generate_phenodata_1_simple(genodata = genodata,
                                          type = "quantitative",
                                          b = seq(0.1, 2, 0.1))
# Single SNV, binary phenotype, b = 0.
phenodata5 <- generate_phenodata_1_simple(genodata = genodata[,1],
                                          type = "binary", b = 0)

# --- generate_phenodata_1: additionally sets MAF_cutoff, prop_causal
# and direction ---
# Single SNV, b = 0, MAF_cutoff = 1, direction "a".
phenodata6 <- generate_phenodata_1(genodata = genodata[,1],
                                   type = "quantitative", b = 0,
                                   MAF_cutoff = 1, prop_causal = 0.1,
                                   direction = "a")
# All SNVs, b = 0.6, MAF_cutoff = 0.03, prop_causal = 0.1.
phenodata7 <- generate_phenodata_1(genodata = genodata,
                                   type = "quantitative", b = 0.6,
                                   MAF_cutoff = 0.03, prop_causal = 0.1,
                                   direction = "a")
# As phenodata7, but with b given as a vector of 20 values.
phenodata8 <- generate_phenodata_1(genodata = genodata,
                                   type = "quantitative",
                                   b = seq(0.1, 2, 0.1),
                                   MAF_cutoff = 0.03, prop_causal = 0.1,
                                   direction = "a")

# --- generate_phenodata_2_bvn: two phenotypes, tau = 0.5 throughout ---
# Single SNV, b1 = b2 = 0.
phenodata9 <- generate_phenodata_2_bvn(genodata = genodata[,1],
                                       tau = 0.5, b1 = 0, b2 = 0)
# All SNVs, b1 = b2 = 0.
phenodata10 <- generate_phenodata_2_bvn(genodata = genodata,
                                        tau = 0.5, b1 = 0, b2 = 0)
# All SNVs, scalar b1 = 1 and b2 given as a vector of 20 values.
phenodata11 <- generate_phenodata_2_bvn(genodata = genodata,
                                        tau = 0.5, b1 = 1,
                                        b2 = seq(0.1,2,0.1))
# All SNVs, scalar b1 = 1 and scalar b2 = 2.
phenodata12 <- generate_phenodata_2_bvn(genodata = genodata,
                                        tau = 0.5, b1 = 1, b2 = 2)
# Three stacked panels: histogram of Y1, histogram of Y2, and Y1 against Y2.
par(mfrow = c(3, 1))
hist(phenodata12$Y1)
hist(phenodata12$Y2)
plot(phenodata12$Y1, phenodata12$Y2)

# --- generate_phenodata_2_copula: two phenotypes ---
# Single SNV, prop_causal = 1, tau = 0.5, b1 = b2 = 0.
phenodata13 <- generate_phenodata_2_copula(genodata = genodata[,1],
                                           MAF_cutoff = 1, prop_causal = 1,
                                           tau = 0.5, b1 = 0, b2 = 0)
# All SNVs, prop_causal = 0.5, tau = 0.5, b1 = b2 = 0.
phenodata14 <- generate_phenodata_2_copula(genodata = genodata,
                                           MAF_cutoff = 1, prop_causal = 0.5,
                                           tau = 0.5, b1 = 0, b2 = 0)
# NOTE(review): the arguments of this call are identical to those of the
# phenodata14 call above; possibly a different setting was intended. Confirm.
phenodata15 <- generate_phenodata_2_copula(genodata = genodata,
                                           MAF_cutoff = 1, prop_causal = 0.5,
                                           tau = 0.5, b1 = 0, b2 = 0)
# All SNVs, tau = 0.2, scalar b1 = 0.3 and b2 given as a vector of 20 values.
phenodata16 <- generate_phenodata_2_copula(genodata = genodata,
                                           MAF_cutoff = 1, prop_causal = 0.5,
                                           tau = 0.2, b1 = 0.3,
                                           b2 = seq(0.1, 2, 0.1))
# All SNVs, tau = 0.2, b1 = b2 = 0.3.
phenodata17 <- generate_phenodata_2_copula(genodata = genodata,
                                           MAF_cutoff = 1, prop_causal = 0.5,
                                           tau = 0.2, b1 = 0.3, b2 = 0.3)
# Three stacked panels: histogram of Y1, histogram of Y2, and Y1 against Y2.
par(mfrow = c(3, 1))
hist(phenodata17$Y1)
hist(phenodata17$Y2)
plot(phenodata17$Y1, phenodata17$Y2)






}
