% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simReg.R
\name{sim_reg}
\alias{sim_reg}
\title{Similarity regression}
\usage{
sim_reg(ontology = NULL, y, x = NULL, g = rep(0, length(y)),
  its = 20000, thin = 1, record_sims = FALSE,
  record_model_likelihoods = FALSE, tune_proposals = TRUE,
  verbose = FALSE, information_content = get_term_info_content(ontology,
  term_sets = x),
  term_descendancy_matrix = get_term_descendancy_matrix(ontology,
  names(information_content)), term_sim_mat = prune_sim_mat(ontology,
  get_term_sim_mat(ontology, information_content, term_descendancy_matrix =
  term_descendancy_matrix)), case_ids = unlist(mapply(SIMPLIFY = FALSE, FUN =
  rep, 0:(length(x) - 1), sapply(x, length))),
  term_ids = as.integer(match(unlist(x), colnames(term_descendancy_matrix))) -
  1, return_tuning_runs = FALSE, tuning_its = its,
  tuning_burn = as.integer(tuning_its/5), burn = as.integer(its/5),
  tune_phi_pseudoprior = TRUE, gamma = (runif(1) < gamma_prior_prob),
  alpha_star = rnorm(n = 1, mean = alpha_star_mean, sd = alpha_star_sd),
  alpha = rnorm(n = 1, mean = alpha_mean, sd = alpha_sd), log_beta = rnorm(n
  = 1, mean = log_beta_mean, sd = log_beta_sd), phi = sample.int(n =
  ncol(term_descendancy_matrix), size = 3, replace = TRUE) - 1,
  logit_mean_f = rnorm(n = 1, mean = logit_mean_f_mean, sd = logit_mean_f_sd),
  log_alpha_plus_beta_f = rnorm(n = 1, mean = log_alpha_plus_beta_f_mean, sd =
  log_alpha_plus_beta_f_sd), logit_mean_g = rnorm(n = 1, mean =
  logit_mean_g_mean, sd = logit_mean_g_sd), log_alpha_plus_beta_g = rnorm(n =
  1, mean = log_alpha_plus_beta_g_mean, sd = log_alpha_plus_beta_g_sd),
  gamma_prior_prob = 0.05, alpha_star_mean = 0, alpha_mean = 0,
  alpha_star_sd = 5, alpha_sd = 5, log_beta_mean = 2, log_beta_sd = 1,
  logit_mean_f_mean = 1, logit_mean_f_sd = 1,
  log_alpha_plus_beta_f_mean = 2, log_alpha_plus_beta_f_sd = 1,
  logit_mean_g_mean = 0, logit_mean_g_sd = 1.5,
  log_alpha_plus_beta_g_mean = 2, log_alpha_plus_beta_g_sd = 1,
  alpha_star_proposal_sd = 2, alpha_proposal_sd = 2,
  log_beta_proposal_sd = 2, logit_mean_f_proposal_sd = 2,
  log_alpha_plus_beta_f_proposal_sd = 2, logit_mean_g_proposal_sd = 2,
  log_alpha_plus_beta_g_proposal_sd = 2,
  phi_jumps = c(0:(ncol(term_descendancy_matrix) - 1),
  rep(match(unlist(lapply(x[y], get_ancestors, ontology = ontology)),
  colnames(term_descendancy_matrix)) - 1, 50)),
  pseudo_phi_marginal_prior = phi_jumps, phi_num_leaves_geometric_rate = 1,
  lit_sims = setNames(rep(1, ncol(term_sim_mat)), colnames(term_sim_mat)),
  favour_gamma1_factor = 1, check_args = TRUE)
}
\arguments{
\item{ontology}{\code{ontology_index} object.}

\item{y}{Logical vector of genotypes (typically TRUE for rare genotype, FALSE for common genotype).}

\item{x}{List of character vectors of term IDs.}

\item{g}{Genotype log odds offset per individual.}

\item{its}{Number of update cycles to perform.}

\item{thin}{Factor by which to thin resultant chains of parameter samples.}

\item{record_sims}{Logical indicating whether to record trace of similarities.}

\item{record_model_likelihoods}{Record likelihood of parameters under both models.}

\item{tune_proposals}{Logical value determining whether to adaptively tune proposal variances for \code{sim_reg} numeric parameters.}

\item{verbose}{Logical value determining whether to print progress of execution.}

\item{information_content}{Numeric vector, named by HPO IDs, containing the information content of corresponding terms.}

\item{term_descendancy_matrix}{Logical term descendancy matrix, with dimensions symmetrically labelled by terms, where a cell value of TRUE indicates that the row term is an ancestor of the column term (in the sense of the DAG structure of the HPO).}

\item{term_sim_mat}{Numeric matrix with rows and columns corresponding to (and named by) term IDs, and cells containing the similarity between the row and column term.}

\item{case_ids}{IDs for the N cases from 0 to N-1, indicating which case each term in \code{term_ids} belongs to (automatically determined given \code{x}).}

\item{term_ids}{Vector of HPO term IDs belonging to cases.}

\item{return_tuning_runs}{Logical indicating whether to return the MCMC output of the tuning phase of the inference procedure.}

\item{tuning_its}{Number of update cycles to perform in the tuning phase of the inference procedure.}

\item{tuning_burn}{Number of update cycles to discard in tuning phase.}

\item{burn}{Number of update cycles to discard.}

\item{tune_phi_pseudoprior}{Logical value determining whether tuned pseudoprior for phi is used in main Markov chain.}

\item{gamma}{Initial value of model selection indicator gamma.}

\item{alpha_star}{Initial value of alpha_star, the rate of observing the rare genotype y = 1 under gamma = 0, i.e. the no association model.}

\item{alpha}{Initial value of alpha, the background rate of observing the rare genotype under gamma = 1.}

\item{log_beta}{Initial value of log_beta, the log of the effect size of ontological similarity.}

\item{phi}{Character vector of HPO term IDs giving the initial value of phi, the characteristic phenotype.}

\item{logit_mean_f}{Initial value of logit_mean_f.}

\item{log_alpha_plus_beta_f}{Initial value of log_alpha_plus_beta_f.}

\item{logit_mean_g}{Initial value of logit_mean_g.}

\item{log_alpha_plus_beta_g}{Initial value of log_alpha_plus_beta_g.}

\item{gamma_prior_prob}{Prior probability of gamma = 1.}

\item{alpha_star_mean}{Prior mean of alpha_star given gamma = 0.}

\item{alpha_mean}{Prior mean of alpha given gamma = 1.}

\item{alpha_star_sd}{Prior sd of alpha_star given gamma = 0.}

\item{alpha_sd}{Prior sd of alpha given gamma = 1.}

\item{log_beta_mean}{Prior mean of log_beta given gamma = 1.}

\item{log_beta_sd}{Prior sd of log_beta given gamma = 1.}

\item{logit_mean_f_mean}{Prior mean of logit_mean_f given gamma = 1.}

\item{logit_mean_f_sd}{Prior sd of logit_mean_f given gamma = 1.}

\item{log_alpha_plus_beta_f_mean}{Prior mean of log_alpha_plus_beta_f given gamma = 1.}

\item{log_alpha_plus_beta_f_sd}{Prior sd of log_alpha_plus_beta_f given gamma = 1.}

\item{logit_mean_g_mean}{Prior mean of logit_mean_g given gamma = 1.}

\item{logit_mean_g_sd}{Prior sd of logit_mean_g given gamma = 1.}

\item{log_alpha_plus_beta_g_mean}{Prior mean of log_alpha_plus_beta_g given gamma = 1.}

\item{log_alpha_plus_beta_g_sd}{Prior sd of log_alpha_plus_beta_g given gamma = 1.}

\item{alpha_star_proposal_sd}{Proposal sd of local jumps in MH updates of alpha_star used during inference.}

\item{alpha_proposal_sd}{Proposal sd of local jumps in MH updates of alpha used during inference.}

\item{log_beta_proposal_sd}{Proposal sd of local jumps in MH updates of log_beta used during inference.}

\item{logit_mean_f_proposal_sd}{Proposal sd of local jumps in MH updates of logit_mean_f used during inference.}

\item{log_alpha_plus_beta_f_proposal_sd}{Proposal sd of local jumps in MH updates of log_alpha_plus_beta_f used during inference.}

\item{logit_mean_g_proposal_sd}{Proposal sd of local jumps in MH updates of logit_mean_g used during inference.}

\item{log_alpha_plus_beta_g_proposal_sd}{Proposal sd of local jumps in MH updates of log_alpha_plus_beta_g used during inference.}

\item{phi_jumps}{Vector of HPO term IDs to be used as jumping distribution for proposal replacements of terms in phi during inference given gamma = 1.}

\item{pseudo_phi_marginal_prior}{Vector of HPO term IDs to be used as prior distribution on marginal probability of single term in phi given gamma = 0.}

\item{phi_num_leaves_geometric_rate}{Geometric parameter for truncated geometric distribution on number of leaf terms in phi.}

\item{lit_sims}{Numeric vector of similarities (greater than 0) of literature phenotype to individual terms (named by term ID).}

\item{favour_gamma1_factor}{Value by which to multiply odds of \code{P(gamma=1)/P(gamma=0)} in order to encourage better mixing and higher accuracy for a given number of iterations. Defaults to 1.}

\item{check_args}{Logical value determining whether arguments are checked for consistency.}
}
\value{
List (by parameter) of vectors of consecutive parameter samples from MCMC inference.
}
\description{
Performs Bayesian `similarity regression' on given binary genotype \code{y} (logical vector) against ontological term sets \code{x} (list of character vectors of term IDs). This could, for example, be a \code{list} of character vectors of HPO term IDs representing case phenotypes. It returns an object of class \code{sim_reg_samples}, which is a list of traces for the sampled parameters. The results can be summarised with \code{summary}. Of particular interest are the estimated mean posterior of \code{gamma} (the model selection indicator, thus giving an estimate of the probability of an association under the model assumptions; this is stored in the \code{mean_posterior_gamma} slot of the result, i.e. \code{result$mean_posterior_gamma}, and can also be calculated as \code{mean(result$gamma)}), and the characteristic ontological profile phi (which can be visualised by the functions \code{\link{phi_plot}}, \code{\link{term_pair_marginals_plot}}, and \code{\link{term_marginals}}).
}
\examples{
\dontrun{
set.seed(0)
data(hpo)
disease_terms <- c("HP:0005537", "HP:0000729", "HP:0001873")
all_terms <- get_ancestors(hpo, 
c(disease_terms, sample(hpo$id, size=50)))
y <- c(rep(FALSE, 96), rep(TRUE, 3))
x <- lapply(y, function(.y) minimal_set(
hpo, if (!.y) sample(all_terms, size=3) else 
	c(sample(all_terms, size=1), disease_terms[runif(n=3) < 0.8])))
sim_reg_out <- sim_reg(ontology=hpo, x=x, y=y)
mean(sim_reg_out$gamma)
phi_plot(hpo, 
sim_reg_out$phi[sim_reg_out$gamma])
}
}

