%% $Id: isOrtholog.Rd 179 2014-08-23 20:52:52Z khliland $

\name{isOrtholog}
\alias{isOrtholog}

\title{
  Identifies orthologs in gene clusters
}
\description{
  Finds the ortholog sequences in every cluster based on pairwise distances.
}
\usage{
isOrtholog(clustering,dist.table)
}
\arguments{
  \item{clustering}{A vector of integers indicating the cluster for every sequence. Sequences with the same number belong to the same cluster. The name of each element is the tag identifying the sequence.}
  \item{dist.table}{A \code{data.frame} with pairwise distances. The columns \samp{Sequence.A} and \samp{Sequence.B} contain tags identifying pairs of sequences. The column \samp{Distance} contains the distances, always a number from 0.0 to 1.0.}
}
\details{
  The input \code{clustering} is typically produced by \code{\link{bClust}}. The input \code{dist.table} is typically produced by \code{\link{bDist}}.
  
  The concept of orthologs is difficult for prokaryotes, and this function finds orthologs in a simplistic way. For a given cluster, with members from many genomes, there is one ortholog from every genome represented in the cluster. In cases where a genome has two or more members in the same cluster, only one of these is an ortholog; the rest are paralogs.
  
  Consider all sequences from the same genome belonging to the same cluster. The ortholog is defined as the one having the smallest sum of distances to all other members of the same cluster, i.e. the one closest to the \sQuote{center} of the cluster.
  
  Note that the status as ortholog or paralog depends greatly on how clusters are defined in the first place. If you allow large and diverse (and few) clusters, many sequences will be paralogs. If you define tight and homogeneous (and many) clusters, almost all sequences will be orthologs.
}
\value{
  A vector of logicals with the same number of elements as the input \code{clustering}, indicating whether the corresponding sequence is an ortholog (\code{TRUE}) or not (\code{FALSE}). The name of each element is copied from \code{clustering}.
}
\author{
   Lars Snipen and Kristian Hovde Liland.
}
\seealso{
  \code{\link{bDist}}, \code{\link{bClust}}.
}
\examples{
\dontrun{
	# Loading distance and clustering data from the micropan package
	data(list=c("Mpneumoniae.blast.distances","Mpneumoniae.blast.clustering"),
		package="micropan")

	# Finding orthologs
	is.ortholog <- isOrtholog(Mpneumoniae.blast.clustering,
		Mpneumoniae.blast.distances)
}
}