% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/runClue.R
\name{runClue}
\alias{runClue}
\title{Run CLUster Evaluation}
\usage{
runClue(Tc, annotation, rep = 10, kRange, clustAlg = "cmeans",
  effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5)
}
\arguments{
\item{Tc}{a numeric matrix to be clustered. The columns correspond to the time-course and the rows correspond to phosphorylation sites.}

\item{annotation}{a list whose names correspond to kinases and whose elements are the substrates belonging to each kinase.}

\item{rep}{number of times the clustering is to be applied. This is to account for variability in the clustering algorithm.}

\item{kRange}{the range of k to be tested for clustering.}

\item{clustAlg}{the clustering algorithm to be used. The default is cmeans clustering.}

\item{effectiveSize}{the size of annotation groups to be considered for calculating enrichment. Groups that are too small
or too large will be excluded when calculating the overall enrichment of the clustering.}

\item{pvalueCutoff}{a p-value cutoff for determining which kinase-substrate groups are to be included in calculating the overall enrichment of the clustering.}

\item{alpha}{a regularisation factor for penalizing a large number of clusters.}
}
\value{
a clue output that contains the input parameters used for evaluation and the evaluation results. Use \code{ls(x)} to see the details of the output, where \code{x} is the returned object.
}
\description{
Takes in a time-course matrix and tests for enrichment of the clustering, using the cmeans or kmeans clustering algorithm, against a reference annotation.
}
\examples{
## Example 1. Running CLUE on simulated phosphoproteomics data

## Simulate time-series phosphoproteomics data containing 4 clusters,
## each cluster with a size of 100 phosphosites.
simuData <- temporalSimu(seed=1, groupSize=100, sdd=1, numGroups=4)

## Create an artificial annotation database: 50 kinase-substrate groups,
## each comprising 20 substrates assigned to a kinase.
## Among them, 5 groups each contain phosphosites defined
## to have the same temporal profile; the other 45 groups are
## random draws of phosphosites.
groupSize <- 100

## groups 1 to 5: 20 consecutive phosphosites taken from the start of
## each block of 'groupSize' sites
profileGroups <- lapply(1:5, function(g) {
  offset <- groupSize * (g - 1)
  paste("p", (offset + 1):(offset + 20), sep="_")
})

## groups 6 to 50: 20 randomly sampled phosphosites, seeded by the group
## index so that the example is reproducible
randomGroups <- lapply(6:50, function(g) {
  set.seed(g)
  paste("p", sample.int(nrow(simuData), size = 20), sep="_")
})

kinaseAnno <- c(profileGroups, randomGroups)
names(kinaseAnno) <- paste("KS", 1:50, sep="_")

## Run CLUE with a repeat of 3 times and a range from 2 to 8.
set.seed(1)
clueObj <- runClue(Tc=simuData, annotation=kinaseAnno, rep=3, kRange=8)

## Visualize the evaluation outcome.
boxplot(clueObj$evlMat, col=rainbow(ncol(clueObj$evlMat)), las=2,
  xlab="Number of clusters", ylab="Enrichment score", main="CLUE")
abline(v=(clueObj$maxK-1), col=rgb(1,0,0,.3))

## Generate the clustering results using the optimal k determined by CLUE.
best <- clustOptimal(clueObj, rep=3, mfrow=c(2, 3))

## List the enriched clusters.
best$enrichList

## Obtain the optimal clustering object (not run).
# best$clustObj


## Example 2. Running CLUE with a phosphoproteomics dataset to discover the optimal
## number of clusters, cluster the data accordingly, and identify the key kinases
## involved in each cluster.

## load the human ES phosphoproteomics data (Rigbolt et al. Sci Signal. 4(164):rs3, 2011)
data(hES)
## load the PhosphoSitePlus annotations (Hornbeck et al. Nucleic Acids Res. 40:D261-70, 2012)
## note that one can instead use the PhosphoELM database by typing "data(PhosphoELM)".
data(PhosphoSite)

## run CLUE with a repeat of 5 times and a range from 2 to 13 (not run)
# set.seed(2)
# clueObj <- runClue(Tc=hES, annotation=PhosphoSite.human, rep=5, kRange=13)

## Example 3. Running CLUE with a gene expression dataset to discover the optimal
## number of clusters, cluster the data accordingly, and identify the key pathways
## involved in each cluster.

## load mouse adipocyte gene expression data.
## (Ma et al. Molecular and Cellular Biology. 2014, 34(19):3607-17)
# data(adipocyte)
## (the above demo dataset is not included due to size constraints;
## please access https://github.com/PengyiYang/ClueR to download the
## development version for this example)

## load the KEGG annotations
## note that one can instead use the reactome, GOBP, or biocarta database
data(Pathways)

## run CLUE with a repeat of 5 times and a range from 2 to 13 (not run)
# set.seed(3)
# clueObj <- runClue(Tc=adipocyte, annotation=Pathways.KEGG, rep=5, kRange=13)


}
