% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mus.R
\name{MUS}
\alias{MUS}
\title{MUS algorithm}
\usage{
MUS(C, clusters, prec_par = 5)
}
\arguments{
\item{C}{\eqn{N \times N} matrix with a non-negligible number of zeros.
For instance, a similarity matrix estimated from an \eqn{N \times D} data matrix whose rows
are statistical units, or a co-association matrix resulting from clustering
ensembles.}

\item{clusters}{A vector of integers from \code{1:k} (with \code{k <= 4})
indicating a partition of the \eqn{N} units resulting from clustering.}

\item{prec_par}{Optional argument. The maximum number of alternative pivots for each group. Default is 5.}
}
\value{
\item{\code{pivots}}{ The \code{k} pivotal units}
}
\description{
Perform the Maxima Units Search (MUS) algorithm on a large and sparse matrix in
order to find a set of pivotal units through a sequential search
in the given matrix.
}
\details{
Consider \eqn{H} distinct partitions of a set of \eqn{N} \eqn{d}-dimensional
statistical units into \eqn{k}
groups determined by some
clustering technique. An \eqn{N \times N} co-association matrix
\eqn{C} with generic element \eqn{c_{i,j}=n_{i,j}/H} can be constructed,
where \eqn{n_{i,j}} is the number of times the \eqn{i}-th and the \eqn{j}-th unit
are assigned to the same cluster with respect to the clustering ensemble.
Units which are very distant
from each other are likely to have zero co-occurrences; as a consequence,
\eqn{C} is
a square symmetric matrix expected to contain a non-negligible number of zeros.
The main task of the MUS algorithm is to detect submatrices of small
rank from the co-association matrix
and extract those units---pivots---such
that the \eqn{k \times k} submatrix of \eqn{C},
determined by only the pivotal row
and column indices, is identical or nearly identical to the identity matrix.
In practice, the resulting units
have the desirable property of being representative of
the group they belong to.
}
\examples{
# Data generated from a mixture of three bivariate Gaussian distributions
# with very unbalanced component sizes.
#
# NOTE: rmvnorm() and KMeans() are not base R functions. They are presumably
# provided by the mvtnorm and RcmdrMisc packages respectively (to be
# confirmed); the corresponding packages must be attached before running
# this example.

centers  <- 3
n1 <- 20
n2 <- 100
n3 <- 500
# Total sample size, derived from the component sizes so they cannot disagree
N <- n1 + n2 + n3
# True component membership of each unit (kept for reference, not used below)
truegroup <- c(rep(1, n1), rep(2, n2), rep(3, n3))

# One vectorised draw per component instead of one call per row;
# the rows of x are stacked in the same order as truegroup.
x <- rbind(rmvnorm(n1, c(1,5), sigma = diag(2)),
           rmvnorm(n2, c(4,0), sigma = diag(2)),
           rmvnorm(n3, c(6,6), sigma = diag(2)))

# Build a similarity matrix from clustering ensembles:
# each row of a stores the partition returned by one kmeans run.

H <- 1000
a <- matrix(NA, H, N)

for (h in seq_len(H)) {
  a[h, ] <- kmeans(x, centers)$cluster
}

# Entry (i, j) is the proportion of the H partitions in which units i and j
# fall in the same cluster. The diagonal is left at 1 and the lower triangle
# is filled by symmetry.
sim_matr <- matrix(1, N, N)
for (i in seq_len(N - 1)) {
  for (j in (i + 1):N) {
    sim_matr[i, j] <- sum(a[, i] == a[, j]) / H
    sim_matr[j, i] <- sim_matr[i, j]
  }
}

# Obtain a clustering solution via kmeans with multiple random seeds

cl <- KMeans(x, centers)$cluster

# Find three pivots

mus_alg <- MUS(C = sim_matr, clusters = cl)


}
\references{
Egidi, L., Pappadà, R., Pauli, F., Torelli, N. (2018).
Maxima Units Search (MUS) algorithm:
methodology and applications. In: Perna, C., Pratesi, M., Ruiz-Gazen, A. (eds.) Studies in
Theoretical and Applied Statistics,
Springer Proceedings in Mathematics and Statistics 227, pp. 71–81.
}
