.packageName <- "energy"
edist <- 
function(x, sizes, distance = FALSE, ix = 1:sum(sizes)) {
    #  computes the e-dissimilarity matrix between k samples or clusters
    #  x:          pooled sample or distances
    #  sizes:      vector of sample (cluster) sizes
    #  distance:   TRUE if x is a distance matrix, otherwise FALSE
    #  ix:         a permutation of row indices of x 
    #    
    k <- length(sizes)
    if (k == 1) return (as.dist(0.0))
    if (k < 1) return (NA)
    e <- matrix(nrow=k, ncol=k)
    n <- cumsum(sizes)
    m <- 1 + c(0, n[1:(k-1)])
    if (distance == FALSE) {
        if (is.vector(x)) x <- matrix(x, nrow = length(x), ncol = 1)
        dst <- as.matrix(dist(x))
        }
    else dst <- as.matrix(x)
    for (i in 1:(k - 1)) {
        e[i, i] <- 0.0
        for (j in (i + 1):k) {
            n1 <- sizes[i]
            n2 <- sizes[j]
            ii <- ix[m[i]:n[i]]
            jj <- ix[m[j]:n[j]]
            w <- n1 * n2 / (n1 + n2)
            m11 <- sum(dst[ii, ii]) / (n1 * n1)
            m22 <- sum(dst[jj, jj]) / (n2 * n2)
            m12 <- sum(dst[ii, jj]) / (n1 * n2)
            e[i, j] <- e[j, i] <- w * ((m12 + m12) - (m11 + m22))
            }
        }
    as.dist(e)
}


energy.hclust <- 
function(dst) {
    d <- dst
    if (is.matrix(dst)) {
        if (nrow(dst) != ncol(dst) || sum(dst != t(dst)) > 0)
            stop("distance matrix must be square symmetric")
    	d <- as.dist(dst)
    	attr(d, "Labels") <- row.names(dst)
    }
    n <- attr(d, "Size")
    if (is.null(n))
        stop("dst argument must be square matrix or dist object")
    labels <- attr(d, "Labels")
    if (is.null(labels))
        labels <- paste(1:n)  
    merge <- integer(2 * (n - 1))
    height <- double(n - 1)
    order <- integer(n) 
    ecl <- .C("Emin_hclust", 
              diss = as.double(d), 
              en = as.integer(n), 
              merge = as.integer(merge), 
              height = as.double(height),
              order = as.integer(order),
              PACKAGE = "energy")
    merge <- matrix(ecl$merge, nrow = n - 1, ncol = 2)
    e <- list(merge = merge, 
              height = ecl$height, 
              order = ecl$order,
              labels = labels,
              method = "e-distance",
              call = match.call(),
              dist.method = attr(dst, "method"))      
    class(e) <- "hclust"            
    e
}
ksample.e <- 
function(x, sizes, distance = FALSE, ix = 1:sum(sizes), incomplete = FALSE, N = 100) {
    #computes the k-sample E-statistic for equal distributions
    #  x:          pooled sample or distance matrix
    #  sizes:      vector of sample sizes
    #  distance:   TRUE if x is a distance matrix, otherwise FALSE
    #  ix:         a permutation of row indices of x
    #  incomplete: if TRUE compute incomplete E-statistic
    #  N:          incomplete sample size
    #  
    #  NOT much error checking here: for test use eqdist.etest
    #
    k <- length(sizes)
    if (k == 1) return (0.0)
    if (k < 2) return (NA)
    e <- e0 <- 0
    if (!is.null(attr(x, "Size"))) distance <- TRUE
    x <- as.matrix(x)
    if (incomplete == TRUE && distance == FALSE && any(sizes > N))
        return(.incomplete.etest(x, sizes=sizes, R=0, N=N)$statistic)  
    
    if (distance == TRUE) {
        # same as test with 0 replicates
        b <- .C("ksampleEtest", 
            x = as.double(t(x)), 
            byrow = as.integer(1),
            nsamples = as.integer(length(sizes)), 
            sizes = as.integer(sizes),
            dim = as.integer(0), 
            R = as.integer(0), 
            e0 = as.double(e),
            e = as.double(e), 
            pval = as.double(e), 
            PACKAGE = "energy")           
        return (b$e0)
    }

    # compute e directly, without storing distances
    d <- ncol(x)
    n <- cumsum(sizes)
    m <- 1 + c(0, n[1:(k-1)])
    for (i in 1:(k - 1)) {
        for (j in (i + 1):k) {
            n1 <- sizes[i]
            n2 <- sizes[j]
            ii <- ix[m[i]:n[i]]
            jj <- ix[m[j]:n[j]]
                if (d == 1) y <- as.matrix(c(x[ii], x[jj]))
                else y <- rbind(x[ii,], x[jj,])
                e <- e + .C("E2sample",
                        x = as.double(t(y)), 
                        sizes = as.integer(c(n1, n2)), 
                        dim = as.integer(d), 
                        e = as.double(e0),
                        PACKAGE = "energy")$e
            }
        }
    e
}

eqdist.etest <- 
function(x, sizes, distance = FALSE, incomplete = FALSE, N = 100, R = 999) {
    #multivariate E-test of the multisample hypothesis of equal distributions
    #  x:          matrix of pooled sample or distance matrix
    #  sizes:      vector of sample sizes
    #  distance:   logical, TRUE if x is a distance matrix, otherwise false
    #  R:          number of replicates
    #  incomplete: logical, TRUE if incomplete E statistics
    #  N:          sample size for incomplete version
    #  
    
    nsamples <- length(sizes)
    if (nsamples < 2) return (NA)
    if (min(sizes) < 1) return (NA)
    if (!is.null(attr(x, "Size"))) distance <- TRUE
    
    if (nsamples == 2) {
        if (incomplete == TRUE && distance == FALSE && any(sizes > N))
            return(.incomplete.etest(x, sizes=sizes, N=N, R=R))  
        }
        
    x <- as.matrix(x)
    if (nrow(x) != sum(sizes)) stop("nrow(x) should equal sum(sizes)")
    if (distance == FALSE && nrow(x) == ncol(x))
        warning("square data matrix with distance==FALSE")
    d <- ncol(x)
    if (distance == TRUE) d <- 0
    str <- "Multivariate "
    if (d == 1) str <- "Univariate "
    if (d == 0) str <- ""

    e0 <- 0.0
    repl <- rep(0, R)
    pval <- 1.0
    b <- .C("ksampleEtest", 
        x = as.double(t(x)), 
        byrow = as.integer(1),
        nsamples = as.integer(nsamples), 
        sizes = as.integer(sizes),
        dim = as.integer(d), 
        R = as.integer(R), 
        e0 = as.double(e0),
        e = as.double(repl), 
        pval = as.double(pval), 
        PACKAGE = "energy")           
    e <- list(
        method = paste(str, length(sizes), "-sample E-test of equal distributions", sep = ""),
        statistic = b$e0,
        p.value = b$pval,
        n = sizes,
        R = R,
        replicates = b$e)

    class(e) <- "etest.eqdist"        
    e
}
 

.incomplete.etest <- 
function(x, sizes, N = 100, R = 999) {
    #  intended to be called from eqdist.etest, not much error checking
    #
    #  multivariate E-test of the multisample hypothesis of equal distributions, incomplete E-statistic
    #  C library currently supports two sample test only
    #  x:          matrix of pooled sample or distance matrix
    #  sizes:      vector of sample sizes
    #  N:          max sample size for estimation of pairwise E
    #  R:          number of replicates
    #  
    
    k <- length(sizes)
    if (k != 2) return (NA);
    n <- cumsum(sizes) 
    m <- 1 + c(0, n[1:(k-1)])
    x <- as.matrix(x)
    if (nrow(x) != sum(sizes)) return (NA)
    d <- ncol(x)
    r <- nrow(x)
    str <- "Multivariate "
    if (d == 1) str <- "Univariate "
    e0 <- 0
    pval <- 1
    repl <- rep(0, R)  
    b <- .C("twosampleIEtest", 
        x = as.double(t(x)), 
        byrow = as.integer(1),
        sizes = as.integer(sizes),
        dim = as.integer(d),
        iN = as.integer(N),
        R = as.integer(R), 
        e0 = as.double(e0),
        e = as.double(repl), 
        pval = as.double(pval),
        PACKAGE = "energy")           

    e <- list(
    method = paste(str, length(sizes), "-sample E-test of equal distributions, incomplete version with max size ", N, sep = ""),
    statistic = b$e0,
    p.value = b$pval,
    n = sizes,
    R = R,
    replicates = b$e)
    class(e) <- "etest.eqdist"        
    e
}
    
print.etest.eqdist <- 
function(x, ...) {
    cat("\n", x$method, "\n")
    cat("\tSample sizes:       ", x$n, "\n")
    cat("\tTest statistic:    ", format(x$statistic, digits = 4), "\n")
    cat("\tApprox. p-value:   ", format.pval(x$p.value), "\n")
    cat("\t", x$R, " replicates, resampling method = permutation\n", sep = "")
}
            
mvnorm.etest <- 
function(x, R=999) 
{
    # parametric bootstrap E-test for multivariate normality
    e <- list(
        method = paste("E-test of multivariate normality", sep = ""),
        statistic = 0, 
        p.value = 0, 
        n = nrow(x), 
        d = ncol(x), 
        R = R, 
        replicates = NULL)
    if (is.vector(x)) {
        e$n <- length(x)
        e$d <- 1
        bootobj <- boot(x, statistic = normal.e, R = R, sim = "parametric", 
            ran.gen = function(x, y) {return(rnorm(e$n)) })
        }
        else {
        bootobj <- boot(x, statistic = mvnorm.e, R = R, sim = "parametric", 
            ran.gen = function(x, y) {
                return(matrix(rnorm(e$n * e$d), nrow=e$n, ncol=e$d)) })
        }
    p <- 1 - mean(bootobj$t < bootobj$t0)
    e$statistic = bootobj$t0
    e$p.value = p
    e$replicates <- bootobj$t
    class(e) <- "etest.mvnorm"        
    e                 
}

mvnorm.e <- 
function(x) 
{
    # E-statistic for multivariate normality
    if (is.vector(x)) return(E.norm(x))
    n <- nrow(x)
    d <- ncol(x)
    if (n < 2) return(E.norm(x))
    z <- scale(x, scale = FALSE)    #subtract column means and 
    ev <- eigen(var(x), symmetric = TRUE)    #compute S^(-1/2)
    P <- ev$vectors
    lambda <- ev$values    
    y <- z %*% (P %*% diag(1 / sqrt(lambda)) %*% t(P))
    if (any(!is.finite(y))) return (NA)
    stat <- 0
    e <- .C("mvnEstat", y = as.double(t(y)), byrow = as.integer(TRUE),
            nobs = as.integer(n), dim = as.integer(d), 
            stat = as.double(stat), PACKAGE = "energy")$stat
    e
}

normal.e <- 
function(x) 
{
   x <- as.vector(x)
   y <- sort(x)
   n <- length(y)
   if (y[1] == y[n]) return (NA)
   y <- scale(y) 
   K <- seq(1 - n, n - 1, 2)
   e <- 2 * (sum(2 * y * pnorm(y) + 2 * dnorm(y)) - n / sqrt(pi) - mean(K * y))
   e
}
   
print.etest.mvnorm <- 
function(x, ...) 
{
    cat("\n", x$method, "\n")
    cat("\tSample size:       ", x$n, "\n")
    cat("\tDimension:         ", x$d, "\n")
    cat("\tTest statistic:    ", format(x$statistic, digits = 4), "\n")
    cat("\tApprox. p-value:   ", format.pval(x$p.value), "\n")
    cat("\t", x$R, " replicates\n", sep="")
}
            
poisson.mtest <- 
function(x, R = 999) {
    # parametric bootstrap mean distance test of Poisson distribution
    n <- length(x)
    lambda <- mean(x)
    bootobj <- boot(x, statistic = poisson.m, R = R, sim = "parametric", 
            ran.gen = function(x, y) {rpois(n, lambda)})
    p <- 1 - mean(bootobj$t < bootobj$t0)
    e <- list(
        method = paste("Mean distance test of Poisson distribution", sep = ""),
        statistic = bootobj$t0, 
        p.value = p, 
        n = n, 
        lambda = lambda,
        R = R, 
        replicates = bootobj$t)
    class(e) <- "etest.poisson"        
    e           
}

poisson.m<- 
function(x) {
    # mean distance statistic for Poissonity
    n <- length(x)
    stat <- 0
    e <- .C("poisMstat", 
            x = as.integer(x),
            nx = as.integer(n), 
            stat = as.double(stat), 
            PACKAGE = "energy")$stat
    e
}
  
print.etest.poisson <- 
function(x, ...) {
    cat("\n", x$method, "\n")
    cat("\tSample size:       ", x$n, "\n")
    cat("\tSample mean:       ", x$lambda, "\n")
    cat("\tTest statistic:    ", format(x$statistic, digits = 4), "\n")
    cat("\tApprox. p-value:   ", format.pval(x$p.value), "\n")
    cat("\t", x$R, " replicates\n", sep = "")
}
            
.First.lib <- function(lib, pkg)
{
    require(boot)
    library.dynam("energy", pkg, lib) 
}
