
### Regression tests for fixed bugs

set.seed(290875)
library(coin)
isequal <- coin:::isequal

### check if doxygen documentation is there
stopifnot(nchar(system.file("documentation", "html", "index.html", 
                            package = "coin")) > 29) 

### I() returns objects of class AsIs which caused an error in `trafo'
df <- data.frame(x1 = rnorm(100), x2 = rnorm(100), x3 = gl(2, 50))
independence_test(I(x1 / x2) ~ x3, data = df)
independence_test(I(x1 < 0) ~ x3, data = df)

### expectation was wrong when varonly = TRUE in case both
### xtrafo and ytrafo were multivariate 
if (require(multcomp)) {
    df <- data.frame(x = runif(30), y = runif(30), z = gl(3, 10))
    a <- independence_test(x + y ~ z, data = df,
         distribution = approximate(B = 19999),
         xtrafo = function(data) trafo(data, factor_trafo = function(x)
             model.matrix(~x - 1) %*% t(contrMat(table(x), "Tukey"))))
    b <- independence_test(x + y ~ z, data = df,
         xtrafo = function(data) trafo(data, factor_trafo = function(x)
             model.matrix(~x - 1) %*% t(contrMat(table(x), "Tukey"))))
    isequal(expectation(a), expectation(b))
}


### `statistic' for linear and standardized statistics was wrong in case of 
### scores
data("jobsatisfaction")
stopifnot(unique(dim(statistic(lbl_test(jobsatisfaction), "linear"))) == 1)
stopifnot(unique(dim(statistic(lbl_test(jobsatisfaction), "standardized"))) == 1)


### support() failed in most cases
df <- data.frame(x = runif(20), y = runif(20), z = gl(2, 10))
support(independence_test(x ~ z, data = df))
support(independence_test(x ~ z, data = df, teststat = "quad"))
ite <- independence_test(I(round(x, 1)) ~ z, data = df, dist = exact())
ae <- support(ite)
de <- sapply(ae, function(x) dperm(ite, x))
sum(de)
ita <- independence_test(I(round(x, 1)) ~ z, data = df, 
                         dist = approximate(B = 100000))
aa <- support(ita)
da <- sapply(aa, function(x) dperm(ita, x))
sum(da)
mean(round(ae, 10) %in% round(aa, 10))

plot(aa, da, type = "s", lty = 1)
lines(ae, de, type = "s", lty = 2)
itas <- independence_test(I(round(x, 1)) ~ z, data = df)
lines(ae[-1], diff(sapply(ae, function(x) pperm(itas, x))), lty = 3)
legend("topleft", lty = 1:3, legend = c("approx", "exact", "asympt"), bty = "n")

### check correct handling of multiple censoring indicators (in modeltools)
### was never wrong, just in case...
data("photocar", package = "coin")
i1 <- independence_test(Surv(time, event) + Surv(dmin, tumor) + ntumor ~ group,
                  data = photocar)
i2 <- independence_test(Surv(time, event) ~ group, data = photocar)
i3 <- independence_test(Surv(dmin, tumor) ~ group, data = photocar)

stopifnot(max(abs(statistic(i1, "standardized")[,1] - 
                  statistic(i2, "stand"))) < sqrt(.Machine$double.eps))
stopifnot(max(abs(statistic(i1, "standardized")[,2] - 
                  statistic(i3, "stand"))) < sqrt(.Machine$double.eps))

### check new var_trafo argument
x <- rnorm(20)
y <- gl(2, 10)
a <- trafo(data.frame(x = x, y = y), numeric_trafo = normal_trafo)
b <- trafo(data.frame(x = x, y = y), var_trafo = list(x = normal_trafo))
stopifnot(isequal(a, b))

### check for multiple ordered factors
mydf <- data.frame(x = ordered(gl(4, 5)), y = ordered(gl(5, 4)), 
                   z = rnorm(20))
it1 <- independence_test(x + y ~ z , data = mydf)
stopifnot(isequal(drop(statistic(it1, "linear")), 
          c(statistic(independence_test(x ~ z , data = mydf), "linear"),
            statistic(independence_test(y ~ z , data = mydf), "linear"))))
it1 <- independence_test(x + z ~ y , data = mydf)
stopifnot(isequal(drop(statistic(it1, "linear")),
          c(statistic(independence_test(x ~ y , data = mydf), "linear"),
            statistic(independence_test(z ~ y , data = mydf), "linear"))))
it1 <- independence_test(z ~ x + y , data = mydf)
stopifnot(isequal(drop(statistic(it1, "linear")),
          c(statistic(independence_test(z ~ x , data = mydf), "linear"),
            statistic(independence_test(z ~ y , data = mydf), "linear"))))

### NA's and weights
mydf <- data.frame(x = 1:10, y = gl(2, 5), w = rep(2, 10))
s <- statistic(independence_test(x ~ y, data = mydf, weights = ~ w), "linear")
stopifnot(s == 30)
mydf$x[1] <- NA
s <- statistic(independence_test(x ~ y, data = mydf, weights = ~ w), "linear")
stopifnot(s == 28)

### two observations only
mydf <- data.frame(x = 1:10, y = factor(rep(c(1, 2), 5)))
independence_test(y ~ x, data = mydf, subset = c(1, 6))
independence_test(y ~ x, data = mydf, subset = c(1, 2))
try(independence_test(y ~ x, data = mydf, subset = 1))

### names of expectation and covariance
YOY <- data.frame(length = c(46, 28, 46, 37, 32, 41, 42, 45, 38, 44,
                             42, 60, 32, 42, 45, 58, 27, 51, 42, 52,
                             38, 33, 26, 25, 28, 28, 26, 27, 27, 27,
                             31, 30, 27, 29, 30, 25, 25, 24, 27, 30),
                  site = factor(c(rep("I", 10), rep("II", 10),
                                  rep("III", 10), rep("IV", 10))))

it <- independence_test(length ~ site, data = YOY,
    ytrafo = function(data) trafo(data, numeric_trafo = rank),
    teststat = "quad")
expectation(it)
covariance(it)

mydf <- data.frame(x = rnorm(10), y = rnorm(10), z = gl(2, 5))
it <- independence_test(x + y ~ z, data = mydf)
statistic(it, "linear")
expectation(it)
covariance(it)

### maxstat_trafo
n <- seq(from = 5, to = 100, by = 1)
for (i in n) {
   x <- round(rnorm(i) * 10, 1)
   xm <- maxstat_trafo(x)
   stopifnot(min(c(mean(xm[,1]), 1 - mean(xm[,ncol(xm)])) - 0.1) >
             -.Machine$double.eps)
}

### formula evaluation in `parent.frame()', spotted by Z
foo <- function(x, y) independence_test(y ~ x)
a <- 1:10
b <- 1:10
foo(a, b)
x <- 1
y <- 1
foo(a, b)

### factors with only one level
dat <- data.frame(y = rnorm(100), x1 = runif(100), x2 = factor(rep(0, 100)))
try(independence_test(y ~ x1  + x2, data = dat))

### user specified g: names, MC
me <- as.table(matrix(c( 6,  8, 10,
               32, 47, 20), byrow = TRUE, nrow = 2,
    dimnames = list(group = c("In situ", "Control"),
                    genotype = c("AA", "AG", "GG"))))
medf <- as.data.frame(me)

add <- c(0, 1, 2)
dom <- c(0, 1, 1)
rez <- c(0, 0, 1)
g <- function(x) {
    x <- unlist(x)
    cbind(add[x], dom[x], rez[x])
}
it <- independence_test(group ~ genotype, 
    data = medf, weights = ~ Freq, xtrafo = g)
statistic(it, "linear")

it <- independence_test(group ~ genotype,
    data = medf, weights = ~ Freq, xtrafo = g,
    distribution = approximate(B = 49999))
pvalue(it)

stopifnot(all.equal(statistic(independence_test(t(me), xtrafo = g), "linear"),
                    statistic(it, "linear")))

### alternative trafo for ordered variables didn't work
### spotted by Ludwig Hothorn <hothorn@biostat.uni-hannover.de>
tmp <- data.frame(x = ordered(sample(1:3, 20, replace = TRUE)), y = rnorm(20))
it1 <- independence_test(y ~ x, data = tmp, scores = list(x = c(1, 1, 2)))
g <- function(x) c(1, 1, 2)[unlist(x)]
it2 <- independence_test(y ~ x, data = tmp, xtrafo = g)
it3 <- independence_test(y ~ x, data = tmp, 
    xtrafo = function(data) trafo(data, ordered_trafo = g))
stopifnot(all.equal(statistic(it1), statistic(it2)))
stopifnot(all.equal(statistic(it1), statistic(it3)))

### precision problems in SR algorithm, >= <= issue
### spotted by "Fay, Michael (NIH/NIAID) [E]" <mfay@niaid.nih.gov>
x <- c(1,2,3.1,4,5,6)
g <- factor(c(0,0,0,1,1,1))
it <- independence_test(x ~ g, distribution = exact())
stopifnot(pvalue(it) == 0.1)
itMC <- independence_test(x ~ g, distribution = approximate(99999))
ci <- attr(pvalue(itMC), "conf.int")
stopifnot(ci[1] < pvalue(it) && ci[2] > pvalue(it))

### any() not applied to logicals
### spotted by R-2.7.0 and Kaspar Daniel Hansen
x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
### must not give a warning
wilcoxsign_test(x ~ y, alternative = "greater", 
                distribution = exact())

### inconsistencies with confidence intervals
### spotted by Fritz Scholz <fscholz@u.washington.edu>
Route = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L), .Label = c("A", "B"), class = "factor")
Route.Time = c(5.8, 5.8, 5.9, 6, 6, 6, 6.3, 6.3, 6.4, 6.5, 6.5, 6.5,
6.8, 7.1, 7.3, 10.2)
Route2 <- factor(as.character(Route), levels = c("B", "A"))

wilcox_test(Route.Time~Route,conf.int=TRUE)
wilcox_test(Route.Time~Route2,conf.int=TRUE)
wilcox_test(Route.Time~Route,conf.int=TRUE, distr = exact())
wilcox_test(Route.Time~Route2,conf.int=TRUE, distr = exact())

### evaluate all formulae in xxx_tests parent.frame
### spotted by Matthieu Dubois <matthdub@gmail.com>
y <- as.data.frame(matrix(rnorm(200), ncol=2))
group <- gl(2, 50)
lapply(y, function(var) wilcox_test(var ~ group))

### make sure a parallel design with 
### n = 2 isn't confused with a block design
### spotted by Fritz Scholz <fscholz@u.washington.edu>
dat <- data.frame(y = c(1, 2), g = gl(2, 1))
wt <- wilcox_test(y ~ g, data = dat, distribution = exact())
s <- support(wt)
stopifnot(all(dperm(wt, s) == c(0.5, 0.5)))

### dpqperm
wte <- wilcox_test(Route.Time ~ Route, distribution = exact())
wta <- wilcox_test(Route.Time ~ Route, distribution = approximate())
de <- dperm(wte, support(wte))
pe <- pperm(wte, support(wte))
stopifnot(max(abs(cumsum(de) - pe)) < .Machine$double.eps)
da <- dperm(wta, support(wta))
pa <- pperm(wta, support(wta))
stopifnot(max(abs(cumsum(da) - pa)) < .Machine$double.eps)
qperm(wte, seq(from = 0.1, to = 0.9, by = 0.1))
qperm(wta, seq(from = 0.1, to = 0.9, by = 0.1))

### split-up and confint (spotted by Fritz Scholz <fscholz@u.washington.edu>)
iseed <- 25
n <- m <- 4
conf.level <- 0.98
alternative <- "greater"
set.seed(iseed)
x <- round(rnorm(m), 2)
y <- round(rnorm(n, 2), 2)
score.factor <- factor(c(rep("Y",n),rep("X",m)),
                       levels = c("Y","X"))
dat.fr <- data.frame(scores=c(y,x), fac = score.factor)
it <- normal_test(scores ~ fac, data = dat.fr,
    conf.int = TRUE, conf.level = conf.level,
    alternative = alternative, dist = exact())
confint(it)
