mdrrDescr[, 1:5]
splom(mdrrDescr[, 1:5])
nlp <- predict(modelFit, newdata)
nlp
?predict.blackboost
?predict.mboost
data
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "gamboost", tuneLength = 2, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
mdrrDescr <- mdrrDescr[, -nearZeroVar(mdrrDescr)]
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "glmboost", tuneLength = 2, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "blackboost", tuneLength = 2, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "fda", tuneLength = 5, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "pda", tuneLength = 5, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "qda", tuneLength = 5, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "mda", tuneLength = 5, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "slda", tuneLength = 5, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:2], mdrrClass, "cforest", tuneLength = 2, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
test4 <- train(mdrrDescr[, 1:20], mdrrClass, "cforest", tuneLength = 2, trControl = trainControl(summaryFunction = caret:::twoClassSummary, classProbs = TRUE, number = 5))
library(odfWeave)
?odfWeave
test4
caret:::getTrainPerf(test4)
caret:::getTrainPerf(test4)[1,"TrainROC"]
round(caret:::getTrainPerf(test4)[1,"TrainROC"], 3)
length(test4$control$index)
subset(modelLookup(), seq)
subset(modelLookup(), seq)$model
unique(subset(modelLookup(), seq)$model)
paste("\\texttt{", unique(subset(modelLookup(), seq)$model), "}", sep = "")
paste(paste("\\texttt{", unique(subset(modelLookup(), seq)$model), "}", sep = ""), collapse = ", ")
subset(modelLookup()$model)
unique(modelLookup()$model)
length(unique(modelLookup()$model))
?url
close(load(url("http://caret.r-forge.r-project.org/Classification_and_Regression_Training_files/exampleModels.RData")))
## Collect resampling results from four previously fit regression
## models so their performance can be compared on common resamples.
resamps <- resamples(list(CART = rpartFit,
                          CondInfTree = ctreeFit,
                          MARS = earthFit,
                          M5 = m5Fit))
## One dotplot panel per metric; free x scales since metrics differ.
dotplot(resamps, scales = list(x = list(relation = "free")))
## Pairwise differences between the models' resampled metrics.
difValues <- diff(resamps)
## Customize the lattice theme for the difference plot: filled point
## symbols and black segment lines. The settings must be assigned at
## the top level of the theme list (not under a nested $plotTheme
## element, which trellis.par.set() ignores).
plotTheme <- caretTheme()
plotTheme$plot.symbol$pch <- 16
plotTheme$plot.line$col <- "black"
trellis.par.set(plotTheme)
print(dotplot(difValues))
plotTheme
plotTheme <- caretTheme()
plotTheme$plot.symbol$pch <- 16
plotTheme$plot.line$col <- "black"
trellis.par.set(plotTheme)
print(dotplot(difValues))
    plotData
    dotplot(Difference ~ value,
            data = plotData,
            xlab = paste("Difference in", caret:::useMathSymbols(metric)),
            panel = function(x, y)
            {
              plotTheme <- trellis.par.get()
               panel.dotplot(middle$x, middle$mod,
                            col = plotTheme$plot.symbol$col[1],
                            pch = plotTheme$plot.symbol$pch[1],
                            cex = plotTheme$plot.symbol$cex[1])
              panel.abline(v = 0,
                           col = plotTheme$reference.line$col[1],
                           lty = plotTheme$reference.line$lty[1],
                           lwd = plotTheme$reference.line$lwd[1])

              middle <- aggregate(x, list(mod = y), median)
              upper <- aggregate(x, list(mod = as.numeric(y)), max)
              lower <- aggregate(x, list(mod = as.numeric(y)), min)
              for(i in seq(along = upper$mod))
                {
                  panel.segments(upper$x[i
], upper$mod[i], lower$x[i], lower$mod[i],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1],
                                 lty = plotTheme$plot.line$lty[1])
                                len <- .03
              panel.segments(lower$x[i], upper$mod[i]+len, 
                             lower$x[i], lower$mod[i]-len, 
                             lty = plotTheme$plot.line$lty[1],
                             col = plotTheme$plot.line$col[1],
                             lwd = plotTheme$plot.line$lwd[1])
              panel.segments(upper$x[i],upper$mod[i]+len, 
                             upper$x[i], lower$mod[i]-len, 
                             lty = plotTheme$plot.line$lty[1],
                             col = plotTheme$plot.line$col[1],
                             lwd = plotTheme$plot.line$lwd[1])
                }

             
              
            },
            ...)
  }
## Dotplot of pairwise model differences from a diff.resamples-style
## object `x`: one row per model pair, a point at the median
## difference, a segment spanning the min/max, and small end caps.
## `metric` picks which performance metric to display; extra
## arguments are passed on to lattice's dotplot().
test <- function(x, data = NULL, metric = x$metric[1], ...)
  {
    if(length(metric) > 1)
      {
        metric <- metric[1]
        warning("Sorry Dave, only one value of metric is allowed right now. I'll use the first value")
      }
    h <- which(x$metric == metric)
    plotData <- as.data.frame(matrix(NA, ncol = 3, nrow = ncol(x$difs[[metric]])))
    ## Get point and interval estimates on the differences
    index <- 0
    for(i in seq(along = x$models))
      {
        for(j in seq(along = x$models))
          {
            ## Upper triangle only: each model pair appears once.
            if(i < j)
              {
                index <- index + 1
                plotData[index, 1] <- x$statistics[[h]][index][[1]]$estimate
                plotData[index, 2:3] <- x$statistics[[h]][index][[1]]$conf.int
              }
          }
      }
    names(plotData)[1:3] <- c("Estimate", "LowerLimit", "UpperLimit")
    ## Labels such as "modelA - modelB" derived from the column names.
    plotData$Difference <- gsub(".diff.", " - ", colnames(x$difs[[metric]]), fixed = TRUE)
    plotData <- melt(plotData, id.vars = "Difference")
    dotplot(Difference ~ value,
            data = plotData,
            xlab = paste("Difference in", caret:::useMathSymbols(metric)),
            panel = function(x, y)
            {
              plotTheme <- trellis.par.get()
              ## Median point plus min/max whisker per model pair.
              middle <- aggregate(x, list(mod = y), median)
              upper <- aggregate(x, list(mod = as.numeric(y)), max)
              lower <- aggregate(x, list(mod = as.numeric(y)), min)
              panel.dotplot(middle$x, middle$mod,
                            col = plotTheme$plot.symbol$col[1],
                            pch = plotTheme$plot.symbol$pch[1],
                            cex = plotTheme$plot.symbol$cex[1])
              ## Zero reference line: no difference between models.
              panel.abline(v = 0,
                           col = plotTheme$reference.line$col[1],
                           lty = plotTheme$reference.line$lty[1],
                           lwd = plotTheme$reference.line$lwd[1])
              len <- .03  # half-height of the whisker end caps
              for(i in seq(along = upper$mod))
                {
                  panel.segments(upper$x[i], upper$mod[i], lower$x[i], lower$mod[i],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1],
                                 lty = plotTheme$plot.line$lty[1])
                  panel.segments(lower$x[i], upper$mod[i]+len,
                                 lower$x[i], lower$mod[i]-len,
                                 lty = plotTheme$plot.line$lty[1],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1])
                  panel.segments(upper$x[i], upper$mod[i]+len,
                                 upper$x[i], lower$mod[i]-len,
                                 lty = plotTheme$plot.line$lty[1],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1])
                }
            },
            ...)
  }
plotTheme <- caretTheme()
plotTheme$plot.symbol$pch <- 16
plotTheme$plot.line$col <- "black"
trellis.par.set(plotTheme)
print(test(difValues))
plotTheme <- caretTheme()
plotTheme$plot.symbol$pch <- 16
plotTheme$plot.line$col <- "black"
trellis.par.set(plotTheme)
print(test(resamps))
dotplot(resamps)
predict(tt)
predict.nullMoel
predict.nullModel
tt$levels

## Predict from a "null" model: repeat the stored majority class
## (classification) or stored value (regression) for each row.
## `type` defaults to "class" for classification models (those with
## a $levels element) and "raw" otherwise.
predict.nullModel <- function (object, newdata = NULL, type  = NULL, ...)
  {
    if(is.null(type))
      {
        type <- if(is.null(object$levels)) "raw" else "class"
      }
    ## Number of predictions: training size unless newdata is given.
    n <- if(is.null(newdata)) object$n else nrow(newdata)
    if(!is.null(object$levels))
      {
        if(type == "prob")
          {
            ## One row per sample, one column per class level
            ## (original had the dimensions transposed and referenced
            ## an undefined `x` instead of `object`).
            out <- as.data.frame(matrix(0, nrow = n, ncol = length(object$levels)))
            names(out) <- object$levels
            out[, object$value] <- object$pct
          } else {
            out <- factor(rep(object$value, n), levels = object$levels)
          }
      } else {
        if(type %in% c("prob", "class")) stop("only raw predictions are applicable to regression models")
        out <- rep(object$value, n)
      }
    out
  }

## Predict from a "null" model: repeat the stored majority class
## (classification) or stored value (regression) for each row.
predict.nullModel <- function (object, newdata = NULL, type  = NULL, ...)
  {
    if(is.null(type))
      {
        type <- if(is.null(object$levels)) "raw" else "class"
      }
    ## Number of predictions: training size unless newdata is given.
    n <- if(is.null(newdata)) object$n else nrow(newdata)
    if(!is.null(object$levels))
      {
        if(type == "prob")
          {
            ## One row per sample, one column per class level
            ## (the original had nrow/ncol swapped, so names(out)
            ## mismatched whenever n != number of levels).
            out <- as.data.frame(matrix(0, nrow = n, ncol = length(object$levels)))
            names(out) <- object$levels
            out[, object$value] <- object$pct
          } else {
            out <- factor(rep(object$value, n), levels = object$levels)
          }
      } else {
        if(type %in% c("prob", "class")) stop("only raw predictions are applicable to regression models")
        out <- rep(object$value, n)
      }
    out
  }

## Predict from a null model: emit the stored majority class
## (classification) or stored constant (regression) for every
## requested row; "prob" returns a data frame of class probabilities.
predict.nullModel <- function (object, newdata = NULL, type  = NULL, ...)
  {
    classLev <- object$levels
    ## Default prediction type depends on model flavor.
    if(is.null(type))
      {
        type <- if(is.null(classLev)) "raw" else "class"
      }
    ## How many predictions to emit.
    numPred <- if(is.null(newdata)) object$n else nrow(newdata)
    if(is.null(classLev))
      {
        ## Regression: only raw predictions are meaningful.
        if(type %in% c("prob", "class")) stop("ony raw predicitons are applicable to regression models")
        return(rep(object$value, numPred))
      }
    if(type == "prob")
      {
        ## Zero probability everywhere except the stored class.
        probs <- as.data.frame(matrix(0, nrow = numPred, ncol = length(classLev)))
        names(probs) <- classLev
        probs[, object$value] <- object$pct
        probs
      } else {
        factor(rep(object$value, numPred), levels = classLev)
      }
  }
predict(tt, type = "prob")
7library(randomForest)
library(randomForest)
?randomForest
on:#
##data(iris)#
set.seed(71)#
iris.rf <- randomForest(Species ~ ., data=iris, importance=TRUE,#
                        proximity=TRUE)
names(iris.rf)
iris.rf$y
?sbf
## sbf() helper functions: random forest fit with class probabilities.
test <- list(summary = defaultSummary,
              fit = function(x, y, ...)
              {
                ## Fit a forest when predictors survived the filter;
                ## otherwise fall back to an intercept-only null model.
                if(ncol(x) > 0)
                  {
                    library(randomForest)
                    randomForest(x, y, ...)
                  } else nullModel(y = y)
              },
              pred = function(object, x)
              {
                out <- predict(object, x)
                ## For classification, attach class probabilities.
                ## Test the model outcome (object$y); the original
                ## used out$y, which errors on an atomic factor.
                ## browser()/print debugging calls removed.
                if(is.factor(object$y))
                  {
                    out <- cbind(data.frame(pred = out),
                                 as.data.frame(predict(object, x, type = "prob")))
                  }
                out
              },
              score = function(x, y)
              {
                ## should return a named logical vector
                if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
              },
              filter = function(score, x, y) score <= 0.05
              )
## sbf() helper functions: random forest fit with class probabilities.
test <- list(summary = defaultSummary,
              fit = function(x, y, ...)
              {
                ## Fit a forest when predictors survived the filter;
                ## otherwise fall back to an intercept-only null model.
                if(ncol(x) > 0)
                  {
                    library(randomForest)
                    randomForest(x, y, ...)
                  } else nullModel(y = y)
              },
              pred = function(object, x)
              {
                out <- predict(object, x)
                ## For classification, attach class probabilities.
                ## Test the model outcome (object$y); the original
                ## used out$y, which errors on an atomic factor.
                ## browser()/print debugging calls removed.
                if(is.factor(object$y))
                  {
                    out <- cbind(data.frame(pred = out),
                                 as.data.frame(predict(object, x, type = "prob")))
                  }
                out
              },
              score = function(x, y)
              {
                ## should return a named logical vector
                if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
              },
              filter = function(score, x, y) score <= 0.05
              )
as.data.frame(predict(object, #
        x, type = "prob"))
data.frame(pred = out)
 out <- cbind(data.frame(pred = out), as.data.frame(predict(object, #
        x, type = "prob")))
defaultSummary
twoClassSummary
levels(1:10])
levels(1:10)
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM2 <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = tmp,#
                                         verbose = FALSE, #
                                         method = "cv"))
levels(y)
sbfControl$functions$summary
debug(sbfControl$functions$summary)
sbfControl$functions$summary(data.frame(obs = y, pred = sample(y)), #
    lev = levels(y))
sensitivity(data[, "pred"], data[, "obs"], lev[1])
specificity(data[, "pred"], data[, "obs"], lev[2])
classLevels <- levels(y)
testOutput <- data.frame(pred = sample(y, min(10, length(y))),
                           obs = sample(y, min(10, length(y))))
  if(is.factor(y))
    {
      for(i in seq(along = classLevels)) testOutput[, classLevels[i]] <- runif(nrow(testOutput))
    }
test <- sbfControl$functions$summary(testOutput,
                                       lev = classLevels)
test
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = rfSBF,#
                                         verbose = FALSE, #
                                         method = "cv"))
tmp <- rfSBF
tmp$summary <- towClassSummary
library(MASS)
?lda
library(klaR)
?NaiveBayes
#
data(iris)#
m <- NaiveBayes(Species ~ ., data = iris)#
head(predict(m))
library(ipred)
?bagging
ata(BreastCancer)#
#
# Test set error bagging (nbagg = 50): 3.7% (Breiman, 1998, Table 5)#
#
mod <- bagging(Class ~ Cl.thickness + Cell.size#
                + Cell.shape + Marg.adhesion   #
                + Epith.c.size + Bare.nuclei   #
                + Bl.cromatin + Normal.nucleoli#
                + Mitoses, data=BreastCancer, coob=TRUE)#
print(mod)
#
data(BreastCancer)#
#
# Test set error bagging (nbagg = 50): 3.7% (Breiman, 1998, Table 5)#
#
mod <- bagging(Class ~ Cl.thickness + Cell.size#
                + Cell.shape + Marg.adhesion   #
                + Epith.c.size + Bare.nuclei   #
                + Bl.cromatin + Normal.nucleoli#
                + Mitoses, data=BreastCancer, coob=TRUE)#
print(mod)
predict(mod)
predict(mod, type = "prob")
names(mod)
mod$y
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = ldaFunc,#
                                         verbose = FALSE, #
                                         method = "cv"))
undebug(sbf.default)
## sbf() helper functions: LDA fit with posterior probabilities.
## (Fixed doubled "<- <-" assignment, removed browser() left over
## from debugging, and switched class() comparison to inherits().)
test <- list(summary = defaultSummary,
               fit = function(x, y, ...)
               {
                 if(ncol(x) > 0)
                   {
                     library(MASS)
                     lda(x, y, ...)
                   } else nullModel(y = y)
               },
               pred = function(object, x)
               {
                 ## Null models predict directly; lda() returns a
                 ## list with $class and $posterior.
                 if(inherits(object, "nullModel"))
                   {
                     out <- predict(object, x)
                   } else {
                     tmp <- predict(object, x)
                     out <- cbind(data.frame(pred = tmp$class),
                                  as.data.frame(tmp$posterior)) 
                   }
                 out
               },
               score = function(x, y)
               {
                 ## should return a named logical vector
                 anovaScores(x, y)
               },
               filter = function(score, x, y) score <= 0.05
               )
## sbf() helper functions: LDA fit with posterior probabilities.
## (Removed browser() debugging call; inherits() instead of class().)
test <- list(summary = defaultSummary,
               fit = function(x, y, ...)
               {
                 if(ncol(x) > 0)
                   {
                     library(MASS)
                     lda(x, y, ...)
                   } else nullModel(y = y)
               },
               pred = function(object, x)
               {
                 ## Null models predict directly; lda() returns a
                 ## list with $class and $posterior.
                 if(inherits(object, "nullModel"))
                   {
                     out <- predict(object, x)
                   } else {
                     tmp <- predict(object, x)
                     out <- cbind(data.frame(pred = tmp$class),
                                  as.data.frame(tmp$posterior)) 
                   }
                 out
               },
               score = function(x, y)
               {
                 ## should return a named logical vector
                 anovaScores(x, y)
               },
               filter = function(score, x, y) score <= 0.05
               )
out
ymp
## sbf() helper functions: LDA fit; both branches return a data
## frame with a "pred" column plus per-class probability columns.
## (Removed print(head(out)) debugging output; inherits() idiom.)
test <- list(summary = defaultSummary,
               fit = function(x, y, ...)
               {
                 if(ncol(x) > 0)
                   {
                     library(MASS)
                     lda(x, y, ...)
                   } else nullModel(y = y)
               },
               pred = function(object, x)
               {
                 if(inherits(object, "nullModel"))
                   {
                     tmp <- predict(object, x)
                     out <- cbind(data.frame(pred = tmp),
                                  as.data.frame(
                                                predict(object,
                                                        x,
                                                        type = "prob"))) 
                   } else {
                     tmp <- predict(object, x)
                     out <- cbind(data.frame(pred = tmp$class),
                                  as.data.frame(tmp$posterior)) 
                   }
                 out
               },
               score = function(x, y)
               {
                 ## should return a named logical vector
                 anovaScores(x, y)
               },
               filter = function(score, x, y) score <= 0.05
               )
debug(sbf.default)
sbfResults[[1]]
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = ldaSBF,#
                                         verbose = FALSE, #
                                         method = "cv"))
tmp <- ldaSBF
tmp
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = tmp,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, sample(mdrrClass),#
                 sbfControl = sbfControl(functions = tmp,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:10], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = ldaSBF,#
                                         verbose = FALSE, #
                                         method = "cv"))
datA(mdrr)
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:10], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = nbFuncs,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:10], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = nbSBF,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:10], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = tmp,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:10], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr, mdrrClass,#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
test <- nbSBF
tmp$summary <- twoClassSummary
test <- rfSBF
## sbf() helper functions: random forest fit with class probabilities
## for classification outcomes. (Removed browser() debugging call.)
test <- list(summary = defaultSummary,
              fit = function(x, y, ...)
              {
                if(ncol(x) > 0)
                  {
                    library(randomForest)
                    randomForest(x, y, ...)
                  } else nullModel(y = y)
              },
              pred = function(object, x)
              {
                out <- predict(object, x)
                ## For classification models, attach probabilities.
                if(is.factor(object$y))
                  {
                    out <- cbind(data.frame(pred = out),
                                 as.data.frame(predict(object, x, type = "prob")))
                  }
                out
              },
              score = function(x, y)
              {
                ## should return a named logical vector
                if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
              },
              filter = function(score, x, y) score <= 0.05
              )
test <- treebagSBF
tt
tt <- nullModel(mdrrDescr, mdrrClass)
unclass(tt)
## sbf() helper functions: bagged trees via ipredbagg().
## Fixes: is.factor(object$levels) is always FALSE for a character
## vector of level names -- use !is.null() to detect classification;
## inherits() instead of class() comparison; indentation normalized.
test <- list(summary = defaultSummary,
                     fit = function(x, y, ...)
                     {
                       if(ncol(x) > 0)
                         {
                           library(ipred)
                           ipredbagg(y, x, ...)
                         } else nullModel(y = y)
                     },
                     pred = function(object, x)
                     {
                       if(inherits(object, "nullModel"))
                         {
                           tmp <- predict(object, x)
                           ## Classification null models carry $levels.
                           if(!is.null(object$levels))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         } else {
                           tmp <- predict(object, x)
                           if(is.factor(object$y))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         }
                       out
                     },
                     score = function(x, y)
                     {
                       ## should return a named logical vector
                       anovaScores(x, y)
                     },
                   filter = function(score, x, y) score <= 0.05
                   )
## sbf() helper functions: bagged trees via ipredbagg().
## Fixes: removed browser() debugging call; is.factor(object$levels)
## is always FALSE for a character vector -- use !is.null();
## inherits() idiom; indentation normalized.
test <- list(summary = defaultSummary,
                     fit = function(x, y, ...)
                     {
                       if(ncol(x) > 0)
                         {
                           library(ipred)
                           ipredbagg(y, x, ...)
                         } else nullModel(y = y)
                     },
                     pred = function(object, x)
                     {
                       if(inherits(object, "nullModel"))
                         {
                           tmp <- predict(object, x)
                           if(!is.null(object$levels))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         } else {
                           tmp <- predict(object, x)
                           if(is.factor(object$y))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         }
                       out
                     },
                     score = function(x, y)
                     {
                       ## should return a named logical vector
                       anovaScores(x, y)
                     },
                   filter = function(score, x, y) score <= 0.05
                   )
class(object)
object
object$levels
## sbf() helper functions: bagged trees via ipredbagg(); attaches
## class probabilities for classification outcomes.
## (inherits() instead of class() comparison; indentation normalized.)
test <- list(summary = defaultSummary,
                     fit = function(x, y, ...)
                     {
                       if(ncol(x) > 0)
                         {
                           library(ipred)
                           ipredbagg(y, x, ...)
                         } else nullModel(y = y)
                     },
                     pred = function(object, x)
                     {
                       if(inherits(object, "nullModel"))
                         {
                           tmp <- predict(object, x)
                           ## Classification null models carry $levels.
                           if(!is.null(object$levels))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         } else {
                           tmp <- predict(object, x)
                           if(is.factor(object$y))
                             {
                               out <- cbind(data.frame(pred = tmp),
                                            as.data.frame(predict(object, x, type = "prob")))
                             } else out <- tmp
                         }
                       out
                     },
                     score = function(x, y)
                     {
                       ## should return a named logical vector
                       anovaScores(x, y)
                     },
                   filter = function(score, x, y) score <= 0.05
                   )
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:20],mdrrClass,#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
## sbf() helper functions: random forest fit; both branches return
## predictions plus class probabilities for classification.
## (inherits() instead of class() comparison; wrapped line rejoined.)
test <- list(summary = defaultSummary,
              fit = function(x, y, ...)
              {
                if(ncol(x) > 0)
                  {
                    library(randomForest)
                    randomForest(x, y, ...)
                  } else nullModel(y = y)
              },
              pred = function(object, x)
              {
                if(inherits(object, "nullModel"))
                  {
                    tmp <- predict(object, x)
                    ## Classification null models carry $levels.
                    if(!is.null(object$levels))
                      {
                        out <- cbind(data.frame(pred = tmp),
                                     as.data.frame(predict(object, x, type = "prob")))
                      } else out <- tmp
                  } else {
                    tmp <- predict(object, x)
                    if(is.factor(object$y))
                      {
                        out <- cbind(data.frame(pred = tmp),
                                     as.data.frame(predict(object, x, type = "prob")))
                      } else out <- tmp
                  }
                out
              },
              score = function(x, y)
              {
                ## should return a named logical vector
                if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
              },
              filter = function(score, x, y) score <= 0.05
              )
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:20], mdrrClass,#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
?train
data(iris)#
TrainData <- iris[,1:4]#
TrainClasses <- iris[,5]#
#
knnFit1 <- train(TrainData, TrainClasses,#
                 "knn",#
                 tuneLength = 10,#
                 trControl = trainControl(method = "cv"))
knnFit$method
knnFit1$method
modelLookup(knnFit1$method)
modelLookup(knnFit1$method)$probModel
modelLookup(knnFit1$method)$probModel[1]
## sbf() helper functions: tuned model via train(); attaches class
## probabilities when the outcome is a factor AND the chosen method
## supports them (modelLookup()$probModel).
## Fixes: doubled "<-  <-" assignment; rejoined garbled "cbind(" line;
## scalar condition uses && so it short-circuits; inherits() idiom.
test <- list(summary = defaultSummary,
                 fit = function(x, y, ...)
                 {
                   if(ncol(x) > 0)
                     {
                       train(x, y, ...)
                     } else nullModel(y = y)
                 },
                 pred = function(object, x)
                 {
                   if(inherits(object, "nullModel"))
                     {
                       tmp <- predict(object, x)
                       if(!is.null(object$levels))
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     } else {
                       tmp <- predict(object, x)
                       if(is.factor(object$y) && modelLookup(object$method)$probModel[1])
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     }
                   out
                 },
                 score = function(x, y)
                 {
                   ## should return a named logical vector
                   if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
                 },
                 filter = function(score, x, y) score <= 0.05
                 )
## sbf() helper functions: tuned model via train(); attaches class
## probabilities when the outcome is a factor AND the chosen method
## supports them (modelLookup()$probModel).
## Fixes: rejoined garbled "data.frame(" line; scalar condition uses
## && so it short-circuits; inherits() idiom.
test <- list(summary = defaultSummary,
                 fit = function(x, y, ...)
                 {
                   if(ncol(x) > 0)
                     {
                       train(x, y, ...)
                     } else nullModel(y = y)
                 },
                 pred = function(object, x)
                 {
                   if(inherits(object, "nullModel"))
                     {
                       tmp <- predict(object, x)
                       if(!is.null(object$levels))
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     } else {
                       tmp <- predict(object, x)
                       if(is.factor(object$y) && modelLookup(object$method)$probModel[1])
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     }
                   out
                 },
                 score = function(x, y)
                 {
                   ## should return a named logical vector
                   if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
                 },
                 filter = function(score, x, y) score <= 0.05
                 )
data(mdrr)
## NOTE(review): trailing "#" characters below are paste artifacts from the
## console session; they parse as empty comments and are harmless.
#
## Use a GAM is the filter, then fit a random forest model#
## sample(mdrrClass) permutes the response -- presumably a null-result
## (no-signal) sanity check; confirm intent.
RFwithGAM <- sbf(mdrrDescr[, 1:20], sample(mdrrClass),#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
## Third iteration of the custom sbf() function set, with a browser()
## hook inserted to inspect predictions interactively.
## Fixes: "cbind" token split across two lines in the pasted history
## (missing-function error at run time); inherits(); scalar &&.
test <- list(summary = defaultSummary,
             fit = function(x, y, ...)
             {
               if(ncol(x) > 0)
                 {
                   train(x, y, ...)
                 } else nullModel(y = y)
             },
             pred = function(object, x)
             {
               ## NOTE(review): debugging hook left in during this session;
               ## remove before any non-interactive use
               browser()
               if(!inherits(object, "nullModel"))
                 {
                   tmp <- predict(object, x)
                   if(is.factor(object$y) && modelLookup(object$method)$probModel[1])
                     {
                       out <- cbind(data.frame(pred = tmp),
                                    as.data.frame(predict(object, x, type = "prob")))
                     } else out <- tmp
                 } else {
                   tmp <- predict(object, x)
                   if(!is.null(object$levels))
                     {
                       out <- cbind(data.frame(pred = tmp),
                                    as.data.frame(predict(object, x, type = "prob")))
                     } else out <- tmp
                 }
               out
             },
             score = function(x, y)
             {
               ## should return a named vector of p-values, one per predictor
               if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
             },
             filter = function(score, x, y) score <= 0.05
             )
## Use a GAM as the filter, then fit a PLS model via train().
## Fix: a comma was missing after method = "pls", which made the call a
## parse error (the trailing "#" paste artifacts are also removed).
RFwithGAM <- sbf(mdrrDescr[, 1:20], mdrrClass, method = "pls",
                 sbfControl = sbfControl(functions = test,
                                         verbose = FALSE,
                                         method = "cv"))
## NOTE(review): the lines below are remnants of an interactive browser()
## session captured in the history -- they are not runnable as a script
## ("object" only exists inside pred() at the debug prompt).
is.factor(object$y)
## "Q" is the browser quit command; as a script line it is a bare symbol
## lookup and will error
Q
names(knnFit1)
knnFit1$modelType

## Final sbf() helper-function set built around train(): filter columns by
## univariate p-values (ANOVA for classification, GAM for regression), fit
## via train(), and predict with class probabilities when available.
## Fixes: inherits() instead of class(object) != "nullModel" (class() can
## return a vector); scalar && in the if condition; mis-indented cbind line.
caretSBF <- list(summary = defaultSummary,
                 fit = function(x, y, ...)
                 {
                   ## nullModel() handles the case where the filter removed
                   ## every predictor
                   if(ncol(x) > 0)
                     {
                       train(x, y, ...)
                     } else nullModel(y = y)
                 },
                 pred = function(object, x)
                 {
                   if(!inherits(object, "nullModel"))
                     {
                       tmp <- predict(object, x)
                       if(object$modelType == "Classification" &&
                          modelLookup(object$method)$probModel[1])
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     } else {
                       tmp <- predict(object, x)
                       if(!is.null(object$levels))
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                     }
                   out
                 },
                 score = function(x, y)
                 {
                   ## should return a named vector of p-values, one per predictor
                   if(is.factor(y)) anovaScores(x, y) else gamScores(x, y)
                 },
                 filter = function(score, x, y) score <= 0.05
                 )
## Start from the caretSBF template and switch to ROC-based summaries
test <- caretSBF
test$summary <- twoClassSummary
## NOTE(review): trailing "#" characters are paste artifacts; despite the
## comments mentioning random forest, method = "pls" fits PLS models here.
#
## Use a GAM is the filter, then fit a random forest model#
RFwithGAM <- sbf(mdrrDescr[, 1:20], mdrrClass, method = "pls",#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
#
## Use a GAM is the filter, then fit a random forest model#
## same again on all (scaled) descriptors with a longer tuning grid
RFwithGAM <- sbf(scale(mdrrDescr), mdrrClass, method = "pls", tuneLength = 20,#
                 sbfControl = sbfControl(functions = test,#
                                         verbose = FALSE, #
                                         method = "cv"))
RFwithGAM
## First rfe() experiments with the built-in LDA helper functions.
## Fix: "trceback()" typo corrected to traceback().
library(caret)
?rfe
data(mdrr)

## drop near-zero-variance columns, then keep the first 10 predictors
mdrrDescr <- mdrrDescr[, -nearZeroVar(mdrrDescr)][, 1:10]


rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               rfeControl = rfeControl(functions = ldaFuncs))
warnings()
debug(rfe.default)

rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               rfeControl = rfeControl(functions = ldaFuncs, method = "cv", number = 5))
## NOTE(review): the next two names are rfe() internals, only visible at
## the debug prompt -- not runnable outside debug()
perfNames
rfeResults[[1]]
traceback()


## Replay of an rfe() debug session: the bare names below (selectedVars,
## bestSubset, byResample, rfePred, classLevels, externPerf) are internal
## variables that only exist at the browser prompt inside rfe().
library(caret)
data(mdrr)

mdrrDescr <- mdrrDescr[, -nearZeroVar(mdrrDescr)][, 1:10]


rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               rfeControl = rfeControl(functions = ldaFuncs, method = "cv", number = 5))
selectedVars
rfeControl$functions$selectVar(selectedVars, bestSubset)
bestVar <- rfeControl$functions$selectVar(selectedVars, bestSubset)
bestSubset
byResample
traceback()
## "n" is the debugger single-step command, captured in the history
n
rfePred[[1]]
rfePred[[2]]
rfePred
byResample[[1]]
resampleResults <- lapply(byResample, rfeControl$functions$summary, #
    lev = classLevels)
## NOTE(review): lapply() returns a list but apply() expects a matrix or
## data frame, so the next call presumably errored as written -- confirm
c(apply(resampleResults, 2, mean, na.rm = TRUE), #
    apply(resampleResults, 2, sd, na.rm = TRUE))
externPerf
resampleResults


## RFE with the LDA helper functions, scoring subsets by ROC through
## twoClassSummary (5-fold cross-validation).
library(caret)
data(mdrr)

nzv <- nearZeroVar(mdrrDescr)
mdrrDescr <- mdrrDescr[, -nzv][, 1:10]

test <- ldaFuncs
test$summary <- twoClassSummary
ctrlCV5 <- rfeControl(functions = test, method = "cv", number = 5)
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC",
               rfeControl = ctrlCV5)


library(caret)
data(mdrr)

mdrrDescr <- mdrrDescr[, -nearZeroVar(mdrrDescr)][, 1:10]

## LDA helpers with ROC/Sens/Spec summaries
test <- ldaFuncs
test$summary <- twoClassSummary
## no metric argument here: the default (Accuracy) is used for selection
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               rfeControl = rfeControl(functions = test, method = "cv", number = 5))


library(caret)
data(mdrr)

mdrrDescr <- mdrrDescr[, -nearZeroVar(mdrrDescr)][, 1:10]

test <- ldaFuncs
test$summary <- twoClassSummary
## explicit metric = "ROC" with 10-fold CV
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC",
               rfeControl = rfeControl(functions = test, method = "cv", number = 10))
## per-resample performance values
rfeTest$resample
## rfe() helper functions using klaR::NaiveBayes; ranking is model-free,
## via filterVarImp() on the predictors themselves.
test <- list(summary = defaultSummary,
                fit = function(x, y, first, last, ...)
                {
                  library(klaR)
                  ## usekernel: kernel density estimates for continuous
                  ## predictors; fL = 2: Laplace correction
                  NaiveBayes(x, y, usekernel = TRUE, fL = 2, ...)
                },
                pred = function(object, x)
                {
                   ## NaiveBayes predictions carry both the class and the
                   ## posterior probabilities
                   tmp <- predict(object, x)
                   out <- cbind(data.frame(pred = tmp$class),
                                as.data.frame(tmp$posterior))
                   out
                },
                rank = function(object, x, y)
                {
                  vimp <- filterVarImp(x, y)
                  if(is.factor(y))
                    {
                      ## average the per-class importance columns
                      avImp <- apply(vimp, 1, mean)
                      vimp$Overall <- avImp
                    }
                  
                  ## sort predictors by decreasing overall importance
                  vimp <- vimp[
                               order(
                                     vimp$Overall,
                                     decreasing = TRUE)
                               ,,
                        drop = FALSE]
                  
                  vimp$var <- rownames(vimp)                  
                  vimp
                },
                selectSize = pickSizeBest,
                selectVar = pickVars)
## rfe() helper functions for randomForest: compute importance only on the
## first (full-model) fit, predict with class probabilities for
## classification, and rank by averaged per-class importance.
## Fix: pred() now returns `out` explicitly (it previously relied on the
## invisible value of the if/else assignment, unlike the sibling
## definitions in this file).
test <- list(summary = defaultSummary,
             fit = function(x, y, first, last, ...)
             {
               library(randomForest)
               ## importance scores are only needed for the first fit of
               ## each resample
               randomForest(x, y, importance = first, ...)
             },
             pred = function(object, x)
             {
               tmp <- predict(object, x)
               if(is.factor(object$y))
                 {
                   out <- cbind(data.frame(pred = tmp),
                                as.data.frame(predict(object, x, type = "prob")))
                 } else out <- tmp
               out
             },
             rank = function(object, x, y)
             {
               vimp <- varImp(object)

               if(is.factor(y))
                 {
                   ## average the per-class importance columns when the
                   ## importance matrix has one column per class
                   if(all(levels(y) %in% colnames(vimp)))
                     {
                       avImp <- apply(vimp[, levels(y), drop = TRUE],
                                      1,
                                      mean)
                       vimp$Overall <- avImp
                     }

                 }

               ## sort predictors by decreasing overall importance
               vimp <- vimp[order(vimp$Overall, decreasing = TRUE), ,
                            drop = FALSE]

               vimp$var <- rownames(vimp)
               vimp
             },
             selectSize = pickSizeBest,
             selectVar = pickVars)
## rfe() helper functions using ipred's bagged trees; ranking comes from
## varImp() on the fitted bagging object.
test <- list(summary = defaultSummary,
                     fit = function(x, y, first, last, ...)
                     {
                       library(ipred)
                       ## NOTE(review): ipredbagg takes the response first,
                       ## then the predictors -- presumably intentional;
                       ## confirm against the ipred signature
                       ipredbagg(y, x, ...)
                     },
                     pred = function(object, x)
                     {
                       tmp <- predict(object, x)
                       ## append class probabilities for classification
                       if(is.factor(object$y))
                         {
                           out <- cbind(data.frame(pred = tmp),
                                        as.data.frame(predict(object, x, type = "prob")))
                         } else out <- tmp
                       out
                     },
                     rank = function(object, x, y)
                     {
                       ## sort by decreasing (unscaled) overall importance
                       vimp <- varImp(object, scale = FALSE)
                       vimp <- vimp[
                                    order(vimp$Overall, decreasing = TRUE)
                                    ,,drop = FALSE]
                       vimp$var <- rownames(vimp)
                    vimp
                     },
                     selectSize = pickSizeBest,
                     selectVar = pickVars)
## score by ROC with 10-fold CV
test$summary <- twoClassSummary
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC",
               rfeControl = rfeControl(functions = test, method = "cv", number = 10))
## NOTE(review): everything from the "} else {" below through the closing
## ")" is an orphaned paste fragment -- the head of this list definition is
## missing from the history, so this span does not parse on its own. It
## matches the tail of the train()-based rfe helper list that appears in
## full further down in this file; it is kept as-is rather than guessed at.
          } else {
                         if(all(levels(y) %in% colnames(vimp)))
                           {
                             avImp <- apply(vimp[, levels(y), drop = TRUE],
                                            1,
                                            mean)
                             vimp$Overall <- avImp
                           } else stop("need importance columns for each class")
                         
                       } 
                     vimp$var <- rownames(vimp)
                     vimp
                   },
                   selectSize = pickSizeBest,
                   selectVar = pickVars
                   )
test$summary <- twoClassSummary
## radial-kernel SVM RFE scored by ROC, 10-fold CV
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC", method = "svmRadial", tuneLength = 5,
               rfeControl = rfeControl(functions = test, method = "cv", number = 10))
test$summary <- twoClassSummary
## same model with 3-fold CV
rfeTest <- rfe(mdrrDescr, mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC", method = "svmRadial", tuneLength = 5,
               rfeControl = rfeControl(functions = test, method = "cv", number = 3))
## rfe() helper functions that wrap train(): predictions include class
## probabilities when the model supports them; ranking uses varImp() on
## the fitted train object (regression sorts by the single importance
## column, classification averages the per-class columns).
## Fixes: scalar && in the if condition (was the vectorized &) and the
## broken indentation of the "} else {" line.
test <- list(summary = defaultSummary,
             fit = function(x, y, first, last, ...) train(x, y, ...),
             pred = function(object, x)
             {
               tmp <- predict(object, x)
               if(object$modelType == "Classification" &&
                  modelLookup(object$method)$probModel[1])
                 {
                   out <- cbind(data.frame(pred = tmp),
                                as.data.frame(predict(object, x, type = "prob")))
                 } else out <- tmp
               out
             },
             rank = function(object, x, y)
             {
               vimp <- varImp(object, scale = FALSE)$importance
               if(object$modelType == "Regression")
                 {
                   ## single importance column: sort directly
                   vimp <- vimp[order(vimp[,1], decreasing = TRUE), ,
                                drop = FALSE]
                 } else {
                   ## average the per-class importance columns when present
                   if(all(levels(y) %in% colnames(vimp)))
                     {
                       avImp <- apply(vimp[, levels(y), drop = TRUE],
                                      1,
                                      mean)
                       vimp$Overall <- avImp
                     }

                 }
               vimp$var <- rownames(vimp)
               vimp
             },
             selectSize = pickSizeBest,
             selectVar = pickVars
             )
test$summary <- twoClassSummary
## PLS-based RFE on scaled descriptors, ROC metric, 3-fold CV
rfeTest <- rfe(scale(mdrrDescr), mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC", method = "pls", tuneLength = 5,
               rfeControl = rfeControl(functions = test, method = "cv", number = 3))
rfeTest
xyplot(rfeTest)
test$summary <- twoClassSummary
## repeat with number = 20 resamples under rfeControl's default resampling
## method (presumably the bootstrap -- confirm against rfeControl docs)
rfeTest <- rfe(scale(mdrrDescr), mdrrClass,
               sizes = c(2, 4, 8),
               metric = "ROC", method = "pls", tuneLength = 5,
               rfeControl = rfeControl(functions = test, number = 20))
xyplot(rfeTest, metric = "ROC")
## Friedman #1 benchmark data: 10 simulated predictors plus 40 pure-noise
## columns. Fix: "colnamnes(x)" typo corrected to colnames(x).
library(mlbench)
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- paste("real", 1:ncol(sim$x), sep = "")
bogus <- matrix(rnorm(n * p), nrow = n)
colnames(bogus) <- paste("bogus", 1:ncol(bogus), sep = "")
x <- cbind(sim$x, bogus)
y <- sim$y
colnames(x)
## Second naming attempt: rep(..., each = 5) labels the first five
## simulated columns "real" and the last five "bogus", producing duplicate
## column names within each group.
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- rep(c("real", "bogus"), each = 5)
bogus <- matrix(rnorm(n * p), nrow = n)
colnames(bogus) <- paste("bogus", 1:ncol(bogus), sep = "")
x <- cbind(sim$x, bogus)
y <- sim$y
?mlbench.friedman1
## Friedman #1 data: the first five simulated columns are the informative
## predictors. Fix: the original labelled BOTH halves "bogus"; the first
## paste() now uses "real", matching the corrected repeat of this setup
## later in the session and the vignette chunk.
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- c(paste("real", 1:5, sep = ""),
                     paste("bogus", 1:5, sep = ""))
bogus <- matrix(rnorm(n * p), nrow = n)
## noise columns continue the "bogus" numbering after the first five
colnames(bogus) <- paste("bogus", 5+(1:ncol(bogus)), sep = "")
x <- cbind(sim$x, bogus)
y <- sim$y
## Friedman #1 data: five informative ("real") predictors, five built-in
## noise columns, plus 40 extra noise columns, 100 rows.
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- c(paste0("real", 1:5),
                     paste0("bogus", 1:5))
bogus <- matrix(rnorm(n * p), nrow = n)
## continue the "bogus" numbering after the first five noise columns
colnames(bogus) <- paste0("bogus", 5 + seq_len(ncol(bogus)))
x <- cbind(sim$x, bogus)
y <- sim$y
colnames(x)
## NOTE(review): setwd() with a relative path is session-dependent -- this
## only works from the directory the session happened to start in.
setwd("Documents/Code/caret/pkg/caret/inst/doc/")
## extract the R code chunks from the vignette source
Stangle("caretSelection.Rnw")
library(caret)
library(mlbench)
library(Hmisc)
library(randomForest)


###################################################
### chunk number 2: simSettings
###################################################
## Replay of the Stangle-extracted vignette chunk: build the Friedman #1
## data, center/scale it, and run linear-model RFE over the subset sizes.
n <- 100
p <- 40
sigma <- 1
set.seed(1)
sim <- mlbench.friedman1(n, sd = sigma)
colnames(sim$x) <- c(paste("real", 1:5, sep = ""),
                     paste("bogus", 1:5, sep = ""))
bogus <- matrix(rnorm(n * p), nrow = n)
colnames(bogus) <- paste("bogus", 5+(1:ncol(bogus)), sep = "")
x <- cbind(sim$x, bogus)
y <- sim$y
## center and scale the predictors
normalization <- preProcess(x)
x <- predict(normalization, x)
x <- as.data.frame(x)
subsets <- c(1:5, 10, 15, 20, 25)
set.seed(10)

ctrl <- rfeControl(functions = lmFuncs,
                   method = "cv",
                   verbose = FALSE,
                   returnResamp = "final")

lmProfile <- rfe(x, y,
                 sizes = subsets,
                 rfeControl = ctrl)

lmProfile
## rfe() helper functions for randomForest: fit (importance only on the
## first fit per resample), plain predictions, and ranking by averaged
## per-class importance.
rfRFE <-  list(summary = defaultSummary,
                 fit = function(x, y, first, last, ...)
                 {
                   library(randomForest)
                   ## importance is only computed for the first fit
                   randomForest(x, y, importance = first, ...)
                 },
                 pred = function(object, x)
                 {
                   predict(object, x)
                 },
                 rank = function(object, x, y)
                 {
                   vimp <- varImp(object)

                   if(is.factor(y))
                     {
                       ## average the per-class importance columns when the
                       ## importance matrix has one column per class
                       if(all(levels(y) %in% colnames(vimp)))
                         {
                           avImp <- apply(vimp[, levels(y), drop = TRUE],
                                          1,
                                          mean)
                           vimp$Overall <- avImp
                         }

                     }
                   
                   ## sort predictors by decreasing overall importance
                   vimp <- vimp[
                                order(
                                      vimp$Overall,
                                      decreasing = TRUE)
                                ,,
                                drop = FALSE]
                   
                   vimp$var <- rownames(vimp)                  
                   vimp
                 },
                 selectSize = pickSizeBest,
                 selectVar = pickVars)
## Compare linear-model RFE, random-forest RFE, and a filter + random
## forest (sbf) on the same resamples, then plot the resampling
## distributions. Fix: the failed dotplot() call with the misspelled
## scales component (relaiton = "Free") is dropped; the corrected call
## (relation = "free") is kept.
ctrl$functions <- rfRFE
ctrl$returnResamp <- "all"
set.seed(10)
rfProfile <- rfe(x, y,
                 sizes = subsets,
                 rfeControl = ctrl)
set.seed(10)
## univariate filter + random forest, for comparison with the RFE results
rfWithFilter <- sbf(x, y,
                    sbfControl = sbfControl(
                      functions = rfSBF,
                      method = "cv",
                      verbose = FALSE))
## collect the resampling distributions of all three approaches
test <- resamples(list(lmRFE = lmProfile, rfRFE = rfProfile, rfFilter = rfWithFilter))
dotplot(test)
dotplot(test, scales = list(x = list(relation = "free")))
parallel(test)
dotplot(diff(test))
?lattice.resamples
?dotplot.resamples
?parallel
bootValues <- resamples(list(lmRFE = lmProfile, rfRFE = rfProfile, rfFilter = rfWithFilter))
differences <- diff(bootValues)
differences
summary(differences)
?dotplot.diff.resamples
parallel(test, metric = "Rsquared")
trellis.par.set(caretTheme())
print(parallel(bootValues, metric = "Rsquared"))
