% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hmda.test.R
\name{hmda.test}
\alias{hmda.test}
\title{Permutation test for comparing the WMSHAP contributions of two features or domains}
\usage{
hmda.test(wmshap, features = NULL, domains = NULL, n = 5000)
}
\arguments{
\item{wmshap}{object of class 'shapley', as returned by the 'shapley' function}

\item{features}{character vector with the names of the two features to be compared with the permutation test}

\item{domains}{list of two character vectors, each naming the features that belong to one domain (group of features) to be compared.}

\item{n}{integer, number of permutations}
}
\value{
results of the permutation test comparing the WMSHAP contributions of the two features or domains
}
\description{
This function runs a permutation test to compare the weighted mean
             SHAP (WMSHAP) contributions of two features, or of two domains
             (groups of features), using an object of class 'shapley' and
             the specified number of permutations.
}
\examples{

\dontrun{
# load the required libraries for building the base-learners and the ensemble models
library(h2o)            #shapley supports h2o models
library(autoEnsemble)   #autoEnsemble models, particularly useful under severe class imbalance
library(shapley)

# initiate the h2o server
h2o.init(ignore_config = TRUE, nthreads = 2, bind_to_localhost = FALSE, insecure = TRUE)

# upload data to h2o cloud
prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
prostate <- h2o.importFile(path = prostate_path, header = TRUE)

### H2O provides 2 types of grid search for tuning the models, which are
### AutoML and Grid. This example uses AutoML to train the models for which
### the weighted mean shapley values are computed.

set.seed(10)

#######################################################
### PREPARE AutoML Grid (takes a couple of minutes)
#######################################################
# run AutoML to tune various models (GBM) for 120 seconds
y <- "CAPSULE"
prostate[,y] <- as.factor(prostate[,y])  #convert to factor for classification
aml <- h2o.automl(y = y, training_frame = prostate, max_runtime_secs = 120,
                 include_algos=c("GBM"),

                 # this setting ensures the models are comparable for building a meta learner
                 seed = 2023, nfolds = 10,
                 keep_cross_validation_predictions = TRUE)

### call 'shapley' function to compute the weighted mean and weighted confidence intervals
### of SHAP values across all trained models.
### Note that the 'newdata' should be the testing dataset!
result <- shapley(models = aml, newdata = prostate, plot = TRUE)

#######################################################
### Significance testing of contributions of two features
#######################################################
# testing the WMSHAP contributions of two features
hmda.test(result, features = c("GLEASON", "PSA"), n = 5000)

# testing the WMSHAP contributions of two domains (groups of features, latent factors, etc.)
hmda.test(result,
          n = 5000,
          domains = list(Demographic = c("RACE", "AGE"),
                         Cancer = c("VOL", "PSA", "GLEASON")))
}
}
\author{
E. F. Haghish
}
