# --------------------------------------
# Author: Andreas Alfons
#         Erasmus Universiteit Rotterdam
# --------------------------------------

#' (Robustly) fit a mediation model
#'
#' (Robustly) estimate the effects in a mediation model.
#'
#' If \code{method} is \code{"regression"}, \code{robust} is \code{TRUE} and
#' \code{median} is \code{FALSE} (the defaults), the effects are estimated via
#' robust regressions with \code{\link[robustbase]{lmrob}}.
#'
#' If \code{method} is \code{"regression"}, \code{robust} is \code{TRUE} and
#' \code{median} is \code{TRUE}, the effects are estimated via median
#' regressions with \code{\link[quantreg]{rq}}.  Unlike the robust regressions
#' above, median regressions are not robust against outliers in the explanatory
#' variables.
#'
#' If \code{method} is \code{"covariance"} and \code{robust} is \code{TRUE},
#' the effects are estimated based on a Huber M-estimator of location and
#' scatter.  Note that this covariance-based approach is less robust than the
#' approach based on robust regressions described above.
#'
#' @aliases print.fit_mediation summary.reg_fit_mediation
#' summary.cov_fit_mediation
#'
#' @param data  a data frame containing the variables.
#' @param x  a character string, an integer or a logical vector specifying the
#' column of \code{data} containing the independent variable.
#' @param y  a character string, an integer or a logical vector specifying the
#' column of \code{data} containing the dependent variable.
#' @param m  a character, integer or logical vector specifying the columns of
#' \code{data} containing the hypothesized mediator variables.
#' @param covariates  optional; a character, integer or logical vector
#' specifying the columns of \code{data} containing additional covariates to be
#' used as control variables.
#' @param method  a character string specifying the method of
#' estimation.  Possible values are \code{"regression"} (the default)
#' to estimate the effects via regressions, or \code{"covariance"} to
#' estimate the effects via the covariance matrix.  Note that the effects are
#' always estimated via regressions if more than one hypothesized mediator is
#' supplied in \code{m}, or if control variables are specified via
#' \code{covariates}.
#' @param robust  a logical indicating whether to robustly estimate the effects
#' (defaults to \code{TRUE}).
#' @param median  a logical indicating if the effects should be estimated via
#' median regression (defaults to \code{FALSE}).  This is ignored unless
#' \code{method} is \code{"regression"} and \code{robust} is \code{TRUE}.
#' @param control  a list of tuning parameters for the corresponding robust
#' method.  For robust regression (\code{method = "regression"},
#' \code{robust = TRUE} and \code{median = FALSE}), a list of tuning
#' parameters for \code{\link[robustbase]{lmrob}} as generated by
#' \code{\link{reg_control}}.  For Huberized covariance matrix estimation
#' (\code{method = "covariance"} and \code{robust = TRUE}), a list of tuning
#' parameters for \code{\link{cov_Huber}} as generated by
#' \code{\link{cov_control}}.  No tuning parameters are necessary for median
#' regression (\code{method = "regression"}, \code{robust = TRUE} and
#' \code{median = TRUE}).
#' @param \dots  additional arguments can be used to specify tuning parameters
#' directly instead of via \code{control}.
#'
#' @return An object inheriting from class \code{"fit_mediation"} (class
#' \code{"reg_fit_mediation"} if \code{method} is \code{"regression"} or
#' \code{"cov_fit_mediation"} if \code{method} is \code{"covariance"}) with
#' the following components:
#' \item{a}{a numeric vector containing the point estimates of the effect of
#' the independent variable on the proposed mediator variables.}
#' \item{b}{a numeric vector containing the point estimates of the direct
#' effect of the proposed mediator variables on the dependent variable.}
#' \item{c}{numeric; the point estimate of the direct effect of the
#' independent variable on the dependent variable.}
#' \item{c_prime}{numeric; the point estimate of the total effect of the
#' independent variable on the dependent variable.}
#' \item{fit_mx}{an object of class \code{"\link[robustbase]{lmrob}"} or
#' \code{"\link[stats]{lm}"} containing the estimation results from the
#' regression of the proposed mediator variable on the independent variable, or
#' a list of such objects in case of more than one hypothesized mediator
#' (only \code{"reg_fit_mediation"}).}
#' \item{fit_ymx}{an object of class \code{"\link[robustbase]{lmrob}"} or
#' \code{"\link[stats]{lm}"} containing the estimation results from the
#' regression of the dependent variable on the proposed mediator and
#' independent variables (only \code{"reg_fit_mediation"}).}
#' \item{fit_yx}{an object of class \code{"\link[stats]{lm}"} containing the
#' estimation results from the regression of the dependent variable on the
#' independent variable (only \code{"reg_fit_mediation"} and if \code{robust}
#' is \code{FALSE}).}
#' \item{cov}{an object of class \code{"\link{cov_Huber}"} or
#' \code{"\link{cov_ML}"} containing the covariance matrix estimates
#' (only \code{"cov_fit_mediation"}).}
#' \item{x, y, m, covariates}{character vectors specifying the respective
#' variables used.}
#' \item{data}{a data frame containing the independent, dependent and
#' proposed mediator variables, as well as covariates.}
#' \item{robust}{a logical indicating whether the effects were estimated
#' robustly.}
#' \item{median}{a logical indicating whether the effects were estimated
#' via median regression (only \code{"reg_fit_mediation"}).}
#' \item{control}{a list of tuning parameters used (only if \code{robust} is
#' \code{TRUE}).}
#'
#' @author Andreas Alfons
#'
#' @references
#' Alfons, A., Ates, N.Y. and Groenen, P.J.F. (2018) A robust bootstrap test
#' for mediation analysis.  \emph{ERIM Report Series in Management}, Erasmus
#' Research Institute of Management.  URL
#' \url{https://hdl.handle.net/1765/109594}.
#'
#' Yuan, Y. and MacKinnon, D.P. (2014) Robust mediation analysis based on
#' median regression. \emph{Psychological Methods}, \bold{19}(1),
#' 1--20.
#'
#' Zu, J. and Yuan, K.-H. (2010) Local influence and robust procedures for
#' mediation analysis. \emph{Multivariate Behavioral Research}, \bold{45}(1),
#' 1--44.
#'
#' @seealso \code{\link{test_mediation}}
#'
#' \code{\link[robustbase]{lmrob}}, \code{\link[stats]{lm}},
#' \code{\link{cov_Huber}}, \code{\link{cov_ML}}
#'
#' @examples
#' data("BSG2014")
#' fit <- fit_mediation(BSG2014,
#'                      x = "ValueDiversity",
#'                      y = "TeamCommitment",
#'                      m = "TaskConflict")
#' test <- test_mediation(fit)
#' summary(test)
#'
#' @keywords multivariate
#'
#' @import boot
#' @import robustbase
#' @importFrom quantreg rq
#' @export

fit_mediation <- function(data, x, y, m, covariates = NULL,
                          method = c("regression", "covariance"),
                          robust = TRUE, median = FALSE, control, ...) {
  ## initializations
  # prepare data set
  data <- as.data.frame(data)
  x <- data[, x, drop = FALSE]
  p_x <- ncol(x)
  if (p_x != 1L) stop("exactly one independent variable required")
  y <- data[, y, drop = FALSE]
  p_y <- ncol(y)
  if (p_y != 1L) stop("exactly one dependent variable required")
  m <- data[, m, drop = FALSE]
  p_m <- ncol(m)
  if (p_m == 0L) stop("at least one hypothesized mediator variable required")
  covariates <- data[, covariates, drop = FALSE]
  p_covariates <- ncol(covariates)
  data <- cbind(x, y, m, covariates)
  # extract names
  cn <- names(data)
  x <- cn[1L]
  y <- cn[2L]
  m <- cn[2L + seq_len(p_m)]
  covariates <- cn[-(seq_len(2L + p_m))]
  # make sure that variables are numeric
  convert <- !sapply(data, is.numeric)
  data[convert] <- lapply(data[convert], as.numeric)
  # remove incomplete observations
  data <- data[complete.cases(data), ]
  # check if there are enough observations
  d <- dim(data)
  if (d[1L] <= d[2L]) stop("not enough observations")
  # check other arguments
  method <- match.arg(method)
  if ((p_m > 1L || p_covariates > 0L) && method == "covariance") {
    method <- "regression"
    warning("covariance method not available with multiple mediators ",
            "or any covariates; using regression method")
  }
  robust <- isTRUE(robust)
  median <- isTRUE(median)
  if (robust && missing(control)) {
    if (method == "regression" && !median) control <- reg_control(...)
    else control <- cov_control(...)
  }
  if (!robust || method != "regression") median <- FALSE
  ## estimate effects
  if (method == "regression") {
    reg_fit_mediation(x, y, m, covariates, data = data, robust = robust,
                      median = median, control = control)
  } else {
    cov_fit_mediation(x, y, m, data = data, robust = robust, control = control)
  }
}


## estimate the effects in a mediation model via regressions
reg_fit_mediation <- function(x, y, m, covariates = character(), data,
                              robust = TRUE, median = FALSE,
                              control = reg_control()) {
  # number of mediators
  p_m <- length(m)
  # construct formulas for regression models
  m_term <- paste(m, collapse = "+")
  covariate_term <- paste(c("", covariates), collapse = "+")
  f_mx <- paste(m, "~", x, covariate_term, sep = "")
  f_ymx <- as.formula(paste(y, "~", m_term, "+", x, covariate_term, sep = ""))
  # compute regression models
  if (robust) {
    # for the robust methods, the total effect is estimated as c' = ab + c
    # to satisfy this relationship
    # TODO: check if this makes sense for median regression
    # (what if, e.g., the conditional distribution is asymmetric)
    if (median) {
      # LAD-estimator for median regression
      if (p_m == 1L) {
        f_mx <- as.formula(f_mx)
        fit_mx <- rq(f_mx, data = data, tau = 0.5, model = FALSE)
      } else {
        fit_mx <- lapply(f_mx, function(f) {
          f_mx <- as.formula(f)
          rq(f_mx, data = data, tau = 0.5, model = FALSE)
        })
        names(fit_mx) <- m
      }
      fit_ymx <- rq(f_ymx, data = data, tau = 0.5, model = FALSE)
    } else {
      # MM-estimator for robust regression
      if (p_m == 1L) {
        f_mx <- as.formula(f_mx)
        fit_mx <- lmrob(f_mx, data = data, control = control,
                        model = FALSE, x = FALSE)
      } else {
        fit_mx <- lapply(f_mx, function(f) {
          f_mx <- as.formula(f)
          lmrob(f_mx, data = data, control = control,
                model = FALSE, x = FALSE)
        })
        names(fit_mx) <- m
      }
      fit_ymx <- lmrob(f_ymx, data = data, control = control,
                       model = FALSE, x = FALSE)
    }
    # neither method fits the direct path
    fit_yx <- NULL
  } else {
    # for the standard method, there is not much additional cost in performing
    # the regression for the total effect
    if (p_m == 1L) {
      f_mx <- as.formula(f_mx)
      fit_mx <- lm(f_mx, data = data, model = FALSE)
    } else {
      fit_mx <- lapply(f_mx, function(f) {
        f_mx <- as.formula(f)
        lm(f_mx, data = data, model = FALSE)
      })
      names(fit_mx) <- m
    }
    fit_ymx <- lm(f_ymx, data = data, model = FALSE)
    f_yx <- as.formula(paste(y, "~", x, covariate_term, sep = ""))
    fit_yx <- lm(f_yx, data = data, model = FALSE)
  }
  # extract effects
  if (p_m == 1L) {
    a <- unname(coef(fit_mx)[2L])
    b <- unname(coef(fit_ymx)[1L + seq_len(p_m)])
  } else {
    a <- sapply(fit_mx, function(fit) unname(coef(fit)[2L]))
    b <- coef(fit_ymx)[1L + seq_len(p_m)]
  }
  c <- unname(coef(fit_ymx)[2L + p_m])
  if (robust) c_prime <- if(p_m == 1L) a*b + c else sum(a*b) + c
  else c_prime <- unname(coef(fit_yx)[2L])
  # return results
  result <- list(a = a, b = b, c = c, c_prime = c_prime, fit_mx = fit_mx,
                 fit_ymx = fit_ymx, fit_yx = fit_yx, x = x, y = y, m = m,
                 covariates = covariates, data = data, robust = robust,
                 median = median)
  if(robust && !median) result$control <- control
  class(result) <- c("reg_fit_mediation", "fit_mediation")
  result
}


## estimate the effects in a mediation model via the covariance matrix
cov_fit_mediation <- function(x, y, m, data, robust = TRUE,
                              control = cov_control()) {
  # compute scatter matrix (Huber M-estimator or MLE of covariance matrix)
  cov <- if(robust) cov_Huber(data, control=control) else cov_ML(data)
  S <- cov$cov
  # compute coefficients of mediation model
  a <- S[m, x] / S[x, x]
  det <- S[x, x] * S[m, m] - S[m, x]^2
  b <- (-S[m, x] * S[y, x] + S[x, x] * S[y, m]) / det
  c <- (S[m, m] * S[y, x] - S[m, x] * S[y, m]) / det
  c_prime <- S[y, x] / S[x, x]
  # return results
  result <- list(a=a, b=b, c=c, c_prime=c_prime, cov=cov, x=x, y=y, m=m,
                 data=data, robust=robust)
  if(robust) result$control <- control
  class(result) <- c("cov_fit_mediation", "fit_mediation")
  result
}
