#' @title Gaussian process emulator construction
#'
#' @description This function builds and trains a GP emulator.
#'
#' @param X a matrix where each row is an input data point and each column is an input dimension.
#' @param Y a matrix with only one column and each row being an output data point.
#' @param struc an object produced by [kernel()] that gives a user-defined GP specifications. When `struc = NULL`,
#'     the GP specifications are automatically generated using information provided in `name`, `lengthscale`,
#'     `nugget_est`, `nugget`, and `internal_input_idx`. Defaults to `NULL`.
#' @param name kernel function to be used. Either `"sexp"` for squared exponential kernel or
#'     `"matern2.5"` for Matérn-2.5 kernel. Defaults to `"sexp"`. This argument is only used when `struc = NULL`.
#' @param lengthscale initial values of lengthscales in the kernel function. It can be a single numeric value or a vector:
#' * if it is a single numeric value, it is assumed that kernel functions across input dimensions share the same lengthscale;
#' * if it is a vector (which must have a length of `ncol(X)`), it is assumed that kernel functions across input dimensions have different lengthscales.
#'
#' Defaults to a vector of 0.2. This argument is only used when `struc = NULL`.
#' @param nugget_est a bool indicating if the nugget term is to be estimated:
#' 1. `FALSE`: the nugget term is fixed to `nugget`.
#' 2. `TRUE`: the nugget term will be estimated.
#'
#' Defaults to `FALSE`. This argument is only used when `struc = NULL`.
#' @param nugget the initial nugget value. If `nugget_est = FALSE`, the assigned value is fixed during the training.
#'     Set `nugget` to a small value (e.g., `1e-6`) and the corresponding bool in `nugget_est` to `FASLE` for deterministic emulations where the emulator
#'     interpolates the training data points. Set `nugget` to a reasonable larger value and the corresponding bool in `nugget_est` to `TRUE` for stochastic
#'     emulations where the computer model outputs are assumed to follow a homogeneous Gaussian distribution. Defaults to `1e-6`. This argument is only used
#'     when `struc = NULL`.
#' @param training a bool indicating if the initialized GP emulator will be trained.
#'     When set to `FALSE`, [gp()] returns an untrained GP emulator, to which one can apply [summary()] to inspect its specifications
#'     (especially when a customized `struc` is provided) or apply [predict()] to check its emulation performance before the training. Defaults to `TRUE`.
#' @param verb a bool indicating if the trace information on GP emulator construction and training will be printed during the function execution.
#'     Defaults to `TRUE`.
#' @param internal_input_idx the column indices of `X` that are generated by the linked emulators in the preceding layers.
#'     Set `internal_input_idx = NULL` if the GP emulator is in the first layer of a system or all columns in `X` are
#'     generated by the linked emulators in the preceding layers. Defaults to `NULL`. This argument is only used when `struc = NULL`.
#' @param linked_idx either a vector or a list of vectors:
#' * If `linked_idx` is a vector, it gives indices of columns in the pooled output matrix (formed by column-combined outputs of all
#'   emulators in the feeding layer) that feed into the GP emulator. If the GP emulator is in the first layer of a linked emulator system,
#'   the vector gives the column indices of the global input (formed by column-combining all input matrices of emulators in the first layer)
#'   that the GP emulator will use. The length of the vector shall equal to the length of `internal_input_idx` when `internal_input_idx` is not `NULL`.
#' * When the GP emulator is not in the first layer of a linked emulator system, `linked_idx` can be a list that gives the information on connections
#'   between the GP emulator and emulators in all preceding layers. The length of the list should equal to the number of layers before
#'   the GP emulator. Each element of the list is a vector that gives indices of columns in the pooled output matrix (formed by column-combined outputs
#'   of all emulators) in the corresponding layer that feed into the GP emulator. If the GP emulator has no connections to any emulator in a certain layer,
#'   set `NULL` in the corresponding position of the list. The order of input dimensions in `X[,internal_input_idx]` should be consistent with `linked_idx`.
#'   For example, a GP emulator in the second layer that is fed by the output dimension 1 and 3 of emulators in layer 1 should have `linked_idx = list( c(1,3) )`.
#'   In addition, the first and second columns of `X[,internal_input_idx]` should correspond to the output dimensions 1 and 3 from layer 1.
#'
#' Set `linked_idx = NULL` if the GP emulator will not be used for linked emulations. However, if this is no longer the case, one can use [set_linked_idx()]
#' to add linking information to the GP emulator. Defaults to `NULL`.
#'
#' @return An S3 class named `gp` that contains three slots:
#' * `constructor_obj`: a 'python' object that stores the information of the constructed GP emulator.
#' * `container_obj`: a 'python' object that stores the information for the linked emulation.
#' * `emulator_obj`: a 'python' object that stores the information for the predictions from the GP emulator.
#'
#' The returned `gp` object can be used by
#' * [predict()] for GP predictions.
#' * [validate()] for LOO and OOS validations.
#' * [plot()] for validation plots.
#' * [lgp()] for linked (D)GP emulator constructions.
#'
#' @details See further examples and tutorials at <https://mingdeyu.github.io/dgpsi-R/>.
#' @note Any R vector detected in `X` and `Y` will be treated as a column vector and automatically converted into a single-column
#'     R matrix.
#' @examples
#' \dontrun{
#' # load the package and the Python env
#' library(dgpsi)
#' init_py()
#'
#' # construct a step function
#' f <- function(x) {
#'    if (x < 0.5) return(-1)
#'    if (x >= 0.5) return(1)
#'   }
#'
#' # generate training data
#' X <- seq(0, 1, length = 10)
#' Y <- sapply(X, f)
#'
#' # training
#' m <- gp(X, Y)
#'
#' # summarizing
#' summary(m)
#'
#' # LOO cross validation
#' m <- validate(m)
#' plot(m)
#'
#' # prediction
#' test_x <- seq(0, 1, length = 200)
#' m <- predict(m, x = test_x)
#'
#' # OOS validation
#' validate_x <- sample(test_x, 10)
#' validate_y <- sapply(validate_x, f)
#' plot(m, validate_x, validate_y)
#'
#' # write and read the constructed emulator
#' write(m, 'step_gp')
#' m <- read('step_gp')
#' }
#'
#' @md
#' @export
gp <- function(X, Y, struc = NULL, name = 'sexp', lengthscale = rep(0.2, ncol(X)), nugget_est = FALSE, nugget = 1e-6, training = TRUE, verb = TRUE, internal_input_idx = NULL, linked_idx = NULL) {
  if ( !is.matrix(X)&!is.vector(X) ) stop("'X' must be a vector or a matrix.", call. = FALSE)
  if ( !is.matrix(Y)&!is.vector(Y) ) stop("'Y' must be a vector or a matrix.", call. = FALSE)
  if ( is.vector(X) ) X <- as.matrix(X)
  if ( is.vector(Y) ) Y <- as.matrix(Y)

  if ( nrow(X)!=nrow(Y) ) stop("'X' and 'Y' have different number of data points.", call. = FALSE)
  n_dim_X <- ncol(X)
  n_dim_Y <- ncol(Y)
  if ( n_dim_Y != 1 ) {
    stop("'Y' must be a vector or a matrix with only one column for a GP emulator.", call. = FALSE)
  }

  if( !is.null(linked_idx) ) {
    if ( !is.list(linked_idx) ) {
      linked_idx <- reticulate::np_array(as.integer(linked_idx - 1))
    } else {
      for ( i in 1:length(linked_idx) ){
        if ( !is.null(linked_idx[[i]]) ) linked_idx[[i]] <- reticulate::np_array(as.integer(linked_idx[[i]] - 1))
      }
    }
  }

  if ( is.null(struc) ) {
    if ( verb ) message("Auto-generating a GP structure ...", appendLF = FALSE)

    if ( length(lengthscale) != 1 & length(lengthscale) != n_dim_X) {
      stop("length(lengthscale) must be 1 or ncol(X).", call. = FALSE)
    }

    if( !is.null(internal_input_idx) ) {
      external_input_idx <- setdiff(1:n_dim_X, internal_input_idx)
      if ( length(external_input_idx) == 0) {
        internal_input_idx = NULL
        external_input_idx = NULL
      } else {
        internal_input_idx <- reticulate::np_array(as.integer(internal_input_idx - 1))
        external_input_idx <- reticulate::np_array(as.integer(external_input_idx - 1))
      }
    } else {
      external_input_idx = NULL
    }

    struc <- pkg.env$dgpsi$kernel(length = reticulate::np_array(lengthscale), name = name, scale_est = TRUE, nugget = nugget, nugget_est = nugget_est,
                                  input_dim = internal_input_idx, connect = external_input_idx)

    if ( verb ) {
      message(" done")
      Sys.sleep(0.5)
    }
  }

  if ( verb ) message("Initializing the GP emulator ...", appendLF = FALSE)

  obj <- pkg.env$dgpsi$gp(X, Y, struc)

  if ( verb ) {
    Sys.sleep(0.5)
    message(" done")
  }

  if ( training ) {
    if ( verb ){
      Sys.sleep(0.5)
      message("Training the GP emulator ...", appendLF = FALSE)
    }
    obj$train()
    if ( verb ) message(" done")
  }

  res <- list()
  res[['constructor_obj']] <- obj
  res[['container_obj']] <- pkg.env$dgpsi$container(obj$export(), linked_idx)
  res[['emulator_obj']] <- obj

  class(res) <- "gp"

  return(res)
}
