#' Detect Regime Changes in Time Series
#'
#' @description
#' Main function for detecting regime changes (changepoints) in time series data.
#' Supports multiple detection methods, both frequentist and Bayesian, and can
#' operate in offline (retrospective) or online (sequential) modes.
#'
#' @param data Numeric vector, time series (ts object), or matrix for multivariate data.
#'   For matrices, rows are observations and columns are variables.
#' @param method Detection method. One of:
#'   \itemize{
#'     \item \code{"pelt"}: Pruned Exact Linear Time (default for offline)
#'     \item \code{"bocpd"}: Bayesian Online Changepoint Detection
#'     \item \code{"cusum"}: Cumulative Sum
#'     \item \code{"binseg"}: Binary Segmentation
#'     \item \code{"wbs"}: Wild Binary Segmentation
#'     \item \code{"shiryaev"}: Shiryaev-Roberts procedure
#'     \item \code{"ensemble"}: Combination of multiple methods
#'   }
#' @param type Type of change to detect:
#'   \itemize{
#'     \item \code{"mean"}: Changes in mean only
#'     \item \code{"variance"}: Changes in variance only
#'     \item \code{"both"}: Changes in mean and/or variance (default)
#'     \item \code{"trend"}: Changes in linear trend
#'     \item \code{"distribution"}: Non-parametric distributional changes
#'   }
#' @param mode Operation mode:
#'   \itemize{
#'     \item \code{"offline"}: Retrospective analysis with full data (default)
#'     \item \code{"online"}: Sequential analysis for monitoring
#'   }
#' @param n_changepoints Expected number of changepoints:
#'   \itemize{
#'     \item \code{"single"}: Detect at most one changepoint
#'     \item \code{"multiple"}: Detect multiple changepoints (default)
#'     \item An integer: Detect exactly this many changepoints
#'   }
#' @param penalty Penalty for model complexity (offline methods):
#'   \itemize{
#'     \item \code{"BIC"}: Bayesian Information Criterion (default)
#'     \item \code{"AIC"}: Akaike Information Criterion
#'     \item \code{"MBIC"}: Modified BIC
#'     \item \code{"MDL"}: Minimum Description Length
#'     \item A number: Manual penalty value
#'   }
#' @param min_segment Minimum segment length (number of observations)
#' @param prior Prior specification for Bayesian methods (from prior functions)
#' @param hazard Hazard prior for changepoint occurrence (Bayesian methods)
#' @param threshold Detection threshold (for online/CUSUM methods)
#' @param uncertainty Logical; if TRUE, compute confidence intervals
#' @param bootstrap_reps Number of bootstrap replicates for uncertainty (if uncertainty = TRUE)
#' @param ... Additional arguments passed to specific methods
#'
#' @return An object of class \code{"regime_result"} containing:
#'   \itemize{
#'     \item \code{changepoints}: Vector of detected changepoint locations
#'     \item \code{n_changepoints}: Number of changepoints detected
#'     \item \code{segments}: List of segment information (start, end, parameters)
#'     \item \code{confidence_intervals}: Confidence intervals for changepoint locations
#'     \item \code{existence_probability}: Probability that each changepoint exists
#'     \item \code{posterior}: Posterior distribution (Bayesian methods)
#'     \item \code{information_criterion}: BIC/AIC values
#'     \item \code{method}: Method used
#'     \item \code{call}: The function call
#'   }
#'
#' @examples
#'
#' set.seed(123)
#' data <- c(rnorm(100, mean = 0), rnorm(100, mean = 3))
#'
#'
#' result <- detect_regimes(data)
#' print(result)
#' plot(result)
#'
#'
#' result <- detect_regimes(data, method = "bocpd",
#'                          prior = normal_gamma())
#'
#'
#' result <- detect_regimes(data, method = "cusum",
#'                          mode = "online", threshold = 5)
#'
#' @export
detect_regimes <- function(
    data,
    method = c("pelt", "bocpd", "cusum", "binseg", "wbs", "shiryaev", "ensemble"),
    type = c("both", "mean", "variance", "trend", "distribution"),
    mode = c("offline", "online"),
    n_changepoints = "multiple",
    penalty = "BIC",
    min_segment = 2,
    prior = NULL,
    hazard = NULL,
    threshold = NULL,
    uncertainty = TRUE,
    bootstrap_reps = 200,
    ...
) {
  cl <- match.call()
  
  method <- match.arg(method)
  type <- match.arg(type)
  mode <- match.arg(mode)
  
  data_info <- validate_data(data)
  data <- data_info$data
  is_multivariate <- data_info$multivariate
  n <- data_info$n
  d <- data_info$d
  
  validate_params(n_changepoints, penalty, min_segment, n)
  
  if (is.null(prior) && method %in% c("bocpd", "shiryaev")) {
    prior <- if (is_multivariate) {
      normal_wishart(mu0 = rep(0, d))
    } else {
      normal_gamma()
    }
  }
  
  if (is.null(hazard) && method %in% c("bocpd", "shiryaev")) {
    hazard <- geometric_hazard(lambda = 1 / min(100, n / 4))
  }
  
  if (is.null(threshold)) {
    threshold <- switch(method,
      cusum = 4,
      shiryaev = 0.5,
      bocpd = 0.5,
      NULL
    )
  }
  
  result <- switch(method,
    pelt = detect_pelt(data, type, penalty, min_segment, ...),
    bocpd = detect_bocpd(data, type, prior, hazard, threshold, ...),
    cusum = detect_cusum(data, type, threshold, mode, ...),
    binseg = detect_binseg(data, type, penalty, min_segment, n_changepoints, ...),
    wbs = detect_wbs(data, type, penalty, min_segment, n_changepoints, ...),
    shiryaev = detect_shiryaev(data, type, prior, hazard, threshold, ...),
    ensemble = detect_ensemble(data, type, penalty, min_segment, ...)
  )
  
  if (uncertainty && !method %in% c("bocpd", "shiryaev")) {
    result$confidence_intervals <- bootstrap_ci(
      data, method, type, penalty, min_segment, bootstrap_reps, ...
    )
  }
  
  result$segments <- compute_segments(data, result$changepoints, type)
  
  result$n <- n
  result$method <- method
  result$type <- type
  result$mode <- mode
  result$call <- cl
  result$data <- data
  
  class(result) <- c("regime_result", "list")
  
  result
}


#' Validate Input Data
#' @noRd
validate_data <- function(data) {
  if (inherits(data, "ts")) {
    data <- as.numeric(data)
  }
  
  if (is.data.frame(data)) {
    data <- as.matrix(data)
  }
  
  if (is.matrix(data)) {
    if (!is.numeric(data)) {
      cli::cli_abort("Data matrix must be numeric")
    }
    n <- nrow(data)
    d <- ncol(data)
    multivariate <- d > 1
  } else {
    if (!is.numeric(data)) {
      cli::cli_abort("Data must be numeric")
    }
    data <- as.numeric(data)
    n <- length(data)
    d <- 1
    multivariate <- FALSE
  }
  
  if (n < 4) {
    cli::cli_abort("Data must have at least 4 observations")
  }
  
  if (any(is.na(data))) {
    cli::cli_warn("Data contains missing values. These will be handled by interpolation.")
    data <- interpolate_na(data)
  }
  
  if (any(is.infinite(data))) {
    cli::cli_abort("Data contains infinite values")
  }
  
  list(
    data = data,
    n = n,
    d = d,
    multivariate = multivariate
  )
}

#' Validate Parameters
#' @noRd
validate_params <- function(n_changepoints, penalty, min_segment, n) {
  if (is.character(n_changepoints)) {
    if (!n_changepoints %in% c("single", "multiple", "unknown")) {
      cli::cli_abort("n_changepoints must be 'single', 'multiple', 'unknown', or an integer")
    }
  } else if (is.numeric(n_changepoints)) {
    if (n_changepoints < 0 || n_changepoints != round(n_changepoints)) {
      cli::cli_abort("n_changepoints must be a non-negative integer")
    }
  } else {
    cli::cli_abort("n_changepoints must be a character or integer")
  }
  
  if (is.character(penalty)) {
    if (!penalty %in% c("BIC", "AIC", "MBIC", "MDL", "none")) {
      cli::cli_abort("penalty must be 'BIC', 'AIC', 'MBIC', 'MDL', 'none', or a number")
    }
  } else if (!is.numeric(penalty) || length(penalty) != 1) {
    cli::cli_abort("penalty must be a single number or a valid string")
  }
  
  if (!is.numeric(min_segment) || length(min_segment) != 1 ||
      min_segment < 1 || min_segment != round(min_segment)) {
    cli::cli_abort("min_segment must be a positive integer")
  }
  
  if (min_segment > n / 2) {
    cli::cli_abort("min_segment ({min_segment}) cannot be more than half the data length ({n/2})")
  }
  
  invisible(TRUE)
}

#' Interpolate Missing Values
#' @noRd
interpolate_na <- function(data) {
  if (is.matrix(data)) {
    apply(data, 2, interpolate_na_vector)
  } else {
    interpolate_na_vector(data)
  }
}

#' @noRd
interpolate_na_vector <- function(x) {
  na_idx <- which(is.na(x))
  if (length(na_idx) == 0) return(x)
  
  non_na_idx <- which(!is.na(x))
  if (length(non_na_idx) == 0) {
    cli::cli_abort("All values are missing")
  }
  
  x[na_idx] <- stats::approx(non_na_idx, x[non_na_idx], xout = na_idx, rule = 2)$y
  x
}


#' Compute Segment Information
#' @noRd
compute_segments <- function(data, changepoints, type) {
  if (is.matrix(data)) {
    return(compute_segments_multivariate(data, changepoints, type))
  }
  
  n <- length(data)
  
  boundaries <- c(0, sort(changepoints), n)
  
  segments <- vector("list", length(boundaries) - 1)
  
  for (i in seq_along(segments)) {
    start <- boundaries[i] + 1
    end <- boundaries[i + 1]
    segment_data <- data[start:end]
    
    segments[[i]] <- list(
      id = i,
      start = start,
      end = end,
      length = end - start + 1,
      params = list(
        mean = mean(segment_data),
        variance = var(segment_data),
        sd = sd(segment_data),
        median = median(segment_data),
        mad = mad(segment_data)
      )
    )
  }
  
  segments
}

#' @noRd
compute_segments_multivariate <- function(data, changepoints, type) {
  n <- nrow(data)
  d <- ncol(data)
  
  boundaries <- c(0, sort(changepoints), n)
  
  segments <- vector("list", length(boundaries) - 1)
  
  for (i in seq_along(segments)) {
    start <- boundaries[i] + 1
    end <- boundaries[i + 1]
    segment_data <- data[start:end, , drop = FALSE]
    
    segments[[i]] <- list(
      id = i,
      start = start,
      end = end,
      length = end - start + 1,
      params = list(
        mean = colMeans(segment_data),
        variance = apply(segment_data, 2, var),
        covariance = if (nrow(segment_data) > 1) cov(segment_data) else matrix(0, d, d),
        correlation = if (nrow(segment_data) > 2) cor(segment_data) else diag(d)
      )
    )
  }
  
  segments
}


#' Bootstrap Confidence Intervals for Changepoints
#' @noRd
bootstrap_ci <- function(data, method, type, penalty, min_segment, B, ...) {
  n <- if (is.matrix(data)) nrow(data) else length(data)
  
  original <- switch(method,
    pelt = detect_pelt(data, type, penalty, min_segment, ...),
    binseg = detect_binseg(data, type, penalty, min_segment, "multiple", ...),
    wbs = detect_wbs(data, type, penalty, min_segment, "multiple", ...),
    cusum = detect_cusum(data, type, 4, "offline", ...)
  )
  
  n_cp <- length(original$changepoints)
  if (n_cp == 0) return(list())
  
  boot_estimates <- matrix(NA, nrow = B, ncol = n_cp)
  
  for (b in seq_len(B)) {
    boot_data <- block_bootstrap(data, block_size = max(5, n / 20))
    
    boot_result <- tryCatch({
      switch(method,
        pelt = detect_pelt(boot_data, type, penalty, min_segment, ...),
        binseg = detect_binseg(boot_data, type, penalty, min_segment, n_cp, ...),
        wbs = detect_wbs(boot_data, type, penalty, min_segment, n_cp, ...),
        cusum = detect_cusum(boot_data, type, 4, "offline", ...)
      )
    }, error = function(e) list(changepoints = rep(NA, n_cp)))
    
    if (length(boot_result$changepoints) > 0) {
      matched <- match_changepoints(original$changepoints, boot_result$changepoints)
      boot_estimates[b, ] <- matched
    }
  }
  
  ci_list <- vector("list", n_cp)
  for (j in seq_len(n_cp)) {
    valid <- boot_estimates[!is.na(boot_estimates[, j]), j]
    if (length(valid) > 10) {
      ci_list[[j]] <- list(
        estimate = original$changepoints[j],
        lower = quantile(valid, 0.025),
        upper = quantile(valid, 0.975),
        se = sd(valid)
      )
    } else {
      ci_list[[j]] <- list(
        estimate = original$changepoints[j],
        lower = NA,
        upper = NA,
        se = NA
      )
    }
  }
  
  ci_list
}

#' Block Bootstrap
#' @noRd
block_bootstrap <- function(data, block_size = 10) {
  if (is.matrix(data)) {
    n <- nrow(data)
    n_blocks <- ceiling(n / block_size)
    block_starts <- sample(1:(n - block_size + 1), n_blocks, replace = TRUE)
    
    indices <- unlist(lapply(block_starts, function(s) s:(s + block_size - 1)))
    indices <- indices[1:n]
    data[indices, , drop = FALSE]
  } else {
    n <- length(data)
    n_blocks <- ceiling(n / block_size)
    block_starts <- sample(1:(n - block_size + 1), n_blocks, replace = TRUE)
    
    indices <- unlist(lapply(block_starts, function(s) s:(s + block_size - 1)))
    indices <- indices[1:n]
    data[indices]
  }
}

#' Match Bootstrap Changepoints to Original
#' @noRd
match_changepoints <- function(original, bootstrap) {
  if (length(original) == 0 || length(bootstrap) == 0) {
    return(rep(NA, length(original)))
  }
  
  n_orig <- length(original)
  matched <- rep(NA, n_orig)
  
  bootstrap <- sort(bootstrap)
  used <- rep(FALSE, length(bootstrap))
  
  for (i in seq_along(original)) {
    distances <- abs(bootstrap - original[i])
    distances[used] <- Inf
    
    best <- which.min(distances)
    if (length(best) > 0 && distances[best] < Inf) {
      matched[i] <- bootstrap[best]
      used[best] <- TRUE
    }
  }
  
  matched
}


#' @export
print.regime_result <- function(x, ...) {
  cat("\n")
  cat("Regime Change Detection Results\n")
  cat("================================\n\n")
  
  cat("Method:", x$method, "\n")
  cat("Change type:", x$type, "\n")
  cat("Mode:", x$mode, "\n\n")
  
  cat("Data: n =", x$n, "observations\n\n")
  
  cat("Changepoints detected:", x$n_changepoints, "\n")
  
  if (x$n_changepoints > 0) {
    cat("Locations:", paste(x$changepoints, collapse = ", "), "\n\n")
    
    if (!is.null(x$confidence_intervals) && length(x$confidence_intervals) > 0) {
      cat("95% Confidence Intervals:\n")
      for (i in seq_along(x$confidence_intervals)) {
        ci <- x$confidence_intervals[[i]]
        if (!is.na(ci$lower)) {
          cat(sprintf("  CP %d: %d [%d, %d]\n", i, ci$estimate, 
                      round(ci$lower), round(ci$upper)))
        }
      }
      cat("\n")
    }
    
    cat("Segments:\n")
    for (seg in x$segments) {
      cat(sprintf("  Segment %d: [%d, %d] (n=%d) | mean=%.3f, sd=%.3f\n",
                  seg$id, seg$start, seg$end, seg$length,
                  seg$params$mean, seg$params$sd))
    }
  }
  
  if (!is.null(x$information_criterion)) {
    cat("\nInformation Criteria:\n")
    cat(sprintf("  BIC: %.2f\n", x$information_criterion$BIC))
    cat(sprintf("  AIC: %.2f\n", x$information_criterion$AIC))
  }
  
  cat("\n")
  invisible(x)
}

#' @export
summary.regime_result <- function(object, ...) {
  if (object$n_changepoints > 0) {
    cp_df <- data.frame(
      changepoint = seq_along(object$changepoints),
      location = object$changepoints,
      stringsAsFactors = FALSE
    )
    
    if (!is.null(object$confidence_intervals)) {
      cp_df$ci_lower <- sapply(object$confidence_intervals, `[[`, "lower")
      cp_df$ci_upper <- sapply(object$confidence_intervals, `[[`, "upper")
    }
    
    if (!is.null(object$existence_probability)) {
      cp_df$prob <- object$existence_probability
    }
  } else {
    cp_df <- data.frame()
  }
  
  seg_df <- do.call(rbind, lapply(object$segments, function(s) {
    data.frame(
      segment = s$id,
      start = s$start,
      end = s$end,
      n = s$length,
      mean = s$params$mean,
      sd = s$params$sd,
      stringsAsFactors = FALSE
    )
  }))
  
  structure(
    list(
      method = object$method,
      type = object$type,
      n = object$n,
      n_changepoints = object$n_changepoints,
      changepoints = cp_df,
      segments = seg_df,
      information_criterion = object$information_criterion
    ),
    class = "regime_summary"
  )
}
