#' @title Process MS-like omics data
#'
#' @description
#' This function performs common preprocessing steps for mass spectrometry (MS)-like
#' omics datasets, including QC sample removal, zero-to-NA conversion, feature
#' prevalence filtering, transformation, and feature-wise value imputation.
#'
#' @details
#' Steps are applied in this order: sample removal, zero-to-NA conversion,
#' prevalence filtering, imputation, transformation, feature renaming.
#' The outcome of the processing never depends on `verbose`; that flag only
#' controls informational messages.
#'
#' If `seed` is supplied, `set.seed()` is called, which changes the random
#' number generator state of the calling R session.
#'
#' @param X A numeric data frame or matrix (samples in rows, features in columns).
#' @param remove_ids A regex or character vector to filter out rows in `X` (e.g. QCs).
#'   Set to `NULL` to skip.
#' @param min_prev Numeric between 0 and 1. Minimum non-missing prevalence threshold.
#'   Zeros are first converted to NA.
#' @param rename_feat Logical. If `TRUE`, features will be renamed as "feat_n"
#'  and original labels stored.
#' @param transform One of `"none"`, `"log"`, or `"sqrt"`.
#' @param log_base_num Single positive numeric logarithm base (not 1). Required if
#'  `transform = "log"` or `impute = "QRILC"`.
#' @param impute One of `"none"`, `"min_val"`, or `"QRILC"`.
#'  Note: `imputeLCMD::impute.QRILC()` requires log-transformed data.
#'  Log-transform will be forced internally regardless of `transform = ` setting.
#'  If the filtered data contain no missing values, imputation is skipped
#'  (the forced log-transform is still applied for `"QRILC"`).
#' @param min_val_factor Numeric >= 1. Scaling factor for min value imputation:
#'  missing values of a feature are replaced by its minimum observed value
#'  divided by this factor.
#' @param platform whether data was generated by mass spectrometry (`"ms"`) or
#'  nuclear magnetic resonance spectroscopy (`"nmr"`), the latter allowing negative
#'  values in the matrix.
#' @param seed Optional integer. If provided, sets the random seed for reproducible
#'   `imputeLCMD::impute.QRILC()` results.
#' @param verbose Logical. Show messages about the processing steps.
#' @param ... Extra arguments passed to `imputeLCMD::impute.QRILC()`.
#'
#' @return A list:
#' \describe{
#'   \item{X_names}{Feature mapping original vs. new names
#'     (`NULL` if `rename_feat = FALSE`).}
#'   \item{X_processed}{Processed numeric matrix.}
#' }
#'
#' @seealso [imputeLCMD::impute.QRILC()] for imputing missing values.
#'
#' @references
#' Lazar, C., Gatto, L., Ferro, M., Bruley, C., & Burger, T. (2016).
#' Accounting for the multiple natures of missing values in label-free quantitative
#' proteomics data sets to compare imputation strategies.
#' *Journal of Proteome Research*, 15(4), 1116–1125. \doi{10.1021/acs.jproteome.5b00981}
#'
#' Wei, R., Wang, J., Su, M., Jia, E., Chen, S., Chen, T., & Ni, Y. (2018).
#' Missing value imputation approach for mass spectrometry-based metabolomics data.
#' *Scientific Reports*, 8, 663. \doi{10.1038/s41598-017-19120-0}
#'
#' @examples
#' X <- matrix(sample(c(0:10), size = 80, replace = TRUE),
#'             nrow = 20, ncol = 4,
#'             dimnames = list(paste0("sample", 1:20),
#'                             paste0("feat", 1:4)))
#'
#' # Warns if NAs remain after filtering, because impute = "none" by default
#' result <- process_ms(X, verbose = FALSE)
#'
#' @export
process_ms <- function(X, remove_ids = NULL, min_prev = 0.8, rename_feat = TRUE,
                       transform = c("none", "log", "sqrt"), log_base_num = 10,
                       impute = c("none", "min_val", "QRILC"), min_val_factor = 1,
                       platform = c("ms", "nmr"), seed = NULL,
                       verbose = TRUE, ...) {

  # Match argument choices
  transform <- match.arg(transform)
  impute <- match.arg(impute)
  platform <- match.arg(platform)

  # Check X
  X <- check_omics_matrix(X, platform = platform)

  # Check parameters.
  # The QRILC override is resolved first so that 'log_base_num' is also
  # validated when the log-transform is forced rather than requested.
  if (impute == "QRILC" && transform != "log") {
    warning("Overriding 'transform' to 'log' for QRILC compatibility.\n",
            "The default logarithm base 10 will be used unless specified otherwise.\n")
    transform <- "log"
  }
  # Length is tested before NA/range so that '||' only ever sees scalars.
  # A base <= 0 or == 1 would silently turn the whole matrix into NaN/Inf.
  if (transform == "log" &&
      (!is.numeric(log_base_num) || length(log_base_num) != 1 ||
       is.na(log_base_num) || log_base_num <= 0 || log_base_num == 1)) {
    stop("'log_base_num' must be a numeric logarithm base value.")
  }
  if (impute == "min_val" &&
      (!is.numeric(min_val_factor) || length(min_val_factor) != 1 ||
       is.na(min_val_factor) || min_val_factor < 1)) {
    stop("'min_val_factor' must be a numeric value >= 1")
  }

  if (!is.null(seed)) {
    if (!is.numeric(seed) || length(seed) != 1) {
      stop("'seed' must be a single numeric value.")
    }
    # NOTE: this alters the RNG state of the calling session.
    set.seed(seed)
  }

  # 1. Remove samples (e.g. QC, blanks, etc.)
  X_noqc <- remove_samples(X, remove_ids = remove_ids)

  # 2. Convert data zeros to NA (assumed MNAR, left-censored)
  X_noqc[X_noqc == 0] <- NA

  # 3. Feature prevalence filtering
  X_prev <- remove_features(X_noqc, label = "NA", min_prev = min_prev,
                            verbose = verbose)

  # 4. Imputation
  has_na <- any(is.na(X_prev))
  # Tracks whether the log-transform was already applied as part of QRILC,
  # so that step 5 neither repeats nor forgets it.
  log_applied <- FALSE

  if (impute == "none") {
    # Only warn when there really are missing values left.
    if (has_na) {
      warning("Your data contains NAs. Samples could be silently omitted in downstream analyses.\n")
    }
    X_imp <- X_prev
  } else if (!has_na) {
    # Nothing to impute; 'verbose' controls the message only, not the branch.
    if (verbose) {
      message("Your data has no missing values. Imputation will be skipped.\n")
    }
    X_imp <- X_prev
  } else if (impute == "min_val") {
    feat_min <- apply(X_prev, 2, min, na.rm = TRUE)
    min_imp <- feat_min / min_val_factor
    X_imp <- X_prev
    # Replace NA with the column-wise (feature-wise) imputation value
    for (j in seq_len(ncol(X_imp))) {
      X_imp[is.na(X_imp[, j]), j] <- min_imp[j]
    }
  } else {
    # impute == "QRILC": log-transform before imputation, as QRILC expects
    X_imp <- log(X_prev, base = log_base_num)
    X_imp <- imputeLCMD::impute.QRILC(X_imp, ...)[[1]]
    log_applied <- TRUE
  }

  # 5. Transformation
  if (transform == "log") {
    X_trans <- if (log_applied) X_imp else log(X_imp, base = log_base_num)
  } else if (transform == "sqrt") {
    X_trans <- sqrt(X_imp)
  } else {
    X_trans <- X_imp
  }

  # 6. Rename features (improved compatibility for downstream analysis)
  if (rename_feat) {
    X_names <- data.frame(feat_id = paste0("feat_", seq_len(ncol(X_trans))),
                          feat_name = colnames(X_trans))

    colnames(X_trans) <- X_names$feat_id
  } else {
    X_names <- NULL
  }

  list(X_names = X_names,
       X_processed = X_trans)
}
