params <-
list(family = "red")

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 4)
library(multivarious)
library(ggplot2)

## ----motivation_example-------------------------------------------------------
set.seed(123)
n_samples <- 100
n_features <- 50

# Create background data (e.g., healthy controls)
# Main variation is in features 1-10
background <- matrix(rnorm(n_samples * n_features), n_samples, n_features)
background[, 1:10] <- background[, 1:10] * 3  # Strong common variation

# Create foreground data (e.g., patients)
# Has the same common variation PLUS disease-specific signal in features 20-25
foreground <- background[1:60, ]  # Start with same structure
foreground[, 20:25] <- foreground[, 20:25] + matrix(rnorm(60 * 6, sd = 2), 60, 6)

# Standard PCA on combined data
all_data <- rbind(background, foreground)
regular_pca <- pca(all_data, ncomp = 2)

# Contrastive PCA
cpca_result <- cPCAplus(X_f = foreground, X_b = background, ncomp = 2)

# Compare what each method finds
loadings_df <- rbind(
  data.frame(
    feature = factor(1:30),
    value   = abs(regular_pca$v[1:30, 1]),
    method  = "Standard PCA"
  ),
  data.frame(
    feature = factor(1:30),
    value   = abs(cpca_result$v[1:30, 1]),
    method  = "Contrastive PCA"
  )
)

ggplot(loadings_df, aes(x = feature, y = value)) +
  geom_col(fill = "#1f78b4") +
  facet_wrap(~method, nrow = 1) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Feature",
    y = "|Loading|",
    title = "Top loading coefficients for PC1"
  )

## ----basic_usage--------------------------------------------------------------
# Basic usage
cpca_fit <- cPCAplus(
  X_f = foreground,  # Your group of interest (foreground)
  X_b = background,  # Your reference group (background)
  ncomp = 5          # Number of components to extract
)

# The result is a bi_projector object with familiar methods
print(cpca_fit)

# Project new data
new_samples <- matrix(rnorm(10 * n_features), 10, n_features)
new_scores <- project(cpca_fit, new_samples)

# Reconstruct using top components
reconstructed <- reconstruct(cpca_fit, comp = 1:2)

## ----understanding_output-----------------------------------------------------
# Which features contribute most to the first contrastive component?
top_features <- order(abs(cpca_fit$v[, 1]), decreasing = TRUE)[1:10]
print(paste("Top contributing features:", paste(top_features, collapse = ", ")))

# How much more variable is each component in foreground vs background?
print(paste("Variance ratios:", paste(round(cpca_fit$values[1:3], 2), collapse = ", ")))

## ----biomedical_example, eval=FALSE-------------------------------------------
# # Identify disease-specific patterns
# tumor_cpca <- cPCAplus(
#   X_f = tumor_samples,
#   X_b = healthy_tissue,
#   ncomp = 10
# )

## ----technical_example, eval=FALSE--------------------------------------------
# # Use technical replicates as background to find biological signal
# bio_cpca <- cPCAplus(
#   X_f = biological_samples,
#   X_b = technical_replicates,
#   ncomp = 5
# )

## ----time_example, eval=FALSE-------------------------------------------------
# # Find patterns specific to treatment timepoint
# treatment_cpca <- cPCAplus(
#   X_f = after_treatment,
#   X_b = before_treatment,
#   ncomp = 5
# )

## ----high_dim-----------------------------------------------------------------
# Create high-dimensional example
n_f <- 50; n_b <- 80; p <- 1000
X_background_hd <- matrix(rnorm(n_b * p), n_b, p)
X_foreground_hd <- X_background_hd[1:n_f, ] + 
                   matrix(c(rnorm(n_f * 20, sd = 2), rep(0, n_f * (p-20))), n_f, p)

# Use sample-space strategy for efficiency
cpca_hd <- cPCAplus(X_f = X_foreground_hd, X_b = X_background_hd,
                    ncomp = 5, strategy = "sample")

## ----regularization-----------------------------------------------------------
# Small background sample size can lead to instability
small_background <- matrix(rnorm(20 * 100), 20, 100)
small_foreground <- matrix(rnorm(30 * 100), 30, 100)

# Add regularization
cpca_regularized <- cPCAplus(
  X_f = small_foreground,
  X_b = small_background,
  ncomp = 5,
  lambda = 0.1  # Regularization parameter for background covariance
)