## -----------------------------------------------------------------------------
# Load the necessary libraries
library(bkmr)
library(simBKMRdata)
library(tidyverse)


## -----------------------------------------------------------------------------
# Set the seed for reproducibility
set.seed(2025)

# Load the dataset and preprocess
data("metalExposChildren_df")

bkmrAnalysisData_df <- 
  metalExposChildren_df %>%
  select(QI, Cadmium:Manganese) %>%  # Selecting relevant columns
  na.omit() %>%  # Remove rows with missing values
  mutate_at(
    vars(Cadmium:Manganese), ~ log10(. + 1) %>% as.vector
  )  # Log-transform metal concentrations


## -----------------------------------------------------------------------------
# Create a histogram with density plot for each metal
metalDataPlot <- bkmrAnalysisData_df %>%
  select(Cadmium:Manganese) %>%
  pivot_longer(cols = everything(), names_to = "Metal", values_to = "Value") %>%
  ggplot(aes(x = Value, fill = Metal)) + 
  geom_histogram(aes(y = ..density..), bins = 30, alpha = 0.5, position = "identity") + 
  geom_density(aes(color = Metal), linewidth = 1) + 
  facet_wrap(~Metal, scales = "free") + 
  theme_minimal() + 
  labs(title = "Histogram with Density Plot for Metals", 
       x = "Concentration", 
       y = "Density") + 
  theme(legend.position = "none")

# Display the plot
metalDataPlot


## -----------------------------------------------------------------------------
# Extract the response variable (IQ score or QI)
iq <- bkmrAnalysisData_df %>%
  pull(QI) %>%
  na.omit()

# Convert exposure variables (metals) to a matrix for modeling
expos <- data.matrix(bkmrAnalysisData_df %>% select(Cadmium:Manganese))


## -----------------------------------------------------------------------------
# Generate knot points using a cover design for Bayesian modeling
knots50 <- fields::cover.design(expos, nd = 50)$design


## -----------------------------------------------------------------------------
#| label: run-BKMR-MCMC
# Fit the BKMR model using MCMC
modelFit <- kmbayes(
  y = iq,         # Response variable
  Z = expos,      # Exposure matrix (metal concentrations)
  X = NULL,       # No additional covariates
  iter = 1000,    # Number of MCMC iterations
  family = "gaussian",  # Gaussian response
  verbose = TRUE,       # Output progress for each iteration
  varsel = TRUE,       # Perform variable selection
  knots = knots50      # Knot points generated earlier
)


## -----------------------------------------------------------------------------
# Extract posterior inclusion probabilities (PIPs) and sort them
pipFit <- ExtractPIPs(modelFit) %>%
  arrange(desc(PIP))

# Display the PIPs
pipFit


## -----------------------------------------------------------------------------
# Calculate the dynamic threshold for model inclusion
pipThresh_fn <- calculate_pip_threshold(
  absCV = sd(bkmrAnalysisData_df$QI) / mean(bkmrAnalysisData_df$QI),  # Coefficient of variation
  sampSize = length(bkmrAnalysisData_df$QI)  # Sample size
)

# Display the threshold
pipThresh_fn


## -----------------------------------------------------------------------------
# Identify exposures with PIPs greater than the threshold
significant_exposures <- pipFit %>%
  filter(PIP > pipThresh_fn)

# Display significant exposures
significant_exposures