---
title: "Bibliography Management with boilerplate"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Bibliography Management with boilerplate}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunks in this vignette are shown but not run (eval = FALSE).
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```

# Introduction

The boilerplate package provides powerful bibliography management features that integrate seamlessly with your scientific writing workflow. This vignette demonstrates how to set up centralised bibliography management, validate citations, and ensure consistency across your manuscripts.

# Overview: Why Centralised Bibliography Management?

Managing references across multiple manuscripts can be challenging. Common problems include:

- Inconsistent citation formatting
- Missing references in bibliography files
- Duplicated effort updating references across projects
- Version control conflicts with large .bib files

The boilerplate package solves these problems by:

1. **Centralising** your bibliography in one location (e.g., GitHub)
2. **Caching** bibliography files locally for performance
3. **Validating** that all citations exist in your bibliography
4. **Automating** bibliography distribution to project directories

# Setting Up Bibliography Management

## Step 1: Configure Your Bibliography Source

Add bibliography information to your boilerplate database:

```{r setup-bibliography}
library(boilerplate)

# Load your database
# Initialise with default content if needed
temp_bib <- file.path(tempdir(), "bibliography_example")
boilerplate_init(
  data_path = temp_bib,
  create_empty = FALSE,
  create_dirs = TRUE,
  confirm = FALSE,
  quiet = TRUE
)
db <- boilerplate_import(data_path = temp_bib, quiet = TRUE)

# Add bibliography configuration
# Using the example bibliography included with the package
example_bib <- system.file(
  "extdata", "example_references.bib",
  package = "boilerplate"
)
db <- boilerplate_add_bibliography(
  db,
  url = paste0("file://", example_bib),
  local_path = "references.bib",
  validate = TRUE
)

# Save the updated database
boilerplate_save(db, data_path = temp_bib, confirm = FALSE, quiet = TRUE)
```

## Step 2: Download and Cache the Bibliography

The bibliography is automatically cached for performance:

```{r download-bibliography}
# Download/update bibliography (cached for 7 days by default)
bib_file <- boilerplate_update_bibliography(db)

# Force update if needed
bib_file <- boilerplate_update_bibliography(db, force = TRUE)

# Check cache age
boilerplate_update_bibliography(db, force = FALSE)
#> ℹ Using cached bibliography from /path/to/cache/references.bib
#> ⚠ Bibliography cache is 5.2 days old. Consider using force=TRUE to update.
```

## Step 3: Copy Bibliography to Your Project

When working on a manuscript, copy the bibliography to your project directory:

```{r copy-bibliography}
# Copy to current project
boilerplate_copy_bibliography(db, target_dir = ".")

# Copy and update from source first
boilerplate_copy_bibliography(db, target_dir = ".", update_first = TRUE)

# The bibliography is now available as ./references.bib
```

# Validating References

## Check All Citations Exist

Ensure all citations in your boilerplate text exist in the bibliography:

```{r validate-references}
# Validate references across all text categories
validation <- boilerplate_validate_references(db)

# Check specific categories only
validation <- boilerplate_validate_references(
  db,
  categories = c("methods", "results")
)

# Review validation results
if (!validation$valid) {
  cat("Missing references:\n")
  print(validation$missing)
}

# See all available references
length(validation$available)
#> [1] 1847  # Example: large bibliography

# See which references are actually used
validation$used
#> [1] "@smith2023" "@jones2024" "@doe2022meta"
```

## Handle Missing References

When validation finds missing references:

```{r handle-missing}
# Example validation with missing references
validation <- boilerplate_validate_references(db, quiet = TRUE)

if (length(validation$missing) > 0) {
  cat("Please add these references to your bibliography:\n")
  # sep = "" stops cat() from inserting a space before every bullet
  # after the first one
  cat(paste0("- ", validation$missing, "\n"), sep = "")

  # Generate BibTeX entries for missing references
  # (This is a manual process - add to your central .bib file)
  for (ref in validation$missing) {
    # Drop the leading "@" of the citation to obtain the BibTeX key
    cat("\n@article{", sub("^@", "", ref), ",\n", sep = "")
    cat("  title = {},\n")
    cat("  author = {},\n")
    cat("  journal = {},\n")
    cat("  year = {},\n")
    cat("}\n")
  }
}
```

# Integration with Document Generation

## Automatic Bibliography Distribution

When generating text, automatically copy the bibliography:

```{r generate-with-bibliography}
# Generate methods text with automatic bibliography copying
methods_text <- boilerplate_generate_text(
  category = "methods",
  sections = c("sample.default", "statistical.default"),
  global_vars = list(n = 1000),
  db = db,
  copy_bibliography = TRUE,
  bibliography_path = "."  # Copy to project root
)

# The bibliography is now available for your Quarto/R Markdown document
```

## Quarto/R Markdown Integration

In your Quarto or R Markdown document:

```yaml
---
title: "My Research Paper"
bibliography: references.bib
---
```

```{r document-setup}
# Your document continues with access to all references
# All citations in boilerplate text will be properly resolved
```

# Advanced Workflows

## Project-Specific Bibliography Subsets

For large bibliographies, create project-specific subsets:

```{r subset-bibliography}
# Get citations used in current project
validation <- boilerplate_validate_references(db)
used_refs <- validation$used

# Read full bibliography
bib_lines <- readLines("references.bib")

# Extract entries for used citations
# Note: This is a simplified example - real implementation would need
# proper BibTeX parsing
# The function extract_bibtex_entries() is not part of boilerplate
# You would need to implement this or use a BibTeX parsing package

# Example approach (pseudo-code):
# library(RefManageR)  # or another BibTeX parsing package
# bib_data <- ReadBib("references.bib")
# project_bib <- bib_data[used_refs]
# WriteBib(project_bib, "project_references.bib")
```

## Multi-Author Collaboration

For collaborative projects with shared boilerplate:

```{r collaboration-workflow}
# 1. Team lead sets up central bibliography
team_db <- boilerplate_import()
team_db <- boilerplate_add_bibliography(
  team_db,
  url = "https://github.com/our-lab/shared-refs/raw/main/lab_references.bib",
  local_path = "lab_references.bib"
)

# 2. Each team member updates their local cache
bib_file <- boilerplate_update_bibliography(team_db, force = TRUE)

# 3. Validate before submission
validation <- boilerplate_validate_references(team_db)
stopifnot(validation$valid)  # Ensure no missing references
```

## Automated Reference Checking

Add to your CI/CD pipeline:

```{r ci-check}
# .github/workflows/check-references.yml
# Run this check on every pull request

# In R script: check_references.R
library(boilerplate)

db <- boilerplate_import()
validation <- boilerplate_validate_references(db, quiet = TRUE)

if (!validation$valid) {
  stop(
    "Missing references found: ",
    paste(validation$missing, collapse = ", ")
  )
}

message("All references validated successfully!")
```

# Best Practices

## 1. Maintain a Central Bibliography

- Keep your bibliography in version control (e.g., GitHub)
- Use a consistent naming scheme for citation keys
- Update and maintain the bibliography regularly
- Consider using tools like Zotero with Better BibTeX for key management

## 2. Cache Management

```{r cache-management}
# Check cache location
# The cache directory uses R's standard user directory
cache_dir <- tools::R_user_dir("boilerplate", "cache")
# This provides a platform-independent location that complies with CRAN
# policies. Defaults (overridable via R_USER_CACHE_DIR or XDG_CACHE_HOME):
# - On Linux: ~/.cache/R/boilerplate
# - On macOS: ~/Library/Caches/org.R-project.R/R/boilerplate
# - On Windows: %LOCALAPPDATA%/R/cache/R/boilerplate

# View cached files
if (dir.exists(cache_dir)) {
  list.files(cache_dir, pattern = "\\.bib$")
}

# Clear old cache if needed
if (dir.exists(cache_dir)) {
  cached_bibs <- list.files(
    cache_dir,
    pattern = "\\.bib$",
    full.names = TRUE
  )
  # file.mtime() returns POSIXct, so the cutoff must be a date-time too;
  # comparing against a Date (Sys.Date() - 30) mixes seconds with days and
  # never selects any file.
  cutoff <- Sys.time() - as.difftime(30, units = "days")
  # which() drops files whose modification time could not be read (NA)
  stale_bibs <- cached_bibs[which(file.mtime(cached_bibs) < cutoff)]
  if (length(stale_bibs) > 0) {
    file.remove(stale_bibs)
  }
}
```

## 3. Version Control Strategy

- Don't commit large .bib files to every project
- Use .gitignore for local bibliography copies
- Track bibliography URL in boilerplate database
- Use git-lfs for very large bibliography files

## 4. Citation Key Conventions

Establish consistent citation keys:

```
@article{bulbulia2024methods,    # author + year + keyword
@book{smith2023statistics,       # clear, memorable keys
@inproceedings{jones2024ml,      # avoid special characters
```

# Troubleshooting

## Common Issues and Solutions

### Bibliography Not Downloading

```{r troubleshoot-download}
# Check URL is accessible
bib_url <- db$bibliography$url
con <- url(bib_url)
tryCatch(
  {
    open(con)
    message("Bibliography URL is reachable: ", bib_url)
  },
  error = function(e) {
    # If this fails, check network/firewall settings
    message("Could not open ", bib_url, ": ", conditionMessage(e))
  },
  # Always release the connection, whether or not open() succeeded
  finally = close(con)
)
```

### Cache Issues

```{r troubleshoot-cache}
# Force fresh download
bib_file <- boilerplate_update_bibliography(db, force = TRUE)

# Check cache directory permissions
cache_dir <- tools::R_user_dir("boilerplate", "cache")
if (dir.exists(cache_dir)) {
  file.access(cache_dir, mode = 2)  # 0 = success
}
```

### Validation Errors

```{r troubleshoot-validation}
# Debug validation issues
validation <- boilerplate_validate_references(db, quiet = FALSE)

# Check specific text for citations
text <- db$methods$sample$default
# regmatches() extracts the citation keys themselves; grep(value = TRUE)
# would return the whole text of every element containing a citation
citation_pattern <- "@[a-zA-Z0-9_:-]+"
citations <- unlist(regmatches(text, gregexpr(citation_pattern, text)))
print(citations)
```

# Complete Example Workflow

Here's a complete workflow from setup to document generation:

```{r complete-example}
# 1. Initial setup (run once)
library(boilerplate)

# Initialise new project
temp_complete <- file.path(tempdir(), "complete_example")
boilerplate_init(
  data_path = temp_complete,
  create_empty = FALSE,
  create_dirs = TRUE,
  confirm = FALSE,
  quiet = TRUE
)

# Import database
db <- boilerplate_import(data_path = temp_complete, quiet = TRUE)

# Configure bibliography
# Using the example bibliography included with the package
example_bib <- system.file(
  "extdata", "example_references.bib",
  package = "boilerplate"
)
db <- boilerplate_add_bibliography(
  db,
  url = paste0("file://", example_bib),
  local_path = "references.bib"
)

# Save configuration
boilerplate_save(db, data_path = temp_complete, confirm = FALSE, quiet = TRUE)

# 2. Daily workflow
# Update bibliography if needed
boilerplate_update_bibliography(db)

# Copy to project
boilerplate_copy_bibliography(db, target_dir = ".")

# 3. Before submission
# Validate all references
validation <- boilerplate_validate_references(db)
if (validation$valid) {
  message("Ready for submission!")
} else {
  warning("Missing references: ", paste(validation$missing, collapse = ", "))
}

# 4. Generate final document
final_text <- boilerplate_generate_text(
  category = "methods",
  sections = c("sample.default", "statistical.default"),
  db = db,
  copy_bibliography = TRUE
)
```

# Summary

The boilerplate package's bibliography management features provide:

1. **Centralised management** - One bibliography, many projects
2. **Automatic distribution** - Bibliography copied when needed
3. **Validation** - Ensure all citations are defined
4. **Caching** - Fast local access with periodic updates
5. **Integration** - Works seamlessly with Quarto/R Markdown

By following this workflow, you can maintain consistent, accurate references across all your manuscripts while reducing duplicate effort and potential errors.