---
title: "Working with JSON Databases in boilerplate"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Working with JSON Databases in boilerplate}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(boilerplate)
```

## Introduction

The `boilerplate` package now supports JSON format for all database operations. JSON provides several advantages over the traditional RDS format:

- **Human-readable**: JSON files can be opened and edited in any text editor
- **Version control friendly**: Changes are easily tracked in Git
- **Language agnostic**: JSON files can be read by any programming language
- **Web-friendly**: JSON is the standard format for web applications
- **Template variables**: Support for `{{variable}}` placeholders in your text

## Basic JSON Operations

### Importing and Saving JSON Databases

```{r json-basic}
# Create a temporary directory for examples
temp_dir <- tempdir()
json_path <- file.path(temp_dir, "json_example")
dir.create(json_path, showWarnings = FALSE)

# Create a sample database
sample_db <- list()

# Add methods entries
sample_db <- boilerplate_add_entry(
  sample_db,
  path = "methods.sampling",
  value = "Participants were randomly selected from {{population}}."
)

sample_db <- boilerplate_add_entry(
  sample_db,
  path = "methods.analysis.regression",
  value = "We conducted linear regression using {{software}}."
)

# Add measures
sample_db <- boilerplate_add_entry(
  sample_db,
  path = "measures.age",
  value = list(
    name = "Age",
    description = "Participant age in years",
    type = "continuous",
    range = c(18, 65)
  )
)

# Save as JSON
boilerplate_save(
  sample_db,
  data_path = json_path,
  format = "json",
  confirm = FALSE,
  quiet = TRUE,
  create_dirs = TRUE
)

# Import JSON database (auto-detects format)
imported_db <- boilerplate_import(
  data_path = json_path,
  quiet = TRUE
)

# Check structure
# Note: Since we only added methods and measures, those will be the only categories
str(imported_db, max.level = 3)
```

### Working with Category-Specific JSON Files

```{r json-categories}
# Save categories with proper structure
methods_db <- list(methods_db = sample_db$methods)
measures_db <- list(measures_db = sample_db$measures)

jsonlite::write_json(
  methods_db,
  file.path(json_path, "methods_db.json"),
  pretty = TRUE,
  auto_unbox = TRUE
)

jsonlite::write_json(
  measures_db,
  file.path(json_path, "measures_db.json"),
  pretty = TRUE,
  auto_unbox = TRUE
)

# Import specific category
methods_only <- boilerplate_import(
  data_path = json_path,
  category = "methods",
  quiet = TRUE
)

names(methods_only)
```

## Migrating from RDS to JSON

If you have existing RDS databases, you can easily migrate them to JSON format:

```{r migration}
# Create RDS databases for migration example
rds_path <- file.path(temp_dir, "rds_example")
dir.create(rds_path, showWarnings = FALSE)

# Save as RDS first
saveRDS(sample_db$methods, file.path(rds_path, "methods_db.rds"))
saveRDS(sample_db$measures, file.path(rds_path, "measures_db.rds"))

# Migrate to JSON
migration_output <- file.path(temp_dir, "migrated_json")
results <- boilerplate_migrate_to_json(
  source_path = rds_path,
  output_path = migration_output,
  format = "unified",  # Creates a single unified JSON file
  backup = TRUE,       # Creates backup of RDS files
  quiet = FALSE
)

# Check migration results
print(results$migrated)

# Verify the migrated data
# The migrated file is in the output
# directory
migrated_file <- file.path(migration_output, "boilerplate_unified.json")
if (file.exists(migrated_file)) {
  migrated_db <- boilerplate_import(
    data_path = migrated_file,
    quiet = TRUE
  )
  names(migrated_db)
} else {
  # Alternative: import from the directory
  migrated_db <- boilerplate_import(
    data_path = migration_output,
    quiet = TRUE
  )
  names(migrated_db)
}
```

## Batch Editing JSON Databases

The package provides tools for batch editing JSON databases:

```{r batch-edit}
# Create a measures database for editing
measures_db <- list(
  anxiety_scale = list(
    name = "Generalized Anxiety Disorder 7-item",
    description = "GAD-7 anxiety measure",
    reference = "Spitzer2006",
    items = list(
      "Feeling nervous or on edge",
      "Not being able to stop worrying"
    )
  ),
  depression_scale = list(
    name = "Patient Health Questionnaire",
    description = "PHQ-9 depression measure",
    reference = "Kroenke2001",
    items = list(
      "Little interest or pleasure",
      "Feeling down or hopeless"
    )
  )
)

# Batch update: replace every reference field with a single new value
updated_db <- boilerplate_batch_edit(
  db = measures_db,               # Can also pass file path directly
  field = "reference",
  new_value = "@reference_2024",  # This will update all references
  target_entries = "*",           # Apply to all entries
  preview = FALSE,                # Don't preview, just update
  confirm = FALSE,                # Don't ask for confirmation
  quiet = TRUE                    # Suppress messages
)

# To add an @ prefix to each existing reference while keeping its value,
# loop over the entries directly (or see boilerplate_batch_clean):
for (measure in names(measures_db)) {
  if (!is.null(measures_db[[measure]]$reference)) {
    ref <- measures_db[[measure]]$reference
    if (!startsWith(ref, "@")) {
      measures_db[[measure]]$reference <- paste0("@", ref)
    }
  }
}

# Check the updates
measures_db$anxiety_scale$reference
measures_db$depression_scale$reference
```

## Standardising Measures in JSON Format

```{r standardise}
# Standardise measures database
standardised <- boilerplate_standardise_measures(
  db = measures_db,
  json_compatible = TRUE,
  quiet = TRUE
)

# Check
# standardisation added missing fields
str(standardised$anxiety_scale)
```

## Validating JSON Structure and Health

The package provides multiple ways to validate your JSON databases:

### Schema Validation

```{r validate, eval=FALSE}
# Save a JSON database
boilerplate_save(
  measures_db,
  data_path = temp_dir,
  category = "measures",
  format = "json",
  confirm = FALSE,
  quiet = TRUE
)

json_file <- file.path(temp_dir, "measures_db.json")

# Validate structure (requires schema files)
validation_errors <- validate_json_database(
  json_file,
  type = "measures"
)

if (length(validation_errors) == 0) {
  message("JSON structure is valid!")
} else {
  message("Validation errors found:")
  print(validation_errors)
}
```

### Database Validation

```{r validation}
# Validate the saved JSON file
json_file <- file.path(json_path, "boilerplate_unified.json")
if (file.exists(json_file)) {
  validation_errors <- validate_json_database(json_file, type = "unified")
  if (length(validation_errors) == 0) {
    message("JSON database structure is valid!")
  } else {
    warning("Database validation found issues:")
    print(validation_errors)
  }
}

# Check that paths exist
methods_paths <- boilerplate_list_paths(boilerplate_methods(sample_db))
cat("Methods entries:", length(methods_paths), "\n")

measures_names <- names(boilerplate_measures(sample_db))
cat("Measures entries:", length(measures_names), "\n")
```

## Integration with Existing Workflow

JSON databases work seamlessly with all existing boilerplate functions:

```{r integration}
# Generate text using JSON database
text <- boilerplate_generate_text(
  category = "methods",
  sections = "sampling",
  db = imported_db,
  global_vars = list(
    population = "university students"
  )
)
cat(text)

# Generate text from nested path
analysis_text <- boilerplate_generate_text(
  category = "methods",
  sections = "analysis.regression",
  db = imported_db,
  global_vars = list(
    software = "R version 4.3.0"
  )
)
cat(analysis_text)

# Generate measures text
measures_text <-
  boilerplate_generate_measures(
    variable_heading = "Demographics",
    variables = "age",
    db = imported_db
  )
cat(measures_text)
```

## Best Practices

1. **Use meaningful file names**: Name your JSON files descriptively (e.g., `study1_methods.json`)
2. **Version control**: JSON files work great with Git — commit them to track changes
3. **Use template variables**: Include `{{variable}}` placeholders in your text for dynamic content:

   ```r
   db <- boilerplate_add_entry(
     db,
     path = "methods.power",
     value = "Power analysis indicated {{n_required}} participants needed for {{power}}% power."
   )
   ```

4. **Regular validation**: Use `validate_json_database()` to ensure database structure is correct
5. **Backup before migration**: Use the `backup = TRUE` option when migrating
6. **Choose appropriate format**:
   - Use unified format for complete databases
   - Use separate files for modular management
7. **Track database contents**: Regularly check database contents:

   ```r
   # List all paths
   methods_paths <- boilerplate_list_paths(boilerplate_methods(db))
   measures_names <- names(boilerplate_measures(db))
   ```

## Conclusion

JSON support in boilerplate provides a modern, flexible way to manage your research text databases. Whether you're starting fresh or migrating existing RDS databases, the JSON functionality integrates seamlessly with your workflow while providing better visibility and version control.

```{r cleanup, include=FALSE}
# Clean up
unlink(c(json_path, rds_path, migration_output), recursive = TRUE)
```