Applying demographic requirements to a cohort

library(CodelistGenerator)
library(CohortConstructor)
library(CohortCharacteristics)
library(ggplot2)

In this vignette we’ll show how requirements related to patient demographics can be applied to a cohort. Again we’ll use the Eunomia synthetic data.

# Open a DuckDB connection to the Eunomia synthetic dataset. eunomiaDir() is
# namespace-qualified because CDMConnector is not attached by the library()
# calls above, matching the CDMConnector::cdmFromCon() call below.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())

# Create the cdm reference, reading from and writing to the "main" schema;
# "my_study_" is passed as the write prefix.
cdm <- CDMConnector::cdmFromCon(
  con,
  cdmSchema = "main",
  writeSchema = "main",
  writePrefix = "my_study_"
)

Let’s start by creating a cohort of people with a fracture. We’ll first look for codes that might represent a fracture and then build a cohort using these codes, setting cohort exit to 180 days after the fracture.

# Search the vocabulary for candidate concepts matching "fracture", then keep
# only their concept ids in a named list (one entry per cohort to build).
fracture_codes <- getCandidateCodes(cdm, keywords = "fracture")
fracture_codes <- list(fracture = fracture_codes[["concept_id"]])

# Build the concept-based cohort and store it in the cdm as "fracture".
# NOTE(review): no exit argument is passed in this call, so the 180-day exit
# mentioned in the accompanying text is not set here -- confirm intended.
cdm$fracture <- conceptCohort(
  cdm = cdm,
  conceptSet = fracture_codes,
  name = "fracture"
)

# Summarise the cohort attrition and draw it as a flow diagram.
summary_attrition <- cdm$fracture |>
  summariseCohortAttrition()
summary_attrition |>
  plotCohortAttrition()
%0 2->3 3->4 4->5 5->6 6->7 9->10 11->12 13->14 15->16 17->18 1 Database: Synthea Cohort name: fracture 2 N subjects = 1,596 N records = 2,522 3 N subjects = 1,596 N records = 2,522 4 N subjects = 1,596 N records = 2,522 5 N subjects = 1,596 N records = 2,522 6 N subjects = 1,596 N records = 2,522 7 N subjects = 1,596 N records = 2,445 8 Initial qualifying events 9 Record start <= record end 10 N subjects = 0 N records = 0 11 Record in observation 12 N subjects = 0 N records = 0 13 Non-missing sex 14 N subjects = 0 N records = 0 15 Non-missing year of birth 16 N subjects = 0 N records = 0 17 Merge overlapping records 18 N subjects = 0 N records = 77

Restrict cohort by age

We can choose a specific age range for individuals in our cohort using requireAge() from CohortConstructor.

# Keep only records where age at cohort_start_date falls in the 18-100 range.
cdm$fracture <- requireAge(
  cdm$fracture,
  indexDate = "cohort_start_date",
  ageRange = list(c(18, 100))
)

# Re-summarise attrition so the new age requirement shows up in the diagram.
summary_attrition <- cdm$fracture |>
  summariseCohortAttrition()
summary_attrition |>
  plotCohortAttrition()
%0 2->3 3->4 4->5 5->6 6->7 7->8 10->11 12->13 14->15 16->17 18->19 20->21 1 Database: Synthea Cohort name: fracture 2 N subjects = 1,596 N records = 2,522 3 N subjects = 1,596 N records = 2,522 4 N subjects = 1,596 N records = 2,522 5 N subjects = 1,596 N records = 2,522 6 N subjects = 1,596 N records = 2,522 7 N subjects = 1,596 N records = 2,445 8 N subjects = 859 N records = 1,148 9 Initial qualifying events 10 Record start <= record end 11 N subjects = 0 N records = 0 12 Record in observation 13 N subjects = 0 N records = 0 14 Non-missing sex 15 N subjects = 0 N records = 0 16 Non-missing year of birth 17 N subjects = 0 N records = 0 18 Merge overlapping records 19 N subjects = 0 N records = 77 20 Age requirement: 18 to 100 21 N subjects = 737 N records = 1,297

Note that by default individuals are filtered based on the age they were when they entered the cohort.

Restrict cohort by sex

We can also specify a sex criterion for individuals in our cohort using requireSex() from CohortConstructor.

# Restrict the cohort to individuals whose sex is "Female".
cdm$fracture <- requireSex(cdm$fracture, sex = "Female")

# Re-summarise attrition so the sex requirement shows up in the diagram.
summary_attrition <- cdm$fracture |>
  summariseCohortAttrition()
summary_attrition |>
  plotCohortAttrition()
%0 2->3 3->4 4->5 5->6 6->7 7->8 8->9 11->12 13->14 15->16 17->18 19->20 21->22 23->24 1 Database: Synthea Cohort name: fracture 2 N subjects = 1,596 N records = 2,522 3 N subjects = 1,596 N records = 2,522 4 N subjects = 1,596 N records = 2,522 5 N subjects = 1,596 N records = 2,522 6 N subjects = 1,596 N records = 2,522 7 N subjects = 1,596 N records = 2,445 8 N subjects = 859 N records = 1,148 9 N subjects = 447 N records = 604 10 Initial qualifying events 11 Record start <= record end 12 N subjects = 0 N records = 0 13 Record in observation 14 N subjects = 0 N records = 0 15 Non-missing sex 16 N subjects = 0 N records = 0 17 Non-missing year of birth 18 N subjects = 0 N records = 0 19 Merge overlapping records 20 N subjects = 0 N records = 77 21 Age requirement: 18 to 100 22 N subjects = 737 N records = 1,297 23 Sex requirement: Female 24 N subjects = 412 N records = 544

Restrict cohort by amount of prior observation

We can also specify a minimum number of days of prior observation for each individual using requirePriorObservation() from CohortConstructor.

# Require at least 365 days of prior observation before cohort_start_date.
cdm$fracture <- requirePriorObservation(
  cdm$fracture,
  indexDate = "cohort_start_date",
  minPriorObservation = 365
)

# Re-summarise attrition so the prior-observation requirement shows up.
summary_attrition <- cdm$fracture |>
  summariseCohortAttrition()
summary_attrition |>
  plotCohortAttrition()
%0 2->3 3->4 4->5 5->6 6->7 7->8 8->9 9->10 12->13 14->15 16->17 18->19 20->21 22->23 24->25 26->27 1 Database: Synthea Cohort name: fracture 2 N subjects = 1,596 N records = 2,522 3 N subjects = 1,596 N records = 2,522 4 N subjects = 1,596 N records = 2,522 5 N subjects = 1,596 N records = 2,522 6 N subjects = 1,596 N records = 2,522 7 N subjects = 1,596 N records = 2,445 8 N subjects = 859 N records = 1,148 9 N subjects = 447 N records = 604 10 N subjects = 447 N records = 604 11 Initial qualifying events 12 Record start <= record end 13 N subjects = 0 N records = 0 14 Record in observation 15 N subjects = 0 N records = 0 16 Non-missing sex 17 N subjects = 0 N records = 0 18 Non-missing year of birth 19 N subjects = 0 N records = 0 20 Merge overlapping records 21 N subjects = 0 N records = 77 22 Age requirement: 18 to 100 23 N subjects = 737 N records = 1,297 24 Sex requirement: Female 25 N subjects = 412 N records = 544 26 Prior observation requirement: 365 days 27 N subjects = 0 N records = 0

As well as specifying a minimum amount of prior observation, we can require some minimum amount of follow-up by using requireFutureObservation() in a similar way.

Applying multiple demographic requirements to a cohort

We can implement multiple demographic requirements at the same time by using the more general requireDemographics() function.

# Rebuild the fracture cohort from the same concept set, then apply all of the
# demographic requirements in a single requireDemographics() call.
cdm$fracture <- conceptCohort(
  cdm = cdm,
  conceptSet = fracture_codes,
  name = "fracture"
) |>
  requireDemographics(
    indexDate = "cohort_start_date",
    # Passed as a list of c(min, max) vectors, matching the requireAge() call
    # earlier in this vignette.
    ageRange = list(c(18, 100)),
    sex = "Female",
    minPriorObservation = 365,
    minFutureObservation = 30
  )

# Summarise attrition for the rebuilt cohort and draw it as a flow diagram.
summary_attrition <- summariseCohortAttrition(cdm$fracture)
plotCohortAttrition(summary_attrition)
%0 2->3 3->4 4->5 5->6 6->7 7->8 8->9 9->10 10->11 13->14 15->16 17->18 19->20 21->22 23->24 25->26 27->28 29->30 1 Database: Synthea Cohort name: fracture 2 N subjects = 1,596 N records = 2,522 3 N subjects = 1,596 N records = 2,522 4 N subjects = 1,596 N records = 2,522 5 N subjects = 1,596 N records = 2,522 6 N subjects = 1,596 N records = 2,522 7 N subjects = 1,596 N records = 2,445 8 N subjects = 859 N records = 1,148 9 N subjects = 447 N records = 604 10 N subjects = 447 N records = 604 11 N subjects = 445 N records = 598 12 Initial qualifying events 13 Record start <= record end 14 N subjects = 0 N records = 0 15 Record in observation 16 N subjects = 0 N records = 0 17 Non-missing sex 18 N subjects = 0 N records = 0 19 Non-missing year of birth 20 N subjects = 0 N records = 0 21 Merge overlapping records 22 N subjects = 0 N records = 77 23 Age requirement: 18 to 100 24 N subjects = 737 N records = 1,297 25 Sex requirement: Female 26 N subjects = 412 N records = 544 27 Prior observation requirement: 365 days 28 N subjects = 0 N records = 0 29 Future observation requirement: 30 days 30 N subjects = 2 N records = 6