Choice simulation converts experimental designs into realistic choice
data by predicting how respondents would answer choice questions. This
is essential for testing designs, conducting power analyses, and
validating experimental assumptions before data collection. This article
shows how to use cbc_choices()
to simulate choice
patterns.
Before starting, let’s define some basic profiles and a basic random design to work with:
library(cbcTools)
profiles <- cbc_profiles(
price = c(1, 1.5, 2, 2.5, 3),
type = c('Fuji', 'Gala', 'Honeycrisp'),
freshness = c('Poor', 'Average', 'Excellent')
)
design <- cbc_design(
profiles = profiles,
method = "random",
n_alts = 2,
n_q = 6,
n_resp = 100
)
design
#> Design method: random
#> Structure: 100 respondents × 6 questions × 2 alternatives
#> Profile usage: 45/45 (100.0%)
#>
#> 💡 Use cbc_inspect() for a more detailed summary
#>
#> First few rows of design:
#> profileID respID qID altID obsID price typeGala typeHoneycrisp
#> 1 31 1 1 1 1 1.0 0 0
#> 2 15 1 1 2 1 3.0 0 1
#> 3 14 1 2 1 2 2.5 0 1
#> 4 3 1 2 2 2 2.0 0 0
#> 5 42 1 3 1 3 1.5 0 1
#> 6 43 1 3 2 3 2.0 0 1
#> freshnessAverage freshnessExcellent
#> 1 0 1
#> 2 0 0
#> 3 0 0
#> 4 0 0
#> 5 0 1
#> 6 0 1
#> ... and 1194 more rows
cbc_choices() supports two simulation approaches: random simulation (the default, which requires no priors) and utility-based simulation (which uses priors to generate realistic choice patterns).
Without priors, choices are simulated randomly with equal probabilities:
# Random choice simulation (default)
choices_random <- cbc_choices(design)
head(choices_random)
#> CBC Choice Data
#> ===============
#> Observations: 3 choice tasks
#> Alternatives per task: 2
#> Total choices made: 3
#>
#> Simulation method: random
#> Priors: None (random choices)
#> Simulated at: 2025-07-14 10:45:28
#>
#> Choice rates by alternative:
#> Alt 1: 33.3% (1 choices)
#> Alt 2: 66.7% (2 choices)
#>
#> First few rows:
#> profileID respID qID altID obsID price typeGala typeHoneycrisp
#> 1 31 1 1 1 1 1.0 0 0
#> 2 15 1 1 2 1 3.0 0 1
#> 3 14 1 2 1 2 2.5 0 1
#> 4 3 1 2 2 2 2.0 0 0
#> 5 42 1 3 1 3 1.5 0 1
#> 6 43 1 3 2 3 2.0 0 1
#> freshnessAverage freshnessExcellent choice
#> 1 0 1 0
#> 2 0 0 1
#> 3 0 0 0
#> 4 0 0 1
#> 5 0 1 1
#> 6 0 1 0
# Check choice distribution
table(choices_random$choice, choices_random$altID)
#>
#> 1 2
#> 0 294 306
#> 1 306 294
Random simulation is useful for testing the survey data structure and analysis code without making any assumptions about preferences.
With priors, choices follow realistic utility-based patterns:
# Create priors for utility-based simulation
priors <- cbc_priors(
profiles = profiles,
price = -0.25, # Negative preference for higher prices
type = c(0.5, 1), # Gala and Honeycrisp preferred over Fuji
freshness = c(0.6, 1.2) # Average and Excellent preferred over Poor
)
# Utility-based choice simulation
choices_utility <- cbc_choices(design, priors = priors)
head(choices_utility)
#> CBC Choice Data
#> ===============
#> Observations: 3 choice tasks
#> Alternatives per task: 2
#> Total choices made: 3
#>
#> Simulation method: utility_based
#> Priors: Used for utility-based simulation
#> Simulated at: 2025-07-14 10:45:28
#>
#> Choice rates by alternative:
#> Alt 1: 66.7% (2 choices)
#> Alt 2: 33.3% (1 choices)
#>
#> First few rows:
#> profileID respID qID altID obsID price typeGala typeHoneycrisp
#> 1 31 1 1 1 1 1.0 0 0
#> 2 15 1 1 2 1 3.0 0 1
#> 3 14 1 2 1 2 2.5 0 1
#> 4 3 1 2 2 2 2.0 0 0
#> 5 42 1 3 1 3 1.5 0 1
#> 6 43 1 3 2 3 2.0 0 1
#> freshnessAverage freshnessExcellent choice
#> 1 0 1 1
#> 2 0 0 0
#> 3 0 0 0
#> 4 0 0 1
#> 5 0 1 1
#> 6 0 1 0
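Under the hood, utility-based simulation draws each choice from the multinomial logit probabilities implied by the priors. The following sketch illustrates the idea (rather than the package's internal code) by computing those probabilities by hand for the first choice question, using the coefficient values passed to cbc_priors() above and the dummy-coded columns shown in the printed design:
# Hand-compute logit choice probabilities for the first question (obsID == 1)
df <- as.data.frame(choices_utility)
beta <- c(
  price = -0.25,
  typeGala = 0.5, typeHoneycrisp = 1,
  freshnessAverage = 0.6, freshnessExcellent = 1.2
)
q1 <- as.matrix(df[df$obsID == 1, names(beta)])
v <- q1 %*% beta # utility of each alternative
round(exp(v) / sum(exp(v)), 2) # choice probabilities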
The simulated choice data includes all design columns plus a choice column indicating whether each alternative was chosen (1) or not (0), with exactly one alternative chosen in each choice question (see the head(choices_utility) output above).
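As a quick structural check, you can verify this one-choice-per-question property directly (a small base-R sketch; obsID indexes choice questions):
# Each choice question (obsID) should contain exactly one chosen alternative
all(tapply(choices_utility$choice, choices_utility$obsID, sum) == 1)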
For designs with no-choice options, specify no-choice priors:
# Create design with no-choice option
design_nochoice <- cbc_design(
profiles = profiles,
n_alts = 2,
n_q = 6,
n_resp = 100,
no_choice = TRUE,
method = "random"
)
# Create priors including no-choice utility
priors_nochoice <- cbc_priors(
profiles = profiles,
price = -0.25,
type = c(0.5, 1.0),
freshness = c(0.6, 1.2),
no_choice = -0.5 # Negative = no-choice less attractive
)
# Simulate choices
choices_nochoice <- cbc_choices(
design_nochoice,
priors = priors_nochoice
)
# Examine no-choice rates
nochoice_rate <- mean(choices_nochoice$choice[choices_nochoice$no_choice == 1])
cat("No-choice selection rate:", round(nochoice_rate * 100, 1), "%\n")
#> No-choice selection rate: 13.3 %
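You can also tabulate choice rates for every alternative position to see how often the no-choice option is selected relative to the product alternatives. A base-R sketch, assuming the no-choice option appears as its own (last) altID in each question:
# Average choice rate by alternative position
round(tapply(choices_nochoice$choice, choices_nochoice$altID, mean), 3)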
Simulate heterogeneous preferences using random parameters:
# Create priors with random parameters
priors_random <- cbc_priors(
profiles = profiles,
price = rand_spec(dist = "n", mean = -0.1, sd = 0.05),
type = rand_spec(dist = "n", mean = c(0.1, 0.2), sd = c(0.05, 0.1)),
freshness = c(0.1, 0.2), # Keep some parameters fixed
n_draws = 100
)
# Simulate choices with preference heterogeneity
choices_mixed <- cbc_choices(design, priors = priors_random)
Include interaction effects in choice simulation:
# Create priors with interactions
priors_interactions <- cbc_priors(
profiles = profiles,
price = -0.1,
type = c("Fuji" = 0.5, "Gala" = 1),
freshness = c("Average" = 0.6, "Excellent" = 1.2),
interactions = list(
# Price sensitivity varies by apple type
int_spec(
between = c("price", "type"),
with_level = "Fuji",
value = 0.5
),
int_spec(
between = c("price", "type"),
with_level = "Gala",
value = 0.2
)
)
)
# Simulate choices with interaction effects
choices_interactions <- cbc_choices(
design,
priors = priors_interactions
)
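To make the interaction values concrete, the arithmetic below shows the effective price coefficients they imply, assuming each interaction coefficient is added to the base price coefficient for alternatives of the named type (a sketch for interpretation only):
# Effective price coefficients implied by the interactions above;
# other types keep the base value of -0.1
base_price <- -0.1
c(Fuji = base_price + 0.5, Gala = base_price + 0.2)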
Based on the priors used to simulate choices_utility, we expect lower-priced alternatives to be chosen somewhat more often, Gala and Honeycrisp to be chosen more often than Fuji, and Average and Excellent freshness to be chosen more often than Poor.
Examine aggregate choice patterns to validate simulation:
# Decode the choice data first to get categorical variables
choices_decoded <- cbc_decode(choices_utility)
# Aggregate attribute choices across all respondents
choices <- choices_decoded
# Price choices
price_choices <- aggregate(choice ~ price, data = choices, sum)
price_choices$prop <- price_choices$choice / sum(price_choices$choice)
print(price_choices)
#> price choice prop
#> 1 1.0 123 0.2050000
#> 2 1.5 119 0.1983333
#> 3 2.0 121 0.2016667
#> 4 2.5 107 0.1783333
#> 5 3.0 130 0.2166667
# Type choices
type_choices <- aggregate(choice ~ type, data = choices, sum)
type_choices$prop <- type_choices$choice / sum(type_choices$choice)
print(type_choices)
#> type choice prop
#> 1 Fuji 171 0.285
#> 2 Gala 213 0.355
#> 3 Honeycrisp 216 0.360
# Freshness choices
freshness_choices <- aggregate(choice ~ freshness, data = choices, sum)
freshness_choices$prop <- freshness_choices$choice /
sum(freshness_choices$choice)
print(freshness_choices)
#> freshness choice prop
#> 1 Poor 145 0.2416667
#> 2 Average 210 0.3500000
#> 3 Excellent 245 0.4083333
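The same tabulations can be written more compactly with xtabs(), which sums the choice column within each level (a base-R alternative to the aggregate() calls above):
# Share of all choices going to each type and freshness level
round(prop.table(xtabs(choice ~ type, data = choices)), 3)
round(prop.table(xtabs(choice ~ freshness, data = choices)), 3)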
For random parameter models, examine variation across respondents:
# Create dataset with only chosen alternatives
chosen_alts <- choices_mixed[choices_mixed$choice == 1, ]
# Mean attribute levels chosen by each respondent
resp_means <- aggregate(
cbind(
price,
typeGala,
typeHoneycrisp,
freshnessAverage,
freshnessExcellent
) ~
respID,
data = chosen_alts,
mean
)
# Look at variation across respondents
cat("Price variation across respondents:\n")
#> Price variation across respondents:
cat("Mean:", round(mean(resp_means$price), 2), "\n")
#> Mean: 2.01
cat("SD:", round(sd(resp_means$price), 2), "\n")
#> SD: 0.28
cat("\nHoneycrisp choice rate variation:\n")
#>
#> Honeycrisp choice rate variation:
cat("Mean:", round(mean(resp_means$typeHoneycrisp), 2), "\n")
#> Mean: 0.32
cat("SD:", round(sd(resp_means$typeHoneycrisp), 2), "\n")
#> SD: 0.2
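To visualize this between-respondent variation, you can plot the distribution of the respondent-level means (a quick base-R sketch):
# Distribution of mean chosen price across the simulated respondents
hist(
  resp_means$price,
  breaks = 20,
  main = "Mean chosen price by respondent",
  xlab = "Mean price of chosen alternatives"
)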
For D-optimal designs created with priors, use the same priors for choice simulation, as in the sketch below.
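The following is a sketch under assumptions: it assumes a prior-based design method (here "stochastic") in cbc_design() and reuses the priors object created earlier.
# Create a D-optimal design using the same priors (assumed method: "stochastic")
design_optimal <- cbc_design(
  profiles = profiles,
  method = "stochastic",
  priors = priors,
  n_alts = 2,
  n_q = 6,
  n_resp = 100
)
# Simulate choices with the same priors used to optimize the design
choices_optimal <- cbc_choices(design_optimal, priors = priors)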
cbcTools warns when different priors are used:
# Create different priors
different_priors <- cbc_priors(
profiles = profiles,
price = -0.2, # Different from design optimization
type = c(0.2, 0.4),
freshness = c(0.2, 0.4)
)
# This will generate a warning about inconsistent priors
choices_inconsistent <- cbc_choices(
design_optimal,
priors = different_priors
)
After simulating choices, use cbc_power() to determine sample size requirements. For details on power analysis, see the Power Analysis vignette.
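A minimal call might look like the sketch below; the exact arguments of cbc_power() (for example, how the number of sample-size breaks is specified) are assumptions here and should be checked in ?cbc_power or the Power Analysis vignette.
# A sketch: estimate models at increasing sample sizes to assess power
# (calling with only the simulated choice data is an assumption; see ?cbc_power)
power <- cbc_power(choices_utility)
power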