pirouette has the option to investigate the error BEAST2
makes on a twin tree. A twin tree is a tree with the same topology as
the original phylogeny, yet where the branch lengths follow a
birth-death branch length distribution.
Twinning is useful to separate the effect of an unknown tree prior (i.e. speciation model) on a phylogeny’s shape from the noise (the minimal error) made by BEAST2.
In this example, the following tree is used:
# Build the example tree from its Newick text, then draw it with a title
phylogeny <- ape::read.tree(
  text = "((A:1, B:1):1, (C:1, D:1):1);"
)
ape::plot.phylo(x = phylogeny, main = "The True Phylogeny")
This phylogeny follows a speciation model unknown to BEAST2, as two speciation events happened at exactly the same time. Such a phylogeny will have a likelihood of zero for all of the BEAST2 tree priors.
Twinning:
# `twinning_params` is modified and passed to `create_pir_params()` further
# down, so it must exist before that point. Create it with pirouette's
# default twinning settings.
# NOTE(review): if a hidden setup chunk already defines `twinning_params`
# with non-default settings, drop this line.
twinning_params <- pirouette::create_twinning_params()
# Alignment settings: the root sequence comes from
# `create_blocked_dna(length = 100)` and the RNG seed is fixed at 314
alignment_params <- pirouette::create_alignment_params(
  root_sequence = pirouette::create_blocked_dna(length = 100),
  rng_seed = 314
)
We select our inference models in two ways:
| type | run_if | measure evidence | inference model | 
|---|---|---|---|
| generative | always | TRUE | Birth-Death | 
| candidate | best_candidate | TRUE | Yule | 
# Generative experiment: its inference model uses a birth-death tree prior
experiment_1 <- create_test_gen_experiment(
  inference_model = beautier::create_test_inference_model(
    tree_prior = beautier::create_bd_tree_prior()
  )
)
if (rappdirs::app_dir()$os == "win") {
  # On Windows only the generative experiment is used and no evidence
  # filenames are set
  experiments <- list(experiment_1)
  pir_params <- create_pir_params(
    alignment_params = alignment_params,
    experiments = experiments,
    twinning_params = twinning_params
  )
} else {
  # Elsewhere, add a candidate experiment with a Yule tree prior
  experiment_2 <- create_test_cand_experiment(
    inference_model = beautier::create_test_inference_model(
      tree_prior = beautier::create_yule_tree_prior()
    )
  )
  # The generative experiment must measure its evidence as well
  experiment_1$inference_conditions$do_measure_evidence <- TRUE
  experiments <- list(experiment_1, experiment_2)
  # Evidence is stored in temporary files, one for the twin alignment
  # and one for the true alignment
  twinning_params$twin_evidence_filename <- get_temp_evidence_filename()
  pir_params <- create_pir_params(
    alignment_params = alignment_params,
    experiments = experiments,
    twinning_params = twinning_params,
    evidence_filename = get_temp_evidence_filename()
  )
}
Run:
# Call `pir_run()` only when not on Windows and both the BEAST2 check and
# the BEAST2 NS package check succeed; otherwise fall back to the output of
# `create_test_pir_run_output()`. Both branches yield a value, so no
# placeholder initialisation of `df` is needed.
df <- if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  pir_run(
    phylogeny = phylogeny,
    pir_params = pir_params
  )
} else {
  create_test_pir_run_output()
}
Show as a table:
| tree | inference_model | inference_model_weight | site_model | clock_model | tree_prior | error_1 | error_2 | error_3 | 
|---|---|---|---|---|---|---|---|---|
| true | generative | 0.5 | JC69 | relaxed_log_normal | birth_death | 0.1 | 0.11 | 0.12 | 
Show as a figure:
See the true tree again:
See the alignment generated from the true tree:
# Draw the alignment stored in the FASTA file named by `alignment_params`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  check_file_exists(pir_params$alignment_params$fasta_filename)
  ape::image.DNAbin(
    ape::read.FASTA(pir_params$alignment_params$fasta_filename)
  )
}
See the posterior trees generated from the true alignment, for the generative model:
# Plot the posterior trees logged by the first (generative) experiment.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  # The treelog filename may hold BEAUti's '$(tree)' shorthand.
  # Do what BEAUti does: substitute the alignment ID for it.
  treelog_filename <- gsub(
    pattern = "\\$\\(tree\\)",
    replacement = beautier::get_alignment_id(
      pir_params$alignment_params$fasta_filename
    ),
    x = pir_params$experiments[[1]]$inference_model$mcmc$treelog$filename
  )
  check_file_exists(treelog_filename)
  babette::plot_densitree(
    tracerer::parse_beast_trees(treelog_filename)
  )
}
See the evidence of the true alignments for the models:
# Show the evidence file of the true alignment as a table.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  # Verify the file exists before reading it, as is done for the
  # twin evidence file
  check_file_exists(pir_params$evidence_filename)
  knitr::kable(readr::read_csv(pir_params$evidence_filename))
}
See the posterior trees generated from the true alignment, for the model with the most evidence:
# Plot the posterior trees logged by the second experiment in `pir_params`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  experiment <- pir_params$experiments[[2]]
  trees_filename <- experiment$inference_model$mcmc$treelog$filename
  check_file_exists(trees_filename)
  babette::plot_densitree(
    tracerer::parse_beast_trees(trees_filename)
  )
}
See the posterior parameter estimates generated from the true alignment, for the generative model:
# Plot the likelihood per sample from the first (generative) experiment's
# tracelog. Skipped on Windows, or when BEAST2 or its NS package is not
# installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  # A tracelog's filename is set to NA by default.
  # Here, do what BEAUti does: name it '<alignment ID>.log' and store
  # that name back into `pir_params`
  tracelog_filename <-
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  if (is.na(tracelog_filename)) {
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename <-
      paste0(
        beautier::get_alignment_id(
          pir_params$alignment_params$fasta_filename
        ),
        ".log"
      )
  }
  check_file_exists(
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  )
  df <- tracerer::parse_beast_tracelog_file(
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  )
  ggplot(data = df, aes(x = Sample, y = likelihood)) +
    geom_line()
}
See the posterior parameter estimates generated from the true alignment, for the model with the most evidence:
# Plot the likelihood per sample from the second experiment's tracelog.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  experiment <- pir_params$experiments[[2]]
  log_filename <- experiment$inference_model$mcmc$tracelog$filename
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) +
    geom_line()
}
See the twin tree:
# Read the twin tree from the file named in `twinning_params` and draw it.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  # Verify the file exists before reading it, as the other
  # file-reading chunks in this document do
  check_file_exists(pir_params$twinning_params$twin_tree_filename)
  ape::plot.phylo(
    ape::read.tree(pir_params$twinning_params$twin_tree_filename),
    main = "The Twin Tree"
  )
}
See the alignment generated from the twin tree:
# Draw the twin alignment stored in the file named by `twinning_params`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  check_file_exists(pir_params$twinning_params$twin_alignment_filename)
  ape::image.DNAbin(
    ape::read.FASTA(pir_params$twinning_params$twin_alignment_filename)
  )
}
See the posterior trees generated from the twin alignment, for the generative model:
# Plot the posterior trees of the first (generative) experiment for the
# twin run; the filename is the experiment's treelog filename passed
# through `to_twin_filename()`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  trees_filename <- to_twin_filename(
    pir_params$experiments[[1]]$inference_model$mcmc$treelog$filename
  )
  check_file_exists(trees_filename)
  babette::plot_densitree(
    tracerer::parse_beast_trees(trees_filename)
  )
}
See the evidence of the twin alignment for the models:
# Show the twin evidence file named in `twinning_params` as a table.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  twin_evidence_filename <-
    pir_params$twinning_params$twin_evidence_filename
  check_file_exists(twin_evidence_filename)
  knitr::kable(
    readr::read_csv(twin_evidence_filename)
  )
}
See the posterior trees generated from the twin alignment, for the model with the most evidence:
# Plot the posterior trees of the second experiment for the twin run; the
# filename is the experiment's treelog filename passed through
# `to_twin_filename()`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  trees_filename <- to_twin_filename(
    pir_params$experiments[[2]]$inference_model$mcmc$treelog$filename
  )
  check_file_exists(trees_filename)
  babette::plot_densitree(
    tracerer::parse_beast_trees(trees_filename)
  )
}
See the posterior parameter estimates generated from the twin alignment, for the generative model:
# Plot the likelihood per sample from the generative model's twin tracelog.
# The generative experiment is the first element of `experiments`; the
# second element is the candidate, which has its own chunk.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  # NOTE(review): this tracelog filename is NA by default; the earlier
  # true-alignment chunk for the generative model fills it in. Confirm
  # it is set before this chunk runs.
  log_filename <- to_twin_filename(
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  )
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}
See the posterior parameter estimates generated from the twin alignment, for the model with the most evidence:
# Plot the likelihood per sample from the second experiment's twin tracelog;
# the filename is the experiment's tracelog filename passed through
# `to_twin_filename()`.
# Skipped on Windows, or when BEAST2 or its NS package is not installed.
if (
  rappdirs::app_dir()$os != "win" &&
    beastier::is_beast2_installed() &&
    mauricer::is_beast2_ns_pkg_installed()
) {
  log_filename <- to_twin_filename(
    pir_params$experiments[[2]]$inference_model$mcmc$tracelog$filename
  )
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) +
    geom_line()
}