## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)

## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls()) 

## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE

## ----setup, eval = execute.vignette-------------------------------------------
# knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
# library(httk)
# library(ggplot2)
# library(gridExtra)
# library(cowplot)
# library(ggrepel)
# library(dplyr)
# library(stringr)
# library(forcats)
# library(smatr)

## ----data, eval = execute.vignette--------------------------------------------
# met_data <- metabolism_data_Linakis2020
# conc_data <- concentration_data_Linakis2020
# #conc_data <- subset(concentration_data_Linakis2020, !(SAMPLING_MATRIX %in%
# #                      c("EEB","MEB","EB")))
# #conc_data <- concentration_data_Linakis2020
# #  subset(concentration_data_Linakis2020, !(SOURCE_CVT %in% c(
# #  "11453305")))
# conc_data[,"DOSE_U"] <- ifelse(conc_data[,"DOSE_U"] == "ppm",
#                                yes = "ppmv",
#                                conc_data[,"DOSE_U"])
# conc_data[,"ORIG_CONC_U"] <- ifelse(conc_data[,"ORIG_CONC_U"] == "ppm",
#                                     yes = "ppmv",
#                                     conc_data[,"ORIG_CONC_U"])
# # Not sure what to do with percent:
# conc_data <- subset(conc_data,toupper(ORIG_CONC_U) != "PERCENT")
# # Rename this column:
# colnames(conc_data)[colnames(conc_data)=="ORIG_CONC_U"] <- "CONC_U"
# conc_data$ORIGINAL_CONC_U <- conc_data$CONC_U
# conc_data$ORIGINAL_CONC <- conc_data$CONCENTRATION
# # Maybe Linakis et al translated all concentrations to uM?
# conc_data$CONC_U <- "uM"

## ----convert_to_ppmweight, eval = FALSE---------------------------------------
# gas.media <- c("EB","MEB","EEB","EB (+W)")
# gas.units <- unique(subset(conc_data,
#   SAMPLING_MATRIX %in% gas.media)$CONC_U)
# target.unit <- "ppmv"
# for (this.unit in gas.units)
#   if (this.unit != target.unit)
#   {
#     these.chems <- unique(subset(conc_data,
#       SAMPLING_MATRIX %in% gas.media &
#       CONC_U==this.unit)$DTXSID)
#     for (this.chem in these.chems)
#     {
#       this.factor <- convert_units(
#         input.units=this.unit,
#         output.units=target.unit,
#         dtxsid=this.chem, state="gas")
#       print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
# 
#       # Scale the observation
#       conc_data[conc_data$DTXSID==this.chem &
#                   conc_data$SAMPLING_MATRIX %in% gas.media &
#                   conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
#         this.factor * conc_data[
#           conc_data$DTXSID==this.chem &
#             conc_data$SAMPLING_MATRIX %in% gas.media &
#             conc_data$CONC_U==this.unit,"CONCENTRATION"]
#       # Change the reported unit
#       conc_data[conc_data$DTXSID==this.chem &
#                   conc_data$SAMPLING_MATRIX %in% gas.media &
#                   conc_data$CONC_U==this.unit,"CONC_U"] <-
#         target.unit
# 
#     }
#   }

## ----normalize_other_units, eval = FALSE--------------------------------------
# tissue.media <- c("VBL","BL","ABL","PL","BL (+W)")
# tissue.units <- unique(subset(conc_data,
#   SAMPLING_MATRIX %in% tissue.media)$CONC_U)
# target.unit <- "uM"
# for (this.unit in tissue.units)
#   if (this.unit != target.unit)
#   {
#     these.chems <- unique(subset(conc_data,
#       SAMPLING_MATRIX %in% tissue.media &
#       CONC_U==this.unit)$DTXSID)
#     for (this.chem in these.chems)
#     {
#       this.factor <- try(convert_units(
#         input.units=this.unit,
#         output.units=target.unit,
#         dtxsid=this.chem))
#       print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
# 
#       # Scale the observation
#       conc_data[conc_data$DTXSID==this.chem &
#                   conc_data$SAMPLING_MATRIX %in% tissue.media &
#                   conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
#         this.factor * conc_data[
#           conc_data$DTXSID==this.chem &
#             conc_data$SAMPLING_MATRIX %in% tissue.media &
#             conc_data$CONC_U==this.unit,"CONCENTRATION"]
#       # Change the reported unit
#       conc_data[conc_data$DTXSID==this.chem &
#                   conc_data$SAMPLING_MATRIX %in% tissue.media &
#                   conc_data$CONC_U==this.unit,"CONC_U"] <-
#         target.unit
# 
#     }
#   }

## ----summary, eval = execute.vignette-----------------------------------------
# # Small molecule chemicals
# summary(met_data$AVERAGE_MASS)
# # Generally more lipophilic chemicals
# summary(met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED)
# # Unsurprisingly then, the chemicals are generally less water-soluble
# summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED)
# # ~60% of samples in humans
# table(conc_data$CONC_SPECIES)/nrow(conc_data)*100
# # ~72% of samples are from blood
# table(conc_data$SAMPLING_MATRIX)/nrow(conc_data)*100

## ----scenarios, eval = execute.vignette---------------------------------------
# # Create a dataframe with 1 row for each unique external exposure scenario
# unique_scenarios <- conc_data[with(conc_data,
#   order(PREFERRED_NAME,
#         CONC_SPECIES,
#         SAMPLING_MATRIX,
#         as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>%
#   distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE)

## ----runmodel, eval = execute.vignette----------------------------------------
# # Store the output of each simulation:
# simlist <- list()
# # Store the Cvt data relevant to each simulation
# obslist <- list()
# # Conduct one simulation for each unique combination of chemical, species, dose:
# for (i in 1:nrow(unique_scenarios))
#   if (unique_scenarios$CASRN[i] %in% get_cheminfo(model="gas_pbtk",
#                                                   suppress.messages = TRUE))
# {
#   # Identify relevant Cvt data:
#     relconc <- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] &
#       conc_data$DOSE == unique_scenarios$DOSE[i] &
#       conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
#       conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
#       conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
#     obslist[[i]] <- relconc
# #
# #
# #
# #
# #
# # THE FOLLOWING CODE RUNS solve_gas_pbtk FOR EACH SCENARIO
# # (UNIQUE COMBINATION OF CHEMICAL, SPECIES, DOSE, ETC.)
# #
# #
# #
# #
# #
#     solver.out <- try(suppressWarnings(as.data.frame(solve_gas_pbtk(
#         chem.cas = unique_scenarios$CASRN[i],
#         days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
# # Make sure we get predicted conc's at the observed times:
#         times=unique(c(0,signif(obslist[[i]]$TIME,4))), # days
#         tsteps = 500,
#         exp.conc = as.numeric(unique_scenarios$DOSE[i]), # SED (06-22-2021) think this is ppmv for all scenarios
#          input.units = unique_scenarios$DOSE_U[i], # specify the units for exp.conc (ppmv)
#         exp.duration = unique_scenarios$EXP_LENGTH[i], # days
#         period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # days
#         species = as.character(unique_scenarios$CONC_SPECIES[i]),
#         monitor.vars = c(
#           "Cven",
#           "Clung",
#           "Cart",
#           "Cplasma",
#           "Calvppmv",
#           "Cendexhppmv",
#           "Cmixexhppmv",
#           "Calv",
#           "Cendexh",
#           "Cmixexh",
#           "Cmuc",
#           "AUC"),
#         vmax.km = FALSE,
#         suppress.messages = TRUE))))
# #
# #
# #
# #
# #
# #
# #
# #
# #
# #
#     if (class(solver.out) %in% "try-error")
#       solver.out <- data.frame(time=NA,Conc=NA)
# 
#     print(solver.out)
# # Get the blood:plasma ratio:
#     this.Rb2p <- suppressWarnings(available_rblood2plasma(
#       chem.cas=unique_scenarios$CASRN[i],
#       species=as.character(unique_scenarios$CONC_SPECIES[i])))
#     solver.out$Rb2p <- this.Rb2p
# 
#     # The column simconc holds the appropriate prediction for the sampling matrix
#     # BL : blood
#     # EEB : end-exhaled breath
#     # MEB : mixed exhaled breath
#     # VBL : venous blood
#     # ABL : arterial blood
#     # EB : unspecified exhaled breath sample (assumed to be EEB)
#     # PL: plasma
#     # +W with work/exercise
#     #
#     # The model outputs are provided in the following units:
# 	  # uM: Cven, Clung, Cart, Cplasma, Calv, Cendexh, Cmixexh, Cmuc
# 	  # ppmv: Calvppmv, Cendexhppmv, Cmixexhppmv
# 	  # uM*days: AUC
#     if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" |
#       unique_scenarios$SAMPLING_MATRIX[i] == "BL" |
#       unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)")
#     {
#       solver.out$simconc <- solver.out$Cven*this.Rb2p
#       solver.out$unit <- "uM"
#     } else if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") {
#       solver.out$simconc <- solver.out$Cart*this.Rb2p
#       solver.out$unit <- "uM"
#     } else if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" |
#       unique_scenarios$SAMPLING_MATRIX[i] == "EEB" |
#       unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)")
#     {
#       solver.out$simconc <- solver.out$Cendexh # uM
#       solver.out$unit <- "uM"
#     } else if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") {
#         solver.out$simconc <- solver.out$Cmixexh # uM
#         solver.out$unit <- "uM"
#     } else if (unique_scenarios$SAMPLING_MATRIX[i] == "PL") {
#       solver.out$simconc <- solver.out$Cplasma
#       solver.out$unit <- "uM"
#     } else {
#       solver.out$simconc <- NA
#       solver.out$unit <- NA
#     }
#     simlist[[i]] <- solver.out
# }

## ----Makestudyplots, eval = execute.vignette----------------------------------
# cvtlist <- list()
# for(i in 1:nrow(unique_scenarios))
# {
#     plot.data <- simlist[[i]]
#     relconc <- obslist[[i]]
# 
#     if (!is.null(plot.data))
#     {
# #Right now this is only calculating real concentrations according to mg/L in blood
#     cvtlist[[i]] <- ggplot(data=plot.data, aes(time*24, simconc)) +
#       geom_line() +
#       xlab("Time (h)") +
#       ylab(paste0("Simulated ",
#         unique_scenarios$SAMPLING_MATRIX[i],
#         "\nConcentration (" ,
#         solver.out$unit, ")")) +
#       ggtitle(paste0(
#         unique_scenarios$PREFERRED_NAME[i],
#         " (",
#         unique_scenarios$CONC_SPECIES[i],
#         ", ",
#         round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
#         unique_scenarios$DOSE_U[i],
#         " for ",
#         round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
#         "h in ",
#         unique_scenarios$SAMPLING_MATRIX[i], ")")) +
#       geom_point(data = relconc, aes(TIME*24,CONCENTRATION)) +
#       theme(plot.title = element_text(face = 'bold', size = 20),
#         axis.title.x = element_text(face = 'bold', size = 20),
#         axis.text.x = element_text(size=16),
#         axis.title.y = element_text(face = 'bold', size = 20),
#         axis.text.y = element_text(size = 16),
#         legend.title = element_text(face = 'bold', size = 16),
#         legend.text = element_text(face = 'bold',size = 14))+
#       theme_bw()
#     }
# }

## ----init_dataset, eval = execute.vignette------------------------------------
# # Creation of simulated vs. observed concentration dataset
# unique_scenarios$RSQD <- 0
# unique_scenarios$RMSE <- 0
# unique_scenarios$AIC <- 0
# simobslist <- list()
# obvpredlist <- list()

## ----merge_datasets, eval = execute.vignette----------------------------------
# for(i in 1:length(simlist))
# {
#   obsdata <- as.data.frame(obslist[[i]])
#   simdata <- as.data.frame(simlist[[i]])
# # skips over anything for which there was no observed data or
# # insufficient information to run simulation:
#   if (!is.null(simdata) &
#       !is.null(obsdata) &
#       dim(simdata)[1]>1)
#   {
# # Make sure we are looking at consistent time points:
#     simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME,4),]
#     obsdata <- subset(obsdata,signif(TIME,4) %in% simobscomb$time)
# # Merge with obsdata
#     colnames(obsdata)[colnames(obsdata) ==
#       "TIME"] <-
#       "obstime"
# # Round to match sim time:
#     obsdata$time <- signif(obsdata$obstime,4)
#     colnames(obsdata)[colnames(obsdata) ==
#       "CONCENTRATION"] <-
#       "obsconc"
#     colnames(obsdata)[colnames(obsdata) ==
#       "PREFERRED_NAME"] <-
#       "chem"
#     colnames(obsdata)[colnames(obsdata) ==
#       "DOSE"] <-
#       "dose"
#     colnames(obsdata)[colnames(obsdata) ==
#       "EXP_LENGTH"] <-
#       "explen"
#     colnames(obsdata)[colnames(obsdata) ==
#       "CONC_SPECIES"] <-
#       "species"
#     colnames(obsdata)[colnames(obsdata) ==
#       "SAMPLING_MATRIX"] <-
#       "matrix"
#     colnames(obsdata)[colnames(obsdata) ==
#       "AVERAGE_MASS"] <-
#       "mw"
#     colnames(obsdata)[colnames(obsdata) ==
#       "CONC_U"] <-
#       "CONC_U"
#     simobscomb <- suppressWarnings(merge(obsdata[,c(
#       "time",
#       "obstime",
#       "obsconc",
#       "chem",
#       "dose",
#       "explen",
#       "species",
#       "matrix",
#       "mw",
#       "CONC_U",
#       "ORIGINAL_CONC_U"
#       )], simobscomb, by="time", all.x=TRUE))
# 
# # Merge with met_data
#     this.met_data <- subset(met_data,
#       PREFERRED_NAME == simobscomb[1,"chem"] &
#       SPECIES == simobscomb[1,"species"])
#     colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <-
#       "chemclass"
#     colnames(this.met_data)[colnames(this.met_data) ==
#       "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <-
#       "logp"
#     colnames(this.met_data)[colnames(this.met_data) ==
#       "WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <-
#       "sol"
#     colnames(this.met_data)[colnames(this.met_data) ==
#       "HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <-
#       "henry"
#     colnames(this.met_data)[colnames(this.met_data) ==
#       "VMAX"] <-
#       "vmax"
#     colnames(this.met_data)[colnames(this.met_data) ==
#       "KM"] <-
#       "km"
#     simobscomb <- suppressWarnings(cbind(simobscomb,this.met_data[c(
#       "chemclass",
#       "logp",
#       "sol",
#       "henry",
#       "vmax",
#       "km")]))
#     simobslist[[i]] <- simobscomb
#   }
# }

## ----time_quartile, eval = execute.vignette-----------------------------------
# for(i in 1:length(simobslist))
#   if (!is.null(simobslist[[i]]))
#   {
#     simobscomb <- simobslist[[i]]
#     for (j in 1:nrow(simobscomb))
#     {
#       max.time <- max(simobscomb$time,na.rm=TRUE)
#       if (is.na(max.time)) simobscomb$tquart <- NA
#       else if (max.time == 0) simobscomb$tquart <- "1"
#       else if (!is.na(simobscomb$time[j]))
#       {
#         simobscomb$tquart[j] <- as.character(1 +
#           floor(simobscomb$time[j]/max.time/0.25))
#         simobscomb$tquart[simobscomb$tquart=="5"] <-
#           "4"
#       } else simobscomb$tquart[j] >- NA
#     }
#     simobslist[[i]] <- simobscomb
#   }

## ----calc_AUC, eval = execute.vignette----------------------------------------
# for(i in 1:length(simobslist))
#   if (!is.null(simobslist[[i]]))
#   {
#     simobscomb <- simobslist[[i]]
# # Calculat the AUC with the trapezoidal rule:
#     if (nrow(simobscomb)>1)
#     {
#       for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE))
#       {
#         simobscomb$obsAUCtrap[1] <- 0
#         simobscomb$simAUCtrap[1] <- 0
#         if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) &
#             nrow(simobscomb) >=2)
#         {
#           simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] +
#             0.5*(simobscomb$time[k] - simobscomb$time[k-1]) *
#             (simobscomb$obsconc[k] + simobscomb$obsconc[k-1])
#           simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] +
#             0.5*(simobscomb$time[k]-simobscomb$time[k-1]) *
#             (simobscomb$simconc[k] + simobscomb$simconc[k-1])
#         } else {
#           simobscomb$obsAUCtrap <- 0
#           simobscomb$simAUCtrap <- 0
#         }
#       }
#     } else {
#       simobscomb$obsAUCtrap <- 0
#       simobscomb$simAUCtrap <- 0
#     }
#     simobscomb$AUCobs <- max(simobscomb$obsAUCtrap)
#     simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
#     simobscomb$calcAUC <- max(simobscomb$AUC)
#     if (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
#     {
#       simobscomb$Cmaxobs <- max(simobscomb$obsconc)
#       simobscomb$Cmaxsim <- max(simobscomb$simconc)
#     } else {
#       simobscomb$Cmaxobs <- 0
#       simobscomb$Cmaxsim <- 0
#     }
#     simobslist[[i]] <- simobscomb
#   }

## ----calc_error, eval = execute.vignette--------------------------------------
# for(i in 1:length(simobslist))
#   if (!is.null(simobslist[[i]]))
#   {
#     simobscomb <- simobslist[[i]]
#     unique_scenarios$RSQD[i] <- 1 - (
#       sum((simobscomb$obsconc - simobscomb$simconc)^2) /
#       sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2)
#       )
#     unique_scenarios$RMSE[i] <-
#       sqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2))
#     unique_scenarios$AIC[i] <-
#       nrow(simobscomb)*(
#         log(2*pi) + 1 +
#         log((sum((simobscomb$obsconc-simobscomb$simconc)^2) /
#           nrow(simobscomb)))
#       ) + ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
#     simobslist[[i]] <- simobscomb
#   }

## ----combine_studies, eval = execute.vignette---------------------------------
# obsvpredlist <- list()
# for(i in 1:length(simobslist))
#   if (!is.null(simobslist[[i]]))
#   {
#     simobscomb <- simobslist[[i]]
#     obsvpredlist[[i]] <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
#       geom_point() +
#       geom_abline() +
#       xlab("Simulated Concentrations (uM)") +
#       ylab("Observed Concentrations (uM)") +
#       ggtitle(paste0(
#         unique_scenarios$PREFERRED_NAME[i],
#         " (",
#         unique_scenarios$CONC_SPECIES[i],
#         ", ",
#         round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
#         unique_scenarios$DOSE_U[i],
#         " for ",
#         round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
#         "h in ",
#         unique_scenarios$SAMPLING_MATRIX[i], ")")) +
#       theme_bw() +
#       theme(plot.title = element_text(face = 'bold', size = 20),
#         axis.title.x = element_text(face = 'bold', size = 20),
#         axis.text.x = element_text(size=16),
#         axis.title.y = element_text(face = 'bold', size = 20),
#         axis.text.y = element_text(size = 16),
#         legend.title = element_text(face = 'bold', size = 16),
#         legend.text = element_text(face = 'bold',size = 14))
#   }

## ----obsvspredplots, eval = execute.vignette----------------------------------
# # Create pdfs of observed vs. predicted concentration plots
# dir.create("Linakis2020")
# pdf("Linakis2020/obvpredplots.pdf", width = 10, height = 10)
# for (i in 1:length(obsvpredlist)) {
#   print(obsvpredlist[[i]])
# }
# dev.off()

## ----annotate_study_stats, eval = execute.vignette----------------------------
# for (i in 1:length(cvtlist))
#   if (!is.null(cvtlist[[i]]))
# {
#   cvtlist[[i]] <- cvtlist[[i]] +
#     geom_text(
#       x = Inf,
#       y = Inf,
#       hjust = 1.3,
#       vjust = 1.3,
# #      size = 6,
#       label = paste0(
#         "RMSE: ",
#         round(unique_scenarios$RMSE[i],digits = 2),
#         "\nAIC: ",
#         round(unique_scenarios$AIC[i],digits = 2)))# +
# #    theme(
# #      plot.title = element_text(face = 'bold', size = 15),
# #      axis.title.x = element_text(face = 'bold', size = 20),
# #      axis.text.x = element_text(size=16),
# #      axis.title.y = element_text(face = 'bold', size = 20),
# #      axis.text.y = element_text(size = 16),
# #      legend.title = element_text(face = 'bold', size = 16),
# #      legend.text = element_text(face = 'bold',size = 14))
# }

## ----cvtplots, eval = execute.vignette----------------------------------------
# # Create pdfs of simulated concentration-time plots against observed c-t values
# pdf("Linakis2020/simdataplots.pdf")
# for (i in 1:length(cvtlist)) {
#   print(cvtlist[[i]])
# }
# dev.off()

## ----combine_obs_vs_pred, eval = execute.vignette-----------------------------
# simobsfull <- do.call("rbind",simobslist)
# simobsfullrat <- subset(simobsfull, simobsfull$species == "Rat")
# simobsfullhum <- subset(simobsfull, simobsfull$species == "Human")
# unique_scenarios <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD))

## ----Standardize Units, eval = FALSE------------------------------------------
# these.chems <- unique(subset(simobsfull,unit=="ppmv")$chem)
# for (this.chem in these.chems)
# {
#   # Use HTTK unit conversion:
#   this.factor <- convert_units(
#     input.units="ppmv", output.units="um", chem.name=this.chem, state="gas")
# 
#   # Scale the observation
#   simobsfull[simobsfull$chem==this.chem &
#               simobsfull$unit=="ppmv","obsconc"] <-
#     this.factor * simobsfull[
#       simobsfull$chem==this.chem &
#         simobsfull$unit=="ppmv","obsconc"]
#   # Scale the prediction
#   simobsfull[simobsfull$chem==this.chem &
#               simobsfull$unit=="ppmv","simconc"] <-
#     this.factor * simobsfull[
#       simobsfull$chem==this.chem &
#         simobsfull$unit=="ppmv","simconc"]
#   # Change the reported unit
#   simobsfull[simobsfull$chem==this.chem &
#               simobsfull$unit=="ppmv","unit"] <-
#     "uM"
# }

## ----regression, eval = execute.vignette--------------------------------------
# table(unique_scenarios$CONC_SPECIES)
# nrow(simobsfull) - nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc > 0 &
#   simobsfull$obsconc > 0,])
# pmiss <- (nrow(simobsfull) -
#   nrow(simobsfull[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0,])) /
#   nrow(simobsfull) * 100
# missdata <- (simobsfull[
#   is.na(simobsfull$simconc) |
#   simobsfull$simconc <= 0 |
#   simobsfull$obsconc <= 0,])
# t0df <- simobsfull[simobsfull$obstime == 0,]
# lmall <- lm(
# #log transforms:
#   log10(simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0]) ~
# #log transforms:
#   log10(simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0]))
# #Linear binned 1
# lmsub1 <- lm(
#   simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc < 0.1] ~
#   simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc < 0.1])
# #Linear binned 2
# lmsub2 <- lm(
#   simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc >= 0.1 &
#     simobsfull$obsconc < 10] ~
#   simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc >= 0.1 &
#     simobsfull$obsconc < 10])
# #Linear binned 3
# lmsub3 <- lm(
#   simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc >= 10] ~
#   simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc >= 10])
# lmrat <- lm(
#   log10(simobsfullrat$obsconc[
#     !is.na(simobsfullrat$simconc) &
#     simobsfullrat$simconc > 0 &
#     simobsfullrat$obsconc > 0]) ~
#   log10(simobsfullrat$simconc[
#     !is.na(simobsfullrat$simconc) &
#     simobsfullrat$simconc > 0 &
#     simobsfullrat$obsconc > 0]))
# unique(simobsfullrat$chem)
# lmhum <- lm(
#   log10(simobsfullhum$obsconc[
#     !is.na(simobsfullhum$simconc) &
#     simobsfullhum$simconc > 0 &
#     simobsfullhum$obsconc > 0]) ~
#   log10(simobsfullhum$simconc[
#     !is.na(simobsfullhum$simconc) &
#     simobsfullhum$simconc > 0 &
#     simobsfullhum$obsconc > 0]))
# unique(simobsfullhum$chem)
# concregslope <- summary(lmall)$coef[2,1]
# concregr2 <- summary(lmall)$r.squared
# concregrmse <- sqrt(mean(lmall$residuals^2))
# totalrmse <- sqrt(mean((
#   log10(simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0]) -
#   log10(simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0]))^2,
#    na.rm = TRUE))
# totalmae <- mean(abs(
#   log10(simobsfull$simconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0]) -
#   log10(simobsfull$obsconc[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0])),
#   na.rm = TRUE)
# totalaic <- nrow(
#   simobsfull[
#     !is.na(simobsfull$simconc) &
#     simobsfull$simconc >0 &
#     simobsfull$obsconc >
#     0,]) *
#   (log(2*pi) +
#      1 +
#      log((sum(
#        (simobsfull$obsconc[
#          !is.na(simobsfull$simconc) &
#          simobsfull$simconc > 0 &
#          simobsfull$obsconc > 0] -
#        simobsfull$simconc[
#          !is.na(simobsfull$simconc) &
#          simobsfull$simconc > 0 &
#          simobsfull$obsconc > 0])^2,
#        na.rm=TRUE) /
#      nrow(simobsfull[
#        !is.na(simobsfull$simconc) &
#        simobsfull$simconc > 0 &
#        simobsfull$obsconc > 0,])))) +
#   ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
# mispred <- table(abs(
#   log10(simobsfull$simconc) -
#   log10(simobsfull$obsconc))>2 &
#   simobsfull$simconc>0)
# mispred[2]
# mispred[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#     simobsfull$simconc >0 &
#     simobsfull$obsconc > 0,])*100
# overpred <- table(
#   log10(simobsfull$simconc) -
#   log10(simobsfull$obsconc)>2 &
#   simobsfull$simconc>0)
# overpred[2]
# overpred[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc >0 &
#   simobsfull$obsconc > 0,])*100
# underpred <- table(
#   log10(simobsfull$obsconc) -
#   log10(simobsfull$simconc)>2 &
#   simobsfull$simconc>0)
# underpred[2]
# underpred[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc >0 &
#   simobsfull$obsconc > 0,])*100
# mispredhalf <- table(abs(
#   log10(simobsfull$simconc) -
#   log10(simobsfull$obsconc))>0.5 &
#   simobsfull$simconc>0)
# mispredhalf[2]
# mispredhalf[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc >0 &
#   simobsfull$obsconc > 0,])*100
# overpredhalf <- table(
#   log10(simobsfull$simconc) -
#   log10(simobsfull$obsconc)>0.5 &
#   simobsfull$simconc>0)
# overpredhalf[2]
# overpredhalf[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc >0 &
#   simobsfull$obsconc > 0,])*100
# underpredhalf <- table(
#   log10(simobsfull$obsconc) -
#   log10(simobsfull$simconc)>0.5 &
#   simobsfull$simconc>0)
# underpredhalf[2]
# underpredhalf[2] / nrow(simobsfull[
#   !is.na(simobsfull$simconc) &
#   simobsfull$simconc > 0 &
#   simobsfull$obsconc > 0,])*100
# chemunderpred <- subset(simobsfull,
#   log10(simobsfull$simconc) -
#   log10(simobsfull$obsconc) < 0 &
#   simobsfull$simconc > 0)
# table(chemunderpred$chemclass) / table(simobsfull$chemclass)*100

## ----obspredFig2, eval = execute.vignette-------------------------------------
# fig2 <- ggplot(
#   data = simobsfull[
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0,],
#   aes(x = log10(simconc), y = log10(obsconc))) +
#   geom_point(
#     color = ifelse(
#       abs(
#         log10(simobsfull[
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0,]$simconc) -
#         log10(simobsfull[
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0,]$obsconc)) >2,
#       'red',
#       'black')) +
#   geom_abline() +
#   xlab("Log(Simulated Concentrations)") +
#   ylab("Log(Observed Concentrations)") +
#   theme_bw() +
#   geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#   geom_text(
#     x = Inf,
#     y = -Inf,
#     hjust = 1.05,
#     vjust = -0.15,
#     size = 8,
#     label = paste0(
#   #    "Regression slope: ",
# #      round(summary(lmall)$coef[2,1],digits = 2),
#       "\nRegression R^2: ",
#       round(summary(lmall)$r.squared,digits = 2),
#       "\nRegression RMSE: ",
#       round(sqrt(mean(lmall$residuals^2)),digits = 2),
#       "\nRMSE (Identity): ",
#       round(totalrmse,digits = 2)
#  #     "\n% Missing:",
# #      round(pmiss, digits = 2), "%")
#     )) +
#   #geom_text(
#   #  data = simobsfull[
#   #    abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 &
#   #    simobsfull$simconc>0 & simobsfull$obsconc > 0,],
#   #  aes(label = paste(chem,species,matrix)),
#    ## fontface = 'bold',
#   #  check_overlap = TRUE,
# #    size = 3.5,
#   #  hjust = 0.5,
#   #  vjust = -0.8) +
#   scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#   theme(
#     plot.title = element_text(face = 'bold', size = 15),
#     axis.title.x = element_text(face = 'bold', size = 30),
#     axis.text.x = element_text(size=16),
#     axis.title.y = element_text(face = 'bold', size = 30),
#     axis.text.y = element_text(size = 16),
#     legend.title = element_text(face = 'bold', size = 24),
#     legend.text = element_text(face = 'bold',size = 24))
# fig2 #Display plot in R

## ----make_how_to_add_models_version, eval=execute.vignette--------------------
# library(scales)
# # Function for formatting tick labels:
# scientific_10 <- function(x) {
#   out <- gsub("1e", "10^", scientific_format()(x))
#   out <- gsub("\\+","",out)
#   out <- gsub("10\\^01","10",out)
#   out <- parse(text=gsub("10\\^00","1",out))
# }
# 
# font.size.large <- 10
# font.size.small <- 8
# 
# figaddmodels <- ggplot(
#   data = simobsfull[
#     simobsfull$simconc > 0 &
#     simobsfull$obsconc > 0,],
#   aes(x = simconc, y = obsconc)) +
#   geom_point(
#     color = ifelse(
#       abs(
#         log10(simobsfull[
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0,]$simconc) -
#         log10(simobsfull[
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0,]$obsconc)) >2,
#       'red',
#       'black'),alpha=0.15) +
#   geom_abline() +
#   scale_y_log10(label=scientific_10,limits=c(1e-4,1e4))+
#   scale_x_log10(label=scientific_10,limits=c(1e-4,1e4))+
#   xlab("Simulated Concentrations") +
#   ylab("Observed Concentrations") +
#   theme_bw() +
#   geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall', linetype="Overall")) +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = species, linetype = species)) +
#   geom_text(
#     x = 2,
#     y = -1,
#     size = 4,
#     label = paste0("RMSLE: ",
#             round(totalrmse,digits = 2)
#             )) +
#   scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#   scale_linetype_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#   theme(
#     plot.title = element_text(face = 'bold', size = font.size.small),
#     axis.title.x = element_text(face = 'bold', size = font.size.large),
#     axis.text.x = element_text(size=font.size.small),
#     axis.title.y = element_text(face = 'bold', size = font.size.large),
#     axis.text.y = element_text(size = font.size.small),
#     legend.title = element_text(face = 'bold', size = font.size.large),
#     legend.text = element_text(face = 'bold',size = font.size.large))
# print(figaddmodels)
# ggsave("c:/users/jwambaug/AddModelsFig1.tiff", width=6, height=4, dpi=300)
# 
# counts <- simobsfull[,c("chem","dose","explen","species")]
# counts <- subset(counts,!duplicated(counts))
# paste(length(unique(counts$chem)),
#       "chemicals across",dim(counts)[1],
#       "experimental conditions in",
#       length(unique(counts$species)),"species.")

## ----write_figure2, eval = FALSE----------------------------------------------
# pdf("Linakis2020/Figure2.pdf", width = 10, height = 10)
# print(fig2)
# dev.off()

## ----calculations, eval = execute.vignette------------------------------------
# # Create data and run calculations for populating plots
# cmaxfull <- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0)
# cmaxobs <- cmaxfull$Cmaxobs
# cmaxsim <- cmaxfull$Cmaxsim
# cmaxobs <- cmaxobs[!is.nan(cmaxsim)]
# cmaxsim <- cmaxsim[!is.nan(cmaxsim)]
# cmaxsim[!is.finite(log10(cmaxsim))] <- NA
# cmaxlm <- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude)
# cmaxvcbkg <- subset(cmaxfull,
#   paste(
#     cmaxfull$chem,
#     cmaxfull$dose,
#     cmaxfull$explen,
#     cmaxfull$species,
#     cmaxfull$matrix) %in%
#   paste(
#     t0df$chem,
#     t0df$dose,
#     t0df$explen,
#     t0df$species,
#     t0df$matrix))
# cmaxvcbkg$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc
# cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc
# aucfull <-subset(simobsfull,
#   !duplicated(simobsfull$AUCobs) &
#   simobsfull$AUCobs != 0)
# aucobs <- aucfull$AUCobs
# aucsim <- aucfull$AUCsim
# aucobs <- aucobs[!is.nan(aucsim)]
# aucsim <- aucsim[!is.nan(aucsim)]
# aucsim[!is.finite(log10(aucsim))] <- NA
# auclm <- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude)
# cmaxslope <- summary(cmaxlm)$coef[2,1]
# cmaxrsq <- summary(cmaxlm)$r.squared
# totalrmsecmax <- sqrt(mean((log10(cmaxfull$Cmaxsim) -
#   log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE))
# cmaxmiss <- nrow(cmaxfull[
#   abs(log10(cmaxfull$Cmaxsim) -
#   log10(cmaxfull$Cmaxobs)) > 1,])
# cmaxmissp <- nrow(cmaxfull[
#   abs(log10(cmaxfull$Cmaxsim) -
#   log10(cmaxfull$Cmaxobs)) > 1,]) /
#   nrow(cmaxfull) * 100
# cmaxmisschem <- table(cmaxfull[
#   abs(log10(cmaxfull$Cmaxsim) -
#   log10(cmaxfull$Cmaxobs)) > 1,]$chem)
# aucslope <- summary(auclm)$coef[2,1]
# aucrsq <- summary(auclm)$r.squared
# totalrmseauc <- sqrt(mean((
#   log10(aucfull$AUCsim) -
#   log10(aucfull$AUCobs))^2, na.rm = TRUE))
# aucmiss <- nrow(aucfull[
#   abs(log10(aucfull$AUCsim) -
#   log10(aucfull$AUCobs)) > 1,])
# aucmissp <- nrow(aucfull[
#   abs(log10(aucfull$AUCsim) -
#   log10(aucfull$AUCobs)) > 1,]) /
#   nrow(aucfull) * 100
# aucmisschem <- table(aucfull[
#   abs(log10(aucfull$AUCsim) -
#   log10(aucfull$AUCobs)) > 1,]$chem)

## ----Fig4plot, eval = execute.vignette----------------------------------------
# cmaxp <- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) +
#   geom_point(color =
#     ifelse(abs(log10(cmaxfull$Cmaxsim)  -log10(cmaxfull$Cmaxobs))>=2, "red","black"))  +
#   geom_abline()  +
#   xlab("Log(Simulated Max Concentration)") +
#   ylab("Log(Observed Max Concentration)") +
#   theme_bw() +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#   geom_text(x = Inf,
#     y = -Inf,
#     hjust = 1.05,
#     vjust = -0.15,
# #    size = 6,
#     label = paste0("Regression slope: ",
#       round(summary(cmaxlm)$coef[2,1],digits = 2),
#       "\nRegression R^2: ",
#       round(summary(cmaxlm)$r.squared,digits = 2))) +
#   geom_text_repel(
#     data = cmaxfull[
#       (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
#       log10(cmaxfull$Cmaxsim) > 2,],
#     aes(label = paste(chem,species,matrix)),
#     force = 2,
#  #   size = 5.3,
#     fontface = 'bold',
#     color = 'black',
#     hjust = -0.05,
#     vjust = 0.5) +
#   scale_y_continuous(lim = c (-1,5)) +
#   scale_x_continuous(lim = c(-1,5)) +
#   geom_text_repel(
#     data = cmaxfull[
#       (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
#       log10(cmaxfull$Cmaxsim) <= 2,],
#     aes(label = paste(chem,species,matrix)),
#     nudge_x = 0.0,
#     nudge_y = -0.2,
#     force = 2,
# #    size = 5.3,
#     fontface = 'bold',
#     color = 'black',
#     hjust = -0.05,
#     vjust = 0.5) +
#   geom_text(
#     data = cmaxfull[
#       (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-2,],
#     aes(label = paste(chem,species,matrix)),
# #    size = 5.3,
#     fontface = 'bold',
#     color = 'black',
#     hjust = 0.5,
#     vjust = -0.7) +
#   scale_color_discrete(
#     name = 'Species',
#     breaks = c("Overall","Human","Rat")) #+
# #  theme(plot.title = element_text(face = 'bold', size = 10),
# #    axis.title.x = element_text(face = 'bold', size = 10),
# #    axis.text.x = element_text(size=8),
# #    axis.title.y = element_text(face = 'bold', size = 10),
# #    axis.text.y = element_text(size = 8),
# #    legend.title = element_text(face = 'bold', size = 8),
# #    legend.text = element_text(face = 'bold',size = 8))
# cmaxp
# aucp <- ggplot(
#   data = aucfull,
#   aes(x = log10(AUCsim), y = log10(AUCobs))) +
#   geom_point(color =
#     ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2, "red","black"))  +
#   geom_abline()  +
#   xlab("Log(Simulated AUC)") +
#   ylab("Log(Observed AUC)") +
#   theme_bw() +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) +
#   geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#   geom_text(
#     x = Inf,
#     y = -Inf,
#     hjust = 1.05,
#     vjust = -0.15,
# #    size = 6,
#     label = paste0(
#       "Regression slope: ",
#       round(summary(auclm)$coef[2,1],digits = 2),
#       "\nRegression R^2: ",
#       round(summary(auclm)$r.squared,digits = 2))) +
#   geom_text_repel(
#     data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2,],
#     aes(label = paste(chem,species,matrix)),
# #    size = 5.3,
#     fontface = 'bold',
#     color = 'black',
#     hjust = -0.05,
#     vjust = 0.5) +
#   scale_y_continuous(lim = c (-2,4)) +
#   scale_x_continuous(lim = c(-2,4)) +
#   geom_text(
#     data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-2,],
#     aes(label = paste(chem,species,matrix)),
# #    size = 5.3,
#     fontface = 'bold',
#     color = 'black',
#     hjust = 0.5,
#     vjust = -0.8) +
#   scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
# #  theme(
# #    plot.title = element_text(face = 'bold', size = 15),
# #    axis.title.x = element_text(face = 'bold', size = 20),
# #    axis.text.x = element_text(size=16),
# #    axis.title.y = element_text(face = 'bold', size = 20),
# #    axis.text.y = element_text(size = 16),
# #    legend.title = element_text(face = 'bold', size = 16),
# #    legend.text = element_text(face = 'bold',size = 14))
# aucp

## ----write_figure4, eval = execute.vignette-----------------------------------
# pdf("Linakis2020/Figure4.pdf", width = 20, height = 10)
# plot_grid(cmaxp,aucp,nrow = 1, labels = c('A','B'), label_size = 30)
# dev.off()

## ----Fig3plot, eval = execute.vignette----------------------------------------
# simobsfull$aggscen <- as.factor(paste(simobsfull$chem,
#   simobsfull$species,
#   simobsfull$matrix))
# chem.lm <- lm(
#   log10(simconc) - log10(obsconc) ~
#   aggscen,
#   data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,])
# chem.res <- resid(chem.lm)
# # Number of observations per chemical class
# numpt <- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>%
#   group_by(chemclass) %>% summarize(n = paste("n =", length(simconc)))
# fig3 <- ggplot(
#   data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#   aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) +
#   geom_boxplot() +
#   geom_hline(yintercept = 0) +
#   geom_hline(yintercept = 2, linetype = 2)+
#   geom_hline(yintercept = -2, linetype = 2)+
#   xlab("Exposure Scenario") +
#   ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#   facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster
#   theme_bw() +
#   geom_text(
#     data = numpt,
#     aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n),
#     size = 10,
#     colour = 'black',
#     inherit.aes = FALSE,
#     parse = FALSE) +
#   theme(
#     axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'),
#     strip.text.x = element_text(face = 'bold', size = 24),
#     legend.position = 'none',
#     axis.title.x = element_text(face = 'bold', size = 30),
#     axis.title.y = element_text(face = 'bold', size = 30),
#     axis.text.y = element_text(face = 'bold',size = 25, color = 'black'))
# fig3

## ----write_figure3, eval = execute.vignette-----------------------------------
# pdf("Linakis2020/Figure3.pdf", width = 40, height = 13)
# print(fig3)
# dev.off()

## ----FigS1, eval = execute.vignette-------------------------------------------
# figs1a <- ggplot(
#     data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#     aes(x = tquart, y = log10(simconc)-log10(obsconc))) +
#   geom_boxplot()+
#   geom_hline(yintercept = 0)+
#   geom_hline(yintercept = 2, linetype = 2)+
#   geom_hline(yintercept = -2, linetype = 2)+
#   xlab("\nTime Quartile\n") +
#   ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#   theme_bw()+
#   theme(
#     axis.text.x = element_text(size = 20, face = 'bold'),
#     strip.text.x = element_text(face = 'bold', size = 20),
#     legend.position = 'none',
#     axis.title.x = element_text(face = 'bold', size = 20),
#     axis.title.y = element_text(face = 'bold', size = 20),
#     axis.text.y = element_text(size = 20, face = 'bold'))
# figs1a
# figs1b <- ggplot(
#     data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#     aes(x = mw, y = log10(simconc)-log10(obsconc))) +
#   geom_point()+
#   geom_hline(yintercept = 0)+
#   geom_hline(yintercept = 2, linetype = 2)+
#   geom_hline(yintercept = -2, linetype = 2)+
#   xlab("\nMolecular Weight (g/mol)\n") +
#   ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#   theme_bw()+
#   theme(
#     axis.text.x = element_text(size = 20, face = 'bold'),
#     strip.text.x = element_text(face = 'bold', size = 20),
#     legend.position = 'none',
#     axis.title.x = element_text(face = 'bold', size = 20),
#     axis.title.y = element_text(face = 'bold', size = 20),
#     axis.text.y = element_text(size = 20, face = 'bold'))
# figs1b
# figs1c <- ggplot(
#     data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#     aes(x = logp, y = log10(simconc)-log10(obsconc))) +
#   geom_point()+
#   geom_hline(yintercept = 0)+
#   geom_hline(yintercept = 2, linetype = 2)+
#   geom_hline(yintercept = -2, linetype = 2)+
#   xlab("\nLog P") +
#   ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#   theme_bw() +
#   theme(
#     axis.text.x = element_text(size = 20, face = 'bold'),
#     strip.text.x = element_text(face = 'bold', size = 20),
#     legend.position = 'none',
#     axis.title.x = element_text(face = 'bold', size = 20),
#     axis.title.y = element_text(face = 'bold', size = 20),
#     axis.text.y = element_text(size = 20, face = 'bold'))
# figs1c
# figs1d <- ggplot(
#     data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#     aes(x = sol, y = log10(simconc)-log10(obsconc))) +
#   geom_point()+
#   geom_hline(yintercept = 0)+
#   geom_hline(yintercept = 2, linetype = 2)+
#   geom_hline(yintercept = -2, linetype = 2)+
#   xlab("\nSolubility (mol/L)") +
#   ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#   scale_x_log10()+
#   theme_bw() +
#   theme(
#     axis.text.x = element_text(size = 20, face = 'bold'),
#     strip.text.x = element_text(face = 'bold', size = 20),
#     legend.position = 'none',
#     axis.title.x = element_text(face = 'bold', size = 20),
#     axis.title.y = element_text(face = 'bold', size = 20),
#     axis.text.y = element_text(size = 20, face = 'bold'))
# figs1d

## ----write_figures1, eval = execute.vignette----------------------------------
# pdf("Linakis2020/FigureS1.pdf", width = 20, height = 20)
# plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30)
# dev.off()

## ----SupTable2, eval = execute.vignette---------------------------------------
# senschem <- list()
# sensslope <- list()
# sensrsq <- list()
# sensregrmse <- list()
# senstotalrmse <- list()
# senspmiss <- list()
# senscmaxslope <- list()
# senscmaxrsq <- list()
# senstotalrmsecmax <- list()
# sensaucslope <- list()
# sensaucrsq <- list()
# senstotalrmseauc <- list()
# for (i in 1:nrow(simobsfull))
# {
#   simobsfullsens <- subset(simobsfull,simobsfull$chem != simobsfull$chem[i])
#   senschem[i] <- as.character(simobsfull$chem[i])
#   senslm <- lm(
#     log10(simobsfullsens$obsconc[
#       !is.na(simobsfullsens$simconc) &
#       simobsfullsens$simconc > 0 &
#       simobsfullsens$obsconc > 0]) ~
#     log10(simobsfullsens$simconc[
#       !is.na(simobsfullsens$simconc) &
#       simobsfullsens$simconc >0 &
#       simobsfullsens$obsconc > 0]))
#   sensslope[i] <- round(summary(senslm)$coef[2,1],digits = 2)
#   sensrsq[i] <- round(summary(senslm)$r.squared,digits = 2)
#   sensregrmse[i] <- round(sqrt(mean(senslm$residuals^2)),digits = 2)
#   senstotalrmse[i] <- round(sqrt(mean((
#     log10(simobsfullsens$simconc[
#       !is.na(simobsfullsens$simconc) &
#       simobsfullsens$simconc >0 &
#       simobsfullsens$obsconc > 0]) -
#     log10(simobsfullsens$obsconc[
#       !is.na(simobsfullsens$simconc) &
#         simobsfullsens$simconc >0 &
#         simobsfullsens$obsconc > 0]))^2,
#     na.rm = TRUE)), digits = 2)
#   senspmiss[i] <- round((nrow(simobsfullsens) -
#     nrow(simobsfullsens[
#       !is.na(simobsfullsens$simconc) &
#       simobsfullsens$simconc >0 &
#       simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100,
#     digits = 2)
#   senscmaxfull <- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs))
#   senscmaxlm <- lm(
#     log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~
#     log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]),
#     na.action = na.exclude)
#   sensaucfull <-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs))
#   sensauclm <- lm(
#     log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~
#     log10(aucfull$AUCsim[aucfull$AUCobs>0]),
#     na.action = na.exclude)
#   senscmaxslope[i] <- round(summary(senscmaxlm)$coef[2,1],digits = 2)
#   senscmaxrsq[i] <- round(summary(senscmaxlm)$r.squared,digits = 2)
#   senstotalrmsecmax[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE))
#   sensaucslope[i] <- round(summary(sensauclm)$coef[2,1],digits = 2)
#   sensaucrsq[i] <- round(summary(sensauclm)$r.squared,digits = 2)
#   senstotalrmseauc[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE))
# }
# sensitivitydf <- data.frame(Chemical <- as.character(senschem),
#                             sensSlope <- as.numeric(sensslope),
#                             sensRsq <- as.numeric(sensrsq),
#                             sensRegrmse <- as.numeric(sensregrmse),
#                             sensTotrmse <- as.numeric(senstotalrmse),
#                             sensPmiss <- as.numeric(senspmiss),
#                             sensCmaxslope <- as.numeric(senscmaxslope),
#                             sensCmaxrsq <- as.numeric(senscmaxrsq),
#                             sensCmaxrmse <- as.numeric(senstotalrmsecmax),
#                             sensAUCslope <- as.numeric(sensaucslope),
#                             sensAUCrsq <- as.numeric(sensaucrsq),
#                             sensAUCrmse <- as.numeric(senstotalrmseauc),
#                             stringsAsFactors=FALSE)
# sensitivitydf <- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.))
# names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE')
# notdropped <- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc)
# sensitivitydf <- rbind(notdropped, sensitivitydf)
# sensitivitydf[,-1] <- sapply(sensitivitydf[,-1],as.numeric)
# sensitivitydf[,-1] <- round(sensitivitydf[,-1],2)
#   # Clean up and write file
# rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse)
# 
# write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE)

## ----SupTable1, eval = execute.vignette---------------------------------------
# supptab1 <- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES))
# for(i in 1:nrow(supptab1)){
#   tryCatch({
#     supptab1$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#     supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#   }, error = function(e){})
# }
# supptab1 <- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')]
# names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source')
# write.csv(supptab1, "supptab1.csv", row.names = FALSE)