% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/run_experiment.R
\name{run_experiment}
\alias{run_experiment}
\title{Run a full experiment for comparing multiple algorithms using multiple
instances}
\usage{
run_experiment(instances, algorithms, d, se.max, power = 0.8,
  sig.level = 0.05, power.target = "mean", dif = "simple",
  comparisons = "all.vs.all", alternative = "two.sided",
  test = "t.test", method = "param", nstart = 20, nmax = 100 *
  length(algorithms), force.balanced = FALSE, ncpus = 2,
  boot.R = 499, seed = NULL, save.partial.results = NA,
  load.partial.results = NA, save.final.result = NA)
}
\arguments{
\item{instances}{list object containing the definitions of the
\emph{available} instances. This list may (or may not) be exhausted in the
experiment. To estimate the number of required instances,
see \code{\link[=calc_instances]{calc_instances()}}. For more details, see Section \code{Instance List}.}

\item{algorithms}{a list object containing the definitions of all algorithms.
See Section \code{Algorithm List} for details.}

\item{d}{minimally relevant effect size (MRES), expressed as a standardized
effect size, i.e., "deviation from H0" / "standard deviation".
See \code{\link[=calc_instances]{calc_instances()}} for details.}

\item{se.max}{desired upper limit for the standard error of the estimated
difference between pairs of algorithms. See Section
\code{Pairwise Differences} for details.}

\item{power}{(desired) test power. See \code{\link[=calc_instances]{calc_instances()}} for details.
Any value equal to or greater than one will force the method to use all
available instances in \code{instances}.}

\item{sig.level}{family-wise significance level (alpha) for the experiment.
See \code{\link[=calc_instances]{calc_instances()}} for details.}

\item{power.target}{which comparison should have the desired \code{power}?
Accepts "mean", "median", or "worst.case" (this last one
is equivalent to the Bonferroni correction).}

\item{dif}{type of difference to be used. Accepts "perc" (for percent
differences) or "simple" (for simple differences)}

\item{comparisons}{type of comparisons being performed. Accepts "all.vs.first"
(in which case the first object in \code{algorithms} is considered to be
the reference algorithm) or "all.vs.all" (if there is no reference
and all pairwise comparisons are desired).}

\item{alternative}{type of alternative hypothesis ("two.sided" or
"less" or "greater"). See \code{\link[=calc_instances]{calc_instances()}} for details.}

\item{test}{type of test to be used
("t.test", "wilcoxon" or "binomial")}

\item{method}{method to use for estimating the standard errors. Accepts
"param" (for parametric) or "boot" (for bootstrap)}

\item{nstart}{initial number of algorithm runs for each algorithm.
See Section \code{Initial Number of Observations} for details.}

\item{nmax}{maximum number of runs to execute on each instance (see
\code{\link[=calc_nreps]{calc_nreps()}}). Loaded results (see \code{load.partial.results}
below) do not count towards this maximum.}

\item{force.balanced}{logical flag to force the use of balanced sampling for
the algorithms on each instance}

\item{ncpus}{number of cores to use}

\item{boot.R}{number of bootstrap resamples to use (if \code{method == "boot"})}

\item{seed}{seed for the random number generator}

\item{save.partial.results}{should partial results be saved to files? Can be
either \code{NA} (do not save) or a character string
pointing to a folder. File names are generated
based on the instance aliases. \strong{Existing files with
matching names will be overwritten.}
\code{run_experiment()} uses \strong{.RDS} files for saving
and loading.}

\item{load.partial.results}{should partial results be loaded from files? Can
be either \code{NA} (do not load) or a character
string pointing to a folder containing the
file(s) to be loaded. \code{run_experiment()} will
use .RDS file(s) with name(s) matching instance
\code{alias}es. \code{run_experiment()} uses \strong{.RDS} files
for saving and loading.}

\item{save.final.result}{should the final results be saved to file? Can be
either \code{NA} (do not save) or a character string
pointing to a folder where the results will be
saved on a \strong{.RDS} file starting with
\code{CAISEr_results_} and ending with a 14-digit
datetime tag in the format \code{YYYYMMDDhhmmss}.}
}
\value{
a list object containing the following fields:
\itemize{
\item \code{Configuration} - the full input configuration (for reproducibility)
\item \code{data.raw} - data frame containing all observations generated
\item \code{data.summary} - data frame summarizing the experiment.
\item \code{N} - number of instances sampled
\item \code{N.star} - number of instances required
\item \code{total.runs} - total number of algorithm runs performed
\item \code{instances.sampled} - names of the instances sampled
\item \code{Underpowered} - flag: TRUE if N < N.star
}
}
\description{
Design and run a full experiment - calculate the required number of
instances, run the algorithms on each problem instance using the iterative
approach based on optimal sample size ratios, and return the results of the
experiment. This routine builds upon \code{\link[=calc_instances]{calc_instances()}} and \code{\link[=calc_nreps]{calc_nreps()}},
so refer to the documentation of these two functions for details.
}
\section{Instance List}{

Parameter \code{instances} must contain a list of instance objects, where
each field is itself a list, as defined in the documentation of function
\code{\link[=calc_nreps]{calc_nreps()}}. In short, each element of \code{instances} is an \code{instance}, i.e.,
a named list containing all relevant parameters that define the problem
instance. This list must contain at least the field \code{instance$FUN}, with the
name of the problem instance function, that is, a routine that calculates
y = f(x). If the instance requires additional parameters, these must also be
provided as named fields.
An additional field, \code{instance$alias}, can be used to provide the instance
with a unique identifier (e.g., when using an instance generator).
}

\section{Algorithm List}{

Object \code{algorithms} is a list in which each component is a named
list containing all relevant parameters that define an algorithm to be
applied for solving the problem instance. In what follows \code{algorithms[[k]]}
refers to any algorithm specified in the \code{algorithms} list.

\code{algorithms[[k]]} must contain an \code{algorithms[[k]]$FUN} field, which is a
character object with the name of the function that calls the algorithm; as
well as any other elements/parameters that \code{algorithms[[k]]$FUN} requires
(e.g., stop criteria, operator names and parameters, etc.).

The function defined by the routine \code{algorithms[[k]]$FUN} must have the
following structure: supposing that the list in \code{algorithms[[k]]} has
fields \code{algorithms[[k]]$FUN = "myalgo"}, \code{algorithms[[k]]$par1 = "a"} and
\code{algorithms[[k]]$par2 = 5}, then:

\preformatted{
         myalgo <- function(par1, par2, instance, ...){
               #
               # <do stuff>
               #
               return(results)
         }
   }

That is, it must be able to run if called as:

\preformatted{
         # remove '$FUN' and '$alias' fields from list of arguments
         # and include the problem definition as field 'instance'
         myargs          <- algorithm[names(algorithm) != "FUN"]
         myargs          <- myargs[names(myargs) != "alias"]
         myargs$instance <- instance

         # call function
         do.call(algorithm$FUN,
                 args = myargs)
   }

The \code{algorithm$FUN} routine must return a list containing (at
least) the performance value of the final solution obtained, in a field named
\code{value} (e.g., \code{result$value}) after a given run. In general it is easier to
write a small wrapper function around existing implementations.
}

\section{Initial Number of Observations}{

In the \emph{general case} the initial number of observations / algorithm /
instance (\code{nstart}) should be relatively high. For the parametric case
we recommend 10~15 if outliers are not expected, and 30~40 (at least) if that
assumption cannot be made. For the bootstrap approach we recommend using at
least 15 or 20. However, if some distributional assumptions can be
made (particularly low skewness of the population of algorithm results on
the test instances), then \code{nstart} can in principle be as small as 5 (if the
output of the algorithm were known to be normal, it could be 1).

In general, higher sample sizes are the price to pay for abandoning
distributional assumptions. Use lower values of \code{nstart} with caution.
}

\section{Pairwise Differences}{

Parameter \code{dif} informs the type of difference in performance to be used
for the estimation (\eqn{\mu_a} and \eqn{\mu_b} represent the mean
performance of any two algorithms on the test instance, and \eqn{\mu}
represents the grand mean of all algorithms given in \code{algorithms}):
\itemize{
\item If \code{dif == "perc"} and \code{comparisons == "all.vs.first"}, the estimated
quantity is:
\eqn{\phi_{1b} = (\mu_1 - \mu_b) / \mu_1 = 1 - (\mu_b / \mu_1)}.
\item If \code{dif == "perc"} and \code{comparisons == "all.vs.all"}, the estimated
quantity is:
\eqn{\phi_{ab} = (\mu_a - \mu_b) / \mu}.
\item If \code{dif == "simple"} it estimates \eqn{\mu_a - \mu_b}.
}
}

\section{Sample Sizes for Nonparametric Methods}{

If the parameter \code{test} is set to either \code{"wilcoxon"} or
\code{"binomial"}, this routine approximates the number of instances using
the asymptotic relative efficiency (ARE) of these tests in relation to the
paired t-test:
\itemize{
\item \code{n.wilcox = n.ttest / 0.86 = 1.163 * n.ttest}
\item \code{n.binom = n.ttest / 0.637 = 1.570 * n.ttest}
}
}

\examples{
# Example using four dummy algorithms and 100 dummy instances.
# See [dummyalgo()] and [dummyinstance()] for details.
# Generating 4 dummy algorithms here, with means 15, 10, 30, 15 and standard
# deviations 2, 4, 6, 8.
algorithms <- mapply(FUN = function(i, m, s){
  list(FUN   = "dummyalgo",
       alias = paste0("algo", i),
       distribution.fun  = "rnorm",
       distribution.pars = list(mean = m, sd = s))},
  i = c(alg1 = 1, alg2 = 2, alg3 = 3, alg4 = 4),
  m = c(15, 10, 30, 15),
  s = c(2, 4, 6, 8),
  SIMPLIFY = FALSE)

# Generate 100 dummy instances with centered exponential distributions
instances <- lapply(1:100,
                    function(i) {rate <- runif(1, 1, 10)
                                 list(FUN   = "dummyinstance",
                                      alias = paste0("Inst.", i),
                                      distr = "rexp", rate = rate,
                                      bias  = -1 / rate)})

my.results <- run_experiment(instances, algorithms,
                             d = .5, se.max = .1,
                             power = .9, sig.level = .05,
                             power.target = "mean",
                             dif = "perc", comparisons = "all.vs.all",
                             ncpus = 1, seed = 1234)

# Take a look at the results
summary(my.results)
plot(my.results)

}
\references{
\itemize{
\item F. Campelo, F. Takahashi:
Sample size estimation for power and accuracy in the experimental
comparison of algorithms. Journal of Heuristics 25(2):305-338, 2019.
\item P. Mathews.
Sample size calculations: Practical methods for engineers and scientists.
Mathews Malnar and Bailey, 2010.
\item A.C. Davison, D.V. Hinkley:
Bootstrap methods and their application. Cambridge University Press (1997)
\item E.C. Fieller:
Some problems in interval estimation. Journal of the Royal Statistical
Society. Series B (Methodological) 16(2), 175–185 (1954)
\item V. Franz:
Ratios: A short guide to confidence limits and proper use (2007).
https://arxiv.org/pdf/0710.2024v1.pdf
\item D.C. Montgomery, C.G. Runger:
Applied Statistics and Probability for Engineers, 6th ed. Wiley (2013)
\item D.J. Sheskin:
Handbook of Parametric and Nonparametric Statistical Procedures,
4th ed., Chapman & Hall/CRC, 1996.
}
}
\author{
Felipe Campelo (\email{fcampelo@ufmg.br},
\email{f.campelo@aston.ac.uk})
}
