% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/unknown.R
\name{step_unknown}
\alias{step_unknown}
\alias{tidy.step_unknown}
\title{Assign missing categories to "unknown"}
\usage{
step_unknown(
  recipe,
  ...,
  role = NA,
  trained = FALSE,
  new_level = "unknown",
  objects = NULL,
  skip = FALSE,
  id = rand_id("unknown")
)

\method{tidy}{step_unknown}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the
sequence of operations for this recipe.}

\item{...}{One or more selector functions to choose which
variables will be affected by the step. These variables
should be character or factor types. See \code{\link[=selections]{selections()}} for more
details. For the \code{tidy} method, these are not currently used.}

\item{role}{Not used by this step since no new variables are
created.}

\item{trained}{A logical to indicate if the quantities for
preprocessing have been estimated.}

\item{new_level}{A single character value that will be assigned
to missing values as a new factor level.}

\item{objects}{A list of objects that contain the information
on factor levels that will be determined by \code{\link[=prep.recipe]{prep.recipe()}}.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}

\item{x}{A \code{step_unknown} object.}
}
\value{
An updated version of \code{recipe} with the new step
added to the sequence of existing steps (if any). For the
\code{tidy} method, a tibble with columns \code{terms} (the
columns that will be affected) and \code{value} (the factor
level that is used for the new value).
}
\description{
\code{step_unknown} creates a \emph{specification} of a recipe
step that will assign missing values in a factor to a new level
(by default, "unknown").
}
\details{
The selected variables are adjusted to have a new
level (given by \code{new_level}) that is placed in the last
position.

Note that if the original columns are character, they will be
converted to factors by this step.

If \code{new_level} is already in the data given to \code{prep}, an error
is thrown.
}
\examples{
library(modeldata)
data(okc)

rec <-
  recipe(~ diet + location, data = okc) \%>\%
  step_unknown(diet, new_level = "unknown diet") \%>\%
  step_unknown(location, new_level = "unknown location") \%>\%
  prep()

table(bake(rec, new_data = NULL) \%>\% pull(diet),
      okc \%>\% pull(diet),
      useNA = "always") \%>\%
  as.data.frame() \%>\%
  dplyr::filter(Freq > 0)

tidy(rec, number = 1)
}
\seealso{
\code{\link[=step_factor2string]{step_factor2string()}}, \code{\link[=step_string2factor]{step_string2factor()}},
\code{\link[=dummy_names]{dummy_names()}}, \code{\link[=step_regex]{step_regex()}}, \code{\link[=step_count]{step_count()}},
\code{\link[=step_ordinalscore]{step_ordinalscore()}}, \code{\link[=step_unorder]{step_unorder()}}, \code{\link[=step_other]{step_other()}}, \code{\link[=step_novel]{step_novel()}}
}
\concept{factors}
\concept{preprocessing}
\keyword{datagen}
