% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/discretize_cart.R
\name{step_discretize_cart}
\alias{step_discretize_cart}
\alias{tidy.step_discretize_cart}
\title{Discretize numeric variables with CART}
\usage{
step_discretize_cart(
  recipe,
  ...,
  role = NA,
  trained = FALSE,
  outcome = NULL,
  cost_complexity = 0.01,
  tree_depth = 10,
  min_n = 20,
  rules = NULL,
  skip = FALSE,
  id = rand_id("discretize_cart")
)

\method{tidy}{step_discretize_cart}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the sequence of
operations for this recipe.}

\item{...}{One or more selector functions to choose which variables are
affected by the step. See \code{\link[=selections]{selections()}} for more details.}

\item{role}{Not used by this step since no new variables are created; the
selected columns are replaced in place. Defaults to \code{NA}.}

\item{trained}{A logical to indicate if the quantities for preprocessing
have been estimated.}

\item{outcome}{A call to \code{vars} to specify which variable is used as the
outcome to train CART models in order to discretize explanatory
variables.}

\item{cost_complexity}{The regularization parameter. Any split that does not
decrease the overall lack of fit by a factor of \code{cost_complexity} is not
attempted. Corresponds to \code{cp} in \code{\link[rpart:rpart]{rpart::rpart()}}. Defaults to 0.01.}

\item{tree_depth}{The \emph{maximum} depth in the final tree. Corresponds to
\code{maxdepth} in \code{\link[rpart:rpart]{rpart::rpart()}}. Defaults to 10.}

\item{min_n}{The number of data points in a node required to continue
splitting. Corresponds to \code{minsplit} in \code{\link[rpart:rpart]{rpart::rpart()}}. Defaults to 20.}

\item{rules}{The splitting rules of the best CART tree to retain for
each variable. If length zero, splitting could not be used on that column.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[recipes:bake]{recipes::bake.recipe()}}? While all operations are baked
when \code{\link[recipes:prep]{recipes::prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}

\item{x}{A \code{step_discretize_cart} object.}
}
\value{
An updated version of \code{recipe} with the new step added to the
sequence of existing steps (if any).
}
\description{
\code{step_discretize_cart} creates a \emph{specification} of a recipe step that will
discretize numeric data (e.g. integers or doubles) into bins in a
supervised way using a CART model.
}
\details{
\code{step_discretize_cart()} creates non-uniform bins from numerical
variables by utilizing the information about the outcome variable and
applying a CART model.

The best set of buckets for each variable is chosen using
the standard cost-complexity pruning of CART, which makes this
discretization method resistant to overfitting.

This step requires the \pkg{rpart} package. If it is not installed, the
step will stop with a note about installing the package.

Note that the original data will be replaced with the new bins.
}
\examples{
# Load the example data set and the resampling helpers.
library(modeldata)
data(ad_data)
library(rsample)

# Split the data into training and test sets, stratified on the outcome.
split <- initial_split(ad_data, strata = "Class")

ad_data_tr <- training(split)
ad_data_te <- testing(split)

# Specify the step: discretize four columns using Class as the outcome.
# The explicit id lets the step be looked up by name in tidy() below.
cart_rec <- 
  recipe(Class ~ ., data = ad_data_tr) \%>\%
  step_discretize_cart(tau, age, p_tau, Ab_42, outcome = "Class", id = "cart splits")

# Estimate the splits from the training set.
cart_rec <- prep(cart_rec, training = ad_data_tr)

# The splits found for the step with this id:
tidy(cart_rec, id = "cart splits")

# Apply the trained recipe to the test set, returning the tau column.
bake(cart_rec, ad_data_te, tau)
}
\seealso{
\code{\link[embed:step_discretize_xgb]{embed::step_discretize_xgb()}}, \code{\link[recipes:recipe]{recipes::recipe()}},
\code{\link[recipes:prep]{recipes::prep.recipe()}}, \code{\link[recipes:bake]{recipes::bake.recipe()}}
}
\concept{discretization}
\concept{factors}
\concept{preprocessing}
\keyword{binning}
