% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/soundex.R
\name{soundex}
\alias{soundex}
\alias{refinedSoundex}
\title{Soundex}
\usage{
soundex(word, maxCodeLen = 4L, clean = TRUE)

refinedSoundex(word, maxCodeLen = 10L, clean = TRUE)
}
\arguments{
\item{word}{string or vector of strings to encode}

\item{maxCodeLen}{maximum length of the resulting encodings, in characters}

\item{clean}{if \code{TRUE}, return \code{NA} for unknown alphabetical characters}
}
\value{
soundex encoded character vector
}
\description{
The Soundex phonetic algorithms
}
\details{
The function \code{soundex} phonetically encodes the given
string using the soundex algorithm.  The function \code{refinedSoundex}
uses Apache's refined soundex algorithm.  Both implementations are loosely
based on the Apache Commons Java editions.

The variable \code{maxCodeLen} is the limit on how long the returned
soundex should be.

The \code{soundex} and \code{refinedSoundex} algorithms are only
defined for inputs over the standard English alphabet, \emph{i.e.},
"A-Z." Non-alphabetical characters are removed from the string in a
locale-dependent fashion.  This strips spaces, hyphens, and numbers.
Other letters, such as "Ü," may be permissible in the current locale
but are unknown to \code{soundex} and \code{refinedSoundex}.  For
inputs outside of their known range, the output is undefined and
\code{NA} is returned and a \code{warning} is thrown.  If
\code{clean} is \code{FALSE}, \code{soundex} and
\code{refinedSoundex} attempt to process the strings.  The default
is \code{TRUE}.
}
\section{Caveats}{

The \code{soundex} and \code{refinedSoundex} algorithms are only
defined for inputs over the standard English alphabet, \emph{i.e.},
"A-Z." For inputs outside this range, the output is undefined.
}

\examples{
soundex("wheel")
soundex(c("school", "benji"))

}
\references{
Charles P. Bourne and Donald F. Ford, "A study of methods for
systematically abbreviating English words and names," \emph{Journal
of the ACM}, vol. 8, no. 4 (1961), p. 538--552.

James P. Howard, II, "Phonetic Spelling Algorithm Implementations
for R," \emph{Journal of Statistical Software}, vol. 95, no. 8
(2020), p. 1--21, \doi{10.18637/jss.v095.i08}.

Howard B. Newcombe and James M. Kennedy, "Record linkage: making
maximum use of the discriminating power of identifying information,"
\emph{Communications of the ACM}, vol. 5, no. 11 (1962), p. 563--566.
}
\seealso{
Other phonics: 
\code{\link{caverphone}()},
\code{\link{cologne}()},
\code{\link{lein}()},
\code{\link{metaphone}()},
\code{\link{mra_encode}()},
\code{\link{nysiis}()},
\code{\link{onca}()},
\code{\link{phonex}()},
\code{\link{phonics}()},
\code{\link{rogerroot}()},
\code{\link{statcan}()}
}
\concept{phonics}
