% Generated by roxygen2 (4.0.1): do not edit by hand
\name{check_spelling}
\alias{check_spelling}
\alias{check_spelling_interactive}
\alias{correct}
\alias{which_misspelled}
\title{Check Spelling}
\usage{
check_spelling(text.var, range = 2, assume.first.correct = TRUE,
  method = "jw", dictionary = qdapDictionaries::GradyAugmented,
  parallel = TRUE, cores = parallel::detectCores()/2, n.suggests = 8)

which_misspelled(x, suggest = FALSE, range = 2,
  assume.first.correct = TRUE,
  dictionary = qdapDictionaries::GradyAugmented, method = "jw",
  nchar.dictionary = nchar(dictionary),
  first.char.dictionary = substring(dictionary, 1, 1), n.suggests = 8)

check_spelling_interactive(text.var, range = 2, assume.first.correct = TRUE,
  click = TRUE, method = "jw",
  dictionary = qdapDictionaries::GradyAugmented, parallel = TRUE,
  cores = parallel::detectCores()/2, n.suggests = 8, ...)

correct(x, ...)
}
\arguments{
\item{text.var}{The text variable.}

\item{range}{An integer of length 1 to use as a range for number of
characters, beyond the number of characters of a word not found in the
\code{dictionary}, to initially limit \code{dictionary} size and thus time to
find a suggested replacement term.  This may be expanded if no suitable
suggestion is returned.}

\item{assume.first.correct}{logical.  If \code{TRUE} it is assumed that the
first letter of the misspelled word is correct.  This reduces the dictionary
size, thus speeding up computation.}

\item{method}{Method for distance calculation. The default is \code{"jw"}.  It
is assumed that smaller measures indicate closer distance.  Measures that do
not adhere to this assumption will result in incorrect output (see
\code{\link[stringdist]{stringdist}} for details).}

\item{dictionary}{A character vector of terms to search for.  To reduce
overhead it is expected that this dictionary is lower case, unique terms.}

\item{parallel}{logical.  If \code{TRUE} attempts to run the function on
multiple cores.  Note that this may not mean a speed boost if you have one
core or if the data set is smaller as the cluster takes time to create.}

\item{cores}{The number of cores to use if \code{parallel = TRUE}.  Default
is half the number of available cores.}

\item{n.suggests}{The number of terms to suggest.  In the case of a tie
(multiple terms have the same distance from misspelled word) all will be provided.
Dictionary reduction may result in fewer than \code{n.suggests} suggested terms.}

\item{x}{If \code{which_misspelled} - A character string.  If \code{correct} -
An object from \code{check_spelling_interactive}.}

\item{suggest}{logical.  If \code{TRUE} returns a
\code{\link[base]{data.frame}} with possible suggestions for misspelled words
(words not found in the dictionary).}

\item{nchar.dictionary}{A vector that corresponds in length and content to
\code{dictionary} with elements that are the precalculated number of
characters for each word in the dictionary.}

\item{first.char.dictionary}{A vector that corresponds in length and content
to \code{dictionary} with elements that are the pre-allotted first characters
of each word in the dictionary.}

\item{click}{logical.  If \code{TRUE} the interface is a point and click GUI.
If \code{FALSE} the interface is command line driven.}

\item{\ldots}{ignored}
}
\value{
\code{check_spelling} - Returns a \code{\link[base]{data.frame}} with
\code{row} (row number), \code{word.no} (number of
misspelled word), \code{not.found} (a word not found in the dictionary),
\code{suggestion} (the most likely replacement for the word), and
\code{more.suggestions} (a list of vectors of the most likely replacements;
see \code{n.suggests}).

\code{which_misspelled} - Returns either a named vector (names are
the word number) of possible misspelled words (if \code{suggest = FALSE})
or a \code{\link[base]{data.frame}} with \code{word.no} (number of misspelled
word), \code{not.found} (a word not found in the dictionary),
\code{suggestion} (the most likely replacement for the word), and
\code{more.suggestions} (a list of vectors of the most likely replacements;
see \code{n.suggests}).

\code{check_spelling_interactive} - Returns a character vector with
the corrected text, the replacement list (via an \code{attribute} to the
character vector), and a function to correct the same spelling errors in
subsequent text character vectors.

\code{correct} - Returns a function for correcting spelling errors.
}
\description{
\code{check_spelling} - Check the spelling for a vector of strings.  The
function uses the following technique:\cr
\itemize{
  \item Separate the words from a string into a bag of words.
  \item Look those words up in a dictionary to find words not recognized/found (considered possibly misspelled).
  \item These misses (possible misspellings) will be what is looked up for suggested replacements.
  \item Optionally, reduce dictionary by assuming the first letter of the misspelled word is correct (dictionary for this letter only).
  \item Reduce dictionary by eliminating words outside of the range of number of characters of the misspelled word.
  \item Use \code{\link[stringdist]{stringdist}} to find string distances between possible replacements and the misspelled term.
  \item Select \emph{n} (\code{n.suggests}) terms from dictionary that are closest to the misspelled term.
}

\code{which_misspelled}  - Check the spelling for a string.

\code{check_spelling_interactive} - Interactively check spelling.

\code{correct} - Access the spell corrector function from a
\code{"check_spelling_interactive"} object for subsequent text character
vector spelling corrections.
}
\note{
A possible misspelled word is defined as not found in the
\code{dictionary}.

\code{check_spelling_interactive} - The user may go back (undo) by
pressing \code{"TYPE MY OWN"} and entering either \code{"!"} (not) or \code{"0"}
(similar to a phone system).  The second choice in the
\code{"SELECT REPLACEMNT:"} will be the original word and is prefixed with
\code{"IGNORE:"}.  Press this to keep the original word.
}
\examples{
\dontrun{
x <- "Robots are evl creatres and deserv exterimanitation."
which_misspelled(x, suggest=FALSE)
which_misspelled(x, suggest=TRUE)

check_spelling(DATA$state)

## browseURL("http://stackoverflow.com/a/24454727/1000343")
terms <- c("accounts", "account", "accounting", "acounting", "acount", "acounts", "accounnt")

set.seed(10)
(fake_text <- unlist(lapply(terms, function(x) {
    unbag(sample(c(x, sample(DICTIONARY[[1]], sample(1:5, 1)))))
})))

check_spelling(fake_text)

##============================##
## INTERACTIVE SPELL CHECKING ##
##============================##

## No misspellings found
check_spelling_interactive(DATA$state)

## character method approach (minimal example)
dat <- DATA$state; dat[1] <- "I likedd the cokie icekream"
(o <- check_spelling_interactive(dat))
preprocessed(o)
fixit <- attributes(o)$correct
fixit(dat)

## character method approach (larger example)
m <- check_spelling_interactive(mraja1spl$dialogue[1:75])
preprocessed(m)
fixit <- attributes(m)$correct
fixit(mraja1spl$dialogue[1:75])

## check_spelling method approach
out <- check_spelling(mraja1spl$dialogue[1:75])
(x <- check_spelling_interactive(out))
preprocessed(x)
correct(x)(mraja1spl$dialogue[1:75])
(y <- check_spelling_interactive(out, click=FALSE))
preprocessed(y)

## Examine Methods (?stringdist::stringdist)
strings <- c(
    "Robots are evl creatres and deserv exterimanitation kream.",
    "I gots me a biggert measrue, tommorrow"
)

meths <- c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw")

setNames(lapply(meths, function(x) check_spelling(strings, method=x)), meths)
}
}
\references{
\url{http://stackoverflow.com/a/24454727/1000343} \cr
\url{http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Hornik+Murdoch.pdf}
}
\seealso{
\code{\link[stringdist]{stringdist}}
}

