% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tm_clean.R
\name{tm_clean}
\alias{tm_clean}
\title{Clean subject line text prior to analysis}
\usage{
tm_clean(data, token = "words", stopwords = NULL, ...)
}
\arguments{
\item{data}{A Meeting Query dataset in the form of a data frame.}

\item{token}{A character vector accepting either \code{"words"} or \code{"ngrams"},
determining the type of tokenisation to return.}

\item{stopwords}{A character vector OR a single-column data frame labelled
\code{'word'} containing custom stopwords to remove.}

\item{...}{Additional parameters to pass to \code{tidytext::unnest_tokens()}.}
}
\value{
A data frame with two columns:
\itemize{
\item \code{line}
\item \code{word}
}
}
\description{
This function processes the \code{Subject} column in a Meeting Query by applying
tokenisation using \code{tidytext::unnest_tokens()}, and removing any stopwords
supplied through the \code{stopwords} argument (either a character vector or a
single-column data frame). This is a
sub-function that feeds into \code{tm_freq()}, \code{tm_cooc()}, and \code{tm_wordcloud()}.
The default is to return a data frame with tokenised counts of words or
ngrams.
}
\examples{
# words
tm_clean(mt_data)

# ngrams
tm_clean(mt_data, token = "ngrams")

}
\seealso{
Other Text-mining: 
\code{\link{meeting_tm_report}()},
\code{\link{pairwise_count}()},
\code{\link{subject_validate}()},
\code{\link{subject_validate_report}()},
\code{\link{tm_cooc}()},
\code{\link{tm_freq}()},
\code{\link{tm_wordcloud}()}
}
\concept{Text-mining}
