% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nlp_cooccurrence.R
\name{cooccurrence}
\alias{cooccurrence}
\alias{cooccurrence.character}
\alias{cooccurrence.cooccurrence}
\alias{cooccurrence.data.frame}
\title{Create a cooccurrence data.frame}
\usage{
cooccurrence(x, order = TRUE, ...)

\method{cooccurrence}{character}(x, order = TRUE, ...)

\method{cooccurrence}{cooccurrence}(x, order = TRUE, ...)

\method{cooccurrence}{data.frame}(x, order = TRUE, ..., group, term)
}
\arguments{
\item{x}{either
\itemize{
  \item a data.frame where the data.frame contains 1 row per document/term,
  in which case you need to provide \code{group} and \code{term}. This uses \code{cooccurrence.data.frame}.
  \item a character vector with terms. This uses \code{cooccurrence.character}.
  \item an object of class \code{cooccurrence}. This uses \code{cooccurrence.cooccurrence}.
}}

\item{order}{logical indicating if we need to sort the output from high cooccurrences to low cooccurrences. Defaults to TRUE.}

\item{...}{other arguments passed on to the methods}

\item{group}{character string with a column in the data frame \code{x}. To be used if \code{x} is a data.frame.}

\item{term}{character string with a column in the data frame \code{x}, containing 1 term per row. To be used if \code{x} is a data.frame.}
}
\value{
a data.frame with columns term1, term2 and cooc indicating
for the combination of term1 and term2 how many times this combination occurred
}
\description{
A cooccurrence data.frame indicates how many times each term co-occurs with another term.
This type of dataset is a data.frame with fields term1, term2 and cooc where cooc indicates how many times
term1 and term2 co-occurred.\cr
The dataset can be constructed based upon a data frame, in which case we look within each group whether 2 terms co-occurred.\cr
It can also be constructed based upon a vector of words, in which case we look at how many times each word is
followed by another word.
}
\section{Methods (by class)}{
\itemize{
\item \code{character}: Create a cooccurrence data.frame based on a vector of terms

\item \code{cooccurrence}: Aggregate co-occurrence statistics by summing the cooc by term1/term2

\item \code{data.frame}: Create a cooccurrence data.frame based on a data.frame where you look within a document / sentence / paragraph / group
if terms co-occur
}}

\examples{
data(brussels_reviews_anno)

## By document, which lemmas co-occur
x <- subset(brussels_reviews_anno, xpos \%in\% c("NN", "JJ") & language \%in\% "fr")
x <- cooccurrence(x, group = "doc_id", term = "lemma")
head(x)

## Which words follow each other
x <- c("A", "B", "A", "B", "c")
cooccurrence(x)

data(brussels_reviews_anno)
x <- subset(brussels_reviews_anno, language == "es")
x <- cooccurrence(x$lemma)
head(x)

## Which nouns follow each other in the same document
library(data.table)
x <- as.data.table(brussels_reviews_anno)
x <- subset(x, language == "nl" & xpos \%in\% c("NN"))
x <- x[, cooccurrence(lemma, order = FALSE), by = list(doc_id)]
head(x)

x_nodoc <- cooccurrence(x)
x_nodoc <- subset(x_nodoc, term1 != "appartement" & term2 != "appartement")
head(x_nodoc)
}
