% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_mpin.ecm.R
\name{mpin_ecm}
\alias{mpin_ecm}
\title{MPIN model estimation via an ECM algorithm}
\usage{
mpin_ecm(data, layers = NULL, xtraclusters = 4, initialsets = NULL,
                      ..., verbose = TRUE)
}
\arguments{
\item{data}{A dataframe with 2 variables: the first
corresponds to buyer-initiated trades (buys), and the second corresponds
to seller-initiated trades (sells).}

\item{layers}{An integer referring to the assumed number of
information layers in the data. If the argument \code{layers} is given, then
the ECM algorithm will use the number of layers provided. If \code{layers} is
omitted, the function \code{mpin_ecm()} will simultaneously optimize the number
of layers as well as the parameters of the \code{MPIN} model.}

\item{xtraclusters}{An integer used to divide trading days into
\code{#(1 + layers + xtraclusters)} clusters, thereby resulting in
\code{#comb(layers + xtraclusters, layers)} initial parameter sets in line
with \insertCite{ErsanAlici2016;textual}{PINstimation}, and
\insertCite{Ersan2016;textual}{PINstimation}. The default value is \code{4}
as chosen in \insertCite{Ersan2016;textual}{PINstimation}.}

\item{initialsets}{A dataframe containing initial parameter
sets for estimation of the \code{MPIN} model. The default value is \code{NULL}.
If \code{initialsets} is \code{NULL}, the initial parameter sets are provided by
the function \code{initials_mpin()}.}

\item{...}{Additional arguments passed on to the function \code{mpin_ecm}. The
recognized arguments are \code{hyperparams}, and \code{is_parallel}.
\itemize{
\item \code{hyperparams} is a list containing the hyperparameters of the ECM
algorithm. When not empty, it contains one or more of the following
elements: \code{minalpha}, \code{maxeval}, \code{tolerance}, \code{criterion},
\code{maxlayers}, and \code{maxinit}. More about these elements can be found in
the details section.
\item  \code{is_parallel} is a logical variable that specifies whether
the computation is performed using parallel or sequential processing. The
default value is \code{FALSE}. For more details, please refer to the
vignette 'Parallel processing' in the package, or
\href{https://pinstimation.com/articles/parallel_processing.html}{online}.
}}

\item{verbose}{(\code{logical}) a binary variable that determines whether detailed
information about the steps of the estimation of the MPIN model is displayed.
No output is produced when \code{verbose} is set to \code{FALSE}. The default
value is \code{TRUE}.}
}
\value{
Returns an object of class \code{estimate.mpin.ecm}.
}
\description{
Estimates the multilayer probability of informed trading
(\code{MPIN}) using an Expectation Conditional Maximization algorithm, as in
\insertCite{Ghachem2022;textual}{PINstimation}.
}
\details{
The argument 'data' should be a numeric dataframe, and contain
at least two variables. Only the first two variables will be considered:
The first variable is assumed to correspond to the total number of
buyer-initiated trades, while the second variable is assumed to
correspond to the total number of seller-initiated trades. Each row or
observation corresponds to a trading day. \code{NA} values will be ignored.

The initial parameters for the expectation-conditional maximization
algorithm are computed using the function \code{initials_mpin()}  with
default settings. The factorization of the \code{MPIN} likelihood function
used is developed by \insertCite{Ersan2016;textual}{PINstimation}, and
is implemented in \code{fact_mpin()}.\cr\cr
The argument \code{hyperparams} contains the hyperparameters of the ECM algorithm.
It is either empty or contains one or more of the following elements:
\itemize{
\item \code{minalpha} (\code{numeric}) It stands for the minimum share of days
belonging  to a given layer, i.e., layers falling below this threshold are
removed during the iteration, and the model is estimated with a lower number
of layers. When missing, \code{minalpha} takes the default value of \code{0.001}.

\item \code{maxeval} (\code{integer}) It stands for the maximum number of iterations
of the ECM algorithm for each initial parameter set. When missing, \code{maxeval}
takes the default value of \code{100}.

\item \code{tolerance} (\code{numeric}) The ECM algorithm is stopped when the
(relative) change of log-likelihood is smaller than \code{tolerance}. When
missing, \code{tolerance} takes the default value of \code{0.001}.

\item \code{criterion} (\code{character}) It is the model selection criterion used to
find the optimal estimate for the \code{MPIN} model. It takes one of these values:
\code{"BIC"}, \code{"AIC"} and \code{"AWE"}, which stand for Bayesian Information
Criterion, Akaike Information Criterion and Approximate Weight of Evidence,
respectively \insertCite{Akogul2016}{PINstimation}. When missing,
\code{criterion} takes the default value of \code{"BIC"}.

\item \code{maxlayers} (\code{integer}) It is the upper limit of the number of layers
used for estimation in the ECM algorithm. If the argument \code{layers} is missing,
the ECM algorithm will estimate \code{MPIN} models for all layers in the integer
set from \code{1} to \code{maxlayers}. When missing, \code{maxlayers} takes the default
value of \code{8}.

\item \code{maxinit} (\code{integer}) It is the maximum number of initial sets used
for each individual estimation in the ECM algorithm. When missing, \code{maxinit}
takes the default value of \code{100}.
}

If the argument \code{layers} is given, then the Expectation Conditional
Maximization algorithm will use the number of  layers provided. If
\code{layers} is omitted, the function \code{mpin_ecm()} will simultaneously
optimize the number of layers as well as the parameters of the \code{MPIN} model.
Practically, the function \code{mpin_ecm()} uses the ECM algorithm to optimize
the \code{MPIN} model parameters for each number of layers within the integer
set from \code{1} to \code{8} (or to \code{maxlayers} if specified in the argument
\code{hyperparams}); and returns the optimal model with the lowest Bayesian
information  criterion (BIC) (or the lowest information criterion
\code{criterion} if specified in the argument \code{hyperparams}).
}
\examples{
# There is a preloaded quarterly dataset called 'dailytrades' with 60
# observations. Each observation corresponds to a day and contains the
# total number of buyer-initiated trades ('B') and seller-initiated
# trades ('S') on that day. To know more, type ?dailytrades

xdata <- dailytrades

# Estimate the MPIN model using the expectation-conditional maximization
# (ECM) algorithm.

# ------------------------------------------------------------------------ #
# Estimate the MPIN model, assuming that there exist 2 information layers  #
# in the dataset                                                           #
# ------------------------------------------------------------------------ #

estimate <- mpin_ecm(xdata, layers = 2, verbose = FALSE)

# Show the estimation output

show(estimate)

# Display the optimal parameters from the Expectation Conditional
# Maximization algorithm

show(estimate@parameters)

# Display the global multilayer probability of informed trading

show(estimate@mpin)

# Display the multilayer probability of informed trading per layer

show(estimate@mpinJ)

# Display the first five rows of the initial parameter sets used in the
# expectation-conditional maximization estimation

show(round(head(estimate@initialsets, 5), 4))

# ------------------------------------------------------------------------ #
# Omit the argument 'layers', so the ECM algorithm optimizes both the      #
# number of layers and the MPIN model parameters.                          #
# ------------------------------------------------------------------------ #
\donttest{
estimate <- mpin_ecm(xdata, verbose = FALSE)

# Show the estimation output

show(estimate)

# Display the optimal parameters from the estimation of the MPIN model using
# the expectation-conditional maximization (ECM) algorithm

show(estimate@parameters)

# Display the multilayer probability of informed trading

show(estimate@mpin)

# Display the multilayer probability of informed trading per layer

show(estimate@mpinJ)

# Display the first five rows of the initial parameter sets used in the
# expectation-conditional maximization estimation.

show(round(head(estimate@initialsets, 5), 4))
}
# ------------------------------------------------------------------------ #
# Tweak the hyperparameters of the ECM algorithm                           #
# ------------------------------------------------------------------------ #

# Create a variable ecm.params containing the hyperparameters of the ECM
# algorithm. We start with a tolerance much stricter than the default; this
# will surely make the ECM algorithm take more time to give results

ecm.params <- list(tolerance = 0.0000001)

# If we suspect that the data contains more than eight information layers, we
# can raise the number of models to be estimated to 10 as an example, i.e.,
# maxlayers = 10.

ecm.params$maxlayers <- 10

# We can also choose Approximate Weight of Evidence (AWE) for model
# selection instead of the default Bayesian Information Criterion (BIC)

ecm.params$criterion <- 'AWE'

# We can also increase the maximum number of initial sets to 200, in
# order to obtain a higher level of accuracy for models with a high number
# of layers. We set the sub-argument 'maxinit' to `200`. Remember that its
# default value is `100`.

ecm.params$maxinit <- 200
\donttest{
estimate <- mpin_ecm(xdata, xtraclusters = 2, hyperparams = ecm.params,
                                                      verbose = FALSE)

# We can change the model selection criterion by calling selectModel()

estimate <- selectModel(estimate, "AIC")

# We get the mpin_ecm estimation results for the MPIN model with 2 layers
# using the slot models. We then show the first five rows of the
# corresponding slot details.

models <- estimate@models
show(round(head(models[[2]]@details, 5), 4))

# We can also use the function getSummary to get an idea about the change in
# the estimation parameters as a function of the number of layers in the
# MPIN model. The function getSummary returns a dataframe that contains,
# among others, the number of layers of the model, the number of layers in
# the optimal model, the MPIN value, and the values of the different
# information criteria, namely AIC, BIC and AWE.

summary <- getSummary(estimate)

# We can plot the MPIN value and the layers at the optimal model as a
# function of the number of layers to see whether additional layers in the
# model actually contribute to a better precision in the probability of
# informed trading. Remember that the hyperparameter 'minalpha' is
# responsible for dropping layers with "frequency" lower than 'minalpha'.

plot(summary$layers, summary$MPIN,
   type = "o", col = "red",
   xlab = "MPIN model layers", ylab = "MPIN value"
 )

plot(summary$layers, summary$em.layers,
   type = "o", col = "blue",
   xlab = "MPIN model layers", ylab = "layers at the optimal model"
)
}
}
\references{
\insertAllCited
}
