% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/init.R
\name{sgdgmf.init}
\alias{sgdgmf.init}
\alias{sgdgmf.init.ols}
\alias{sgdgmf.init.glm}
\alias{sgdgmf.init.random}
\alias{sgdgmf.init.custom}
\title{Initialize the parameters of a generalized matrix factorization model}
\usage{
sgdgmf.init(
  Y,
  X = NULL,
  Z = NULL,
  ncomp = 2,
  family = gaussian(),
  weights = NULL,
  offset = NULL,
  method = c("ols", "glm", "random", "values"),
  type = c("deviance", "pearson", "working", "link"),
  niter = 0,
  values = list(),
  verbose = FALSE,
  parallel = FALSE,
  nthreads = 1,
  savedata = TRUE
)

sgdgmf.init.ols(
  Y,
  X = NULL,
  Z = NULL,
  ncomp = 2,
  family = gaussian(),
  weights = NULL,
  offset = NULL,
  type = c("deviance", "pearson", "working", "link"),
  verbose = FALSE
)

sgdgmf.init.glm(
  Y,
  X = NULL,
  Z = NULL,
  ncomp = 2,
  family = gaussian(),
  weights = NULL,
  offset = NULL,
  type = c("deviance", "pearson", "working", "link"),
  verbose = FALSE,
  parallel = FALSE,
  nthreads = 1
)

sgdgmf.init.random(
  Y,
  X = NULL,
  Z = NULL,
  ncomp = 2,
  family = gaussian(),
  weights = NULL,
  offset = NULL,
  sigma = 1
)

sgdgmf.init.custom(
  Y,
  X = NULL,
  Z = NULL,
  ncomp = 2,
  family = gaussian(),
  values = list(),
  verbose = FALSE
)
}
\arguments{
\item{Y}{matrix of responses (\eqn{n \times m})}

\item{X}{matrix of row-specific fixed effects (\eqn{n \times p})}

\item{Z}{matrix of column-specific fixed effects (\eqn{m \times q})}

\item{ncomp}{rank of the latent matrix factorization}

\item{family}{a model family, as in the \code{\link{glm}} interface}

\item{weights}{matrix of constant weights (\eqn{n \times m})}

\item{offset}{matrix of constant offset (\eqn{n \times m})}

\item{method}{optimization method to be used for the initial fit}

\item{type}{type of residuals to be used for initializing \code{U} via incomplete SVD decomposition}

\item{niter}{number of iterations to refine the initial estimate (only if \code{method="ols"})}

\item{values}{a list of custom initial values for \code{B}, \code{A}, \code{U} and \code{V}}

\item{verbose}{if \code{TRUE}, prints the status of the initialization process}

\item{parallel}{if \code{TRUE}, allows for parallel computing using the \code{foreach} package (only if \code{method="glm"})}

\item{nthreads}{number of cores to be used in parallel (only if \code{parallel=TRUE} and \code{method="glm"})}

\item{savedata}{if \code{TRUE}, stores a copy of the input data}
}
\value{
An \code{initgmf} object, namely a list, containing the initial estimates of the GMF parameters.
In particular, the returned object collects the following information:
\itemize{
  \item \code{Y}: response matrix (only if \code{savedata=TRUE})
  \item \code{X}: row-specific covariate matrix (only if \code{savedata=TRUE})
  \item \code{Z}: column-specific covariate matrix (only if \code{savedata=TRUE})
  \item \code{B}: the estimated column-specific coefficient matrix
  \item \code{A}: the estimated row-specific coefficient matrix
  \item \code{U}: the estimated factor matrix
  \item \code{V}: the estimated loading matrix
  \item \code{phi}: the estimated dispersion parameter
  \item \code{method}: the selected estimation method
  \item \code{family}: the model family
  \item \code{ncomp}: rank of the latent matrix factorization
  \item \code{type}: type of residuals used for the initialization of \code{U}
  \item \code{verbose}: if \code{TRUE}, print the status of the initialization process
  \item \code{parallel}: if \code{TRUE}, allows for parallel computing
  \item \code{nthreads}: number of cores to be used in parallel
  \item \code{savedata}: if \code{TRUE}, stores a copy of the input data
}
}
\description{
Provide four initialization methods to set the initial values of
a generalized matrix factorization (GMF) model identified by a \code{\link{glm}} family
and a linear predictor of the form \eqn{g(\mu) = \eta = X B^\top + A Z^\top + U V^\top},
with bijective link function \eqn{g(\cdot)}.
See \code{\link{sgdgmf.fit}} for more details on the model specification.
}
\details{
If \code{method = "ols"}, the initialization is performed by fitting a sequence of linear
regressions followed by a residual SVD decomposition.
To account for non-Gaussian distribution of the data, regression and
decomposition are applied on the transformed response matrix \eqn{Y_h = (g \circ h)(Y)},
where \eqn{h(\cdot)} is a function which prevents \eqn{Y_h} from taking infinite values.
For instance, in the Binomial case \eqn{h(y) = (1 - 2 \epsilon) y + \epsilon},
while in the Poisson case \eqn{h(y) = y + \epsilon}, where \eqn{\epsilon} is a small
positive constant, typically \code{0.1} or \code{0.01}.

If \code{method = "glm"}, the initialization is performed by fitting a sequence of
generalized linear models followed by a residual SVD decomposition.
In particular, to set \eqn{\beta_j}, we use independent GLM fits with \eqn{y_j \sim X \beta_j}.
Similarly, to set \eqn{\alpha_i}, we fit the model \eqn{y_i \sim Z \alpha_i + o_i}, with offset \eqn{o_i = B x_i}.
Then, we obtain \eqn{U} via SVD on the residuals. Finally, we obtain \eqn{V} via independent GLM fits
under the model \eqn{y_j \sim U v_j + o_j}, with offset \eqn{o_j = X \beta_j + A z_j}.

Under both \code{method = "ols"} and \code{method = "glm"}, it is possible to specify the
parameter \code{type} to change the type of residuals used for the SVD decomposition.

If \code{method = "random"}, the initialization is performed using independent Gaussian
random values for all the parameters in the model.

If \code{method = "values"}, the initialization is performed using user-specified
values provided as an input, which must have compatible dimensions.
}
\examples{
library(sgdGMF)

# Set the data dimensions
n = 100; m = 20; d = 5

# Generate data using Poisson, Binomial and Gamma models
data_pois = sim.gmf.data(n = n, m = m, ncomp = d, family = poisson())
data_bin = sim.gmf.data(n = n, m = m, ncomp = d, family = binomial())
data_gam = sim.gmf.data(n = n, m = m, ncomp = d, family = Gamma(link = "log"), dispersion = 0.25)

# Initialize the GMF parameters assuming 3 latent factors
init_pois = sgdgmf.init(data_pois$Y, ncomp = 3, family = poisson(), method = "ols")
init_bin = sgdgmf.init(data_bin$Y, ncomp = 3, family = binomial(), method = "ols")
init_gam = sgdgmf.init(data_gam$Y, ncomp = 3, family = Gamma(link = "log"), method = "ols")

# Get the fitted values on the response scale
mu_hat_pois = fitted(init_pois, type = "response")
mu_hat_bin = fitted(init_bin, type = "response")
mu_hat_gam = fitted(init_gam, type = "response")

# Compare the results
oldpar = par(no.readonly = TRUE)
par(mfrow = c(3,3), mar = c(1,1,3,1))
image(data_pois$Y, axes = FALSE, main = expression(Y[Pois]))
image(data_pois$mu, axes = FALSE, main = expression(mu[Pois]))
image(mu_hat_pois, axes = FALSE, main = expression(hat(mu)[Pois]))
image(data_bin$Y, axes = FALSE, main = expression(Y[Bin]))
image(data_bin$mu, axes = FALSE, main = expression(mu[Bin]))
image(mu_hat_bin, axes = FALSE, main = expression(hat(mu)[Bin]))
image(data_gam$Y, axes = FALSE, main = expression(Y[Gam]))
image(data_gam$mu, axes = FALSE, main = expression(mu[Gam]))
image(mu_hat_gam, axes = FALSE, main = expression(hat(mu)[Gam]))
par(oldpar)

}
\keyword{internal}
