% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/StatModels.R
\name{LogisticRegressionDP}
\alias{LogisticRegressionDP}
\title{Privacy-preserving Logistic Regression}
\description{
This class implements differentially private logistic regression
\insertCite{chaudhuri2011}{DPpack}. Either the output or the objective
perturbation method can be used.
}
\details{
To use this class for logistic regression, first use the \code{new}
method to construct an object of this class with the desired function
values and hyperparameters. After constructing the object, the \code{fit}
method can be applied with a provided dataset and data bounds to fit the
model. In fitting, the model stores a vector of coefficients \code{coeff}
which satisfy differential privacy. These can be released directly, or used
in conjunction with the \code{predict} method to privately predict the
outcomes of new datapoints.

Note that in order to guarantee differential privacy for logistic
regression, certain constraints must be satisfied for the values used to
construct the object, as well as for the data used to fit. These conditions
depend on the chosen perturbation method. The regularizer must be
1-strongly convex and differentiable. It also must be doubly differentiable
if objective perturbation is chosen. Additionally, it is assumed that if x
represents a single row of the dataset X, then the l2-norm of x is at most
1 for all x. In order to ensure this constraint is satisfied, the dataset
is preprocessed and scaled, and the resulting coefficients are
postprocessed and un-scaled so that the stored coefficients correspond to
the original data. Due to this constraint on x, it is best to avoid using a
bias term in the model whenever possible. If a bias term must be used, the
issue can be partially circumvented by adding a constant column to X before
fitting the model, which will be scaled along with the rest of X. The
\code{fit} method contains functionality to add a column of constant 1s to
X before scaling, if desired.
}
\examples{
# Build train dataset X and y, and test dataset Xtest and ytest.
# The data are synthetic: K classes with N rows each, labelled 0, ..., K-1.
N <- 200
K <- 2
X <- data.frame()
y <- data.frame()
for (j in (1:K)){
  # Evenly spaced grid used to build both features
  t <- seq(-.25, .25, length.out = N)
  # Class-specific normal draws (sd 0.1): mean -0.2 for j = 1, mean 0.2 for j = 2
  if (j==1) m <- stats::rnorm(N,-.2, .1)
  if (j==2) m <- stats::rnorm(N, .2, .1)
  # Two features per row; the label for class j is j-1 (that is, 0 or 1)
  Xtemp <- data.frame(x1 = 3*t , x2 = m - t)
  ytemp <- data.frame(matrix(j-1, N, 1))
  # Append this class to the accumulated features and labels
  X <- rbind(X, Xtemp)
  y <- rbind(y, ytemp)
}
# Hold out every 10th row as the test set and train on the remaining rows.
# drop=FALSE keeps the single-column label data frames as data frames.
Xtest <- X[seq(1,(N*K),10),]
ytest <- y[seq(1,(N*K),10),,drop=FALSE]
X <- X[-seq(1,(N*K),10),]
y <- y[-seq(1,(N*K),10),,drop=FALSE]

# Construct object for logistic regression
# eps is the epsilon privacy budget and gamma is the regularization constant.
# perturbation.method is not given, so its default (objective) is used.
regularizer <- 'l2' # Alternatively, function(coeff) coeff\%*\%coeff/2
eps <- 1
gamma <- 0.1
lrdp <- LogisticRegressionDP$new(regularizer, eps, gamma)

# Fit with data
# Bounds for X based on construction
# x1 = 3*t lies in [-0.75, 0.75]. x2 = m - t involves normal draws and is not
# strictly bounded, so any values beyond the bounds are clipped when fitting.
upper.bounds <- c( 1, 1)
lower.bounds <- c(-1,-1)
lrdp$fit(X, y, upper.bounds, lower.bounds) # No bias term
lrdp$coeff # Gets private coefficients

# Predict new data points
# With the default raw.value = FALSE, predict returns class labels 0 or 1.
predicted.y <- lrdp$predict(Xtest)
# Number of misclassified test points
n.errors <- sum(predicted.y!=ytest)

}
\references{
\insertRef{chaudhuri2011}{DPpack}

\insertRef{Chaudhuri2009}{DPpack}
}
\section{Super class}{
\code{\link[DPpack:EmpiricalRiskMinimizationDP.CMS]{DPpack::EmpiricalRiskMinimizationDP.CMS}} -> \code{LogisticRegressionDP}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-new}{\code{LogisticRegressionDP$new()}}
\item \href{#method-fit}{\code{LogisticRegressionDP$fit()}}
\item \href{#method-predict}{\code{LogisticRegressionDP$predict()}}
\item \href{#method-clone}{\code{LogisticRegressionDP$clone()}}
}
}
\if{html}{
\out{<details open ><summary>Inherited methods</summary>}
\itemize{
}
\out{</details>}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-new"></a>}}
\if{latex}{\out{\hypertarget{method-new}{}}}
\subsection{Method \code{new()}}{
Create a new \code{LogisticRegressionDP} object.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LogisticRegressionDP$new(
  regularizer,
  eps,
  gamma,
  perturbation.method = "objective",
  regularizer.gr = NULL
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{regularizer}}{String or regularization function. If a string, must be
'l2', indicating to use l2 regularization. If a function, must have form
\code{regularizer(coeff)}, where \code{coeff} is a vector or matrix, and
return the value of the regularizer at \code{coeff}. See
\code{\link{regularizer.l2}} for an example. Additionally, in order to
ensure differential privacy, the function must be 1-strongly convex and
doubly differentiable.}

\item{\code{eps}}{Positive real number defining the epsilon privacy budget. If set
to Inf, runs algorithm without differential privacy.}

\item{\code{gamma}}{Nonnegative real number representing the regularization
constant.}

\item{\code{perturbation.method}}{String indicating whether to use the 'output' or
the 'objective' perturbation methods \insertCite{chaudhuri2011}{DPpack}.
Defaults to 'objective'.}

\item{\code{regularizer.gr}}{Optional function representing the gradient of the
regularization function with respect to \code{coeff} and of the form
\code{regularizer.gr(coeff)}. Should return a vector. See
\code{\link{regularizer.gr.l2}} for an example. If \code{regularizer} is
given as a string, this value is ignored. If not given and
\code{regularizer} is a function, non-gradient based optimization methods
are used to compute the coefficient values in fitting the model.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
A new \code{LogisticRegressionDP} object.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-fit"></a>}}
\if{latex}{\out{\hypertarget{method-fit}{}}}
\subsection{Method \code{fit()}}{
Fit the differentially private logistic regression model. This
method runs either the output perturbation or the objective perturbation
algorithm \insertCite{chaudhuri2011}{DPpack}, depending on the value of
\code{perturbation.method} used to construct the object, to generate an
objective function. A numerical optimization method is then run to find
optimal coefficients for fitting the model given the training data and
hyperparameters. The built-in \code{\link{optim}} function using the
"BFGS" optimization method is used. If \code{regularizer} is given as
'l2' or if \code{regularizer.gr} is given in the construction of the
object, the gradient of the objective function is utilized by
\code{optim} as well. Otherwise, non-gradient based optimization methods
are used. The resulting privacy-preserving coefficients are stored in
\code{coeff}.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LogisticRegressionDP$fit(X, y, upper.bounds, lower.bounds, add.bias = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{X}}{Dataframe of data to be fit.}

\item{\code{y}}{Vector or matrix of true labels for each row of \code{X}.}

\item{\code{upper.bounds}}{Numeric vector of length \code{ncol(X)} giving upper
bounds on the values in each column of \code{X}. The \code{ncol(X)} values are
assumed to be in the same order as the corresponding columns of \code{X}.
Any value in the columns of \code{X} larger than the corresponding upper
bound is clipped at the bound.}

\item{\code{lower.bounds}}{Numeric vector of length \code{ncol(X)} giving lower
bounds on the values in each column of \code{X}. The \code{ncol(X)}
values are assumed to be in the same order as the corresponding columns
of \code{X}. Any value in the columns of \code{X} smaller than the
corresponding lower bound is clipped at the bound.}

\item{\code{add.bias}}{Boolean indicating whether to add a bias term to \code{X}.
Defaults to FALSE.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-predict"></a>}}
\if{latex}{\out{\hypertarget{method-predict}{}}}
\subsection{Method \code{predict()}}{
Predict label(s) for given \code{X} using the fitted
coefficients.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LogisticRegressionDP$predict(X, add.bias = FALSE, raw.value = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{X}}{Dataframe of data on which to make predictions. Must be of the same
form as \code{X} used to fit coefficients.}

\item{\code{add.bias}}{Boolean indicating whether to add a bias term to \code{X}.
Defaults to FALSE. If \code{add.bias} was set to TRUE when fitting the
coefficients, \code{add.bias} should also be set to TRUE for predictions.}

\item{\code{raw.value}}{Boolean indicating whether to return the raw predicted
value or the rounded class label. If FALSE (default), outputs the
predicted labels 0 or 1. If TRUE, returns the raw score from the logistic
regression.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Matrix of predicted labels or scores corresponding to each row of
\code{X}.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-clone"></a>}}
\if{latex}{\out{\hypertarget{method-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LogisticRegressionDP$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
