% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Matrix.utils.R
\name{dMcast}
\alias{dMcast}
\title{Casts or pivots a long \code{data.frame} into a wide sparse matrix}
\usage{
dMcast(data, formula, fun.aggregate = "sum", value.var = NULL,
  as.factors = FALSE)
}
\arguments{
\item{data}{a data frame}

\item{formula}{casting \code{\link[stats]{formula}}, see details for specifics.}

\item{fun.aggregate}{name of aggregation function.  Defaults to 'sum'}

\item{value.var}{name of the column that stores the numeric values to be aggregated}

\item{as.factors}{if TRUE, treat all columns as factors, including numerics}
}
\description{
Similar in function to \code{\link[reshape2]{dcast}}, but produces a sparse 
\code{\link{Matrix}} as an output. Sparse matrices are beneficial for this 
application because such outputs are often very wide and sparse. Conceptually
similar to a \code{pivot} operation.
}
\details{
Casting formulas are slightly different than those in \code{dcast} and follow
the conventions of \code{\link{model.matrix}}. See \code{\link{formula}} for 
details.  Briefly, the left hand side of the \code{~} will be used as the 
grouping criteria.  This can either be a single variable, or a group of 
variables linked using \code{:}.  The right hand side specifies what the 
columns will be. Unlike \code{dcast}, using the \code{+} operator will append
the values for each variable as additional columns.  This is useful for 
things such as one-hot encoding.  Using \code{:} will combine the columns as 
interactions.
}
\examples{
#Classic air quality example
# Small helper that reshapes a wide data frame into long format: every column
# not listed in idColumns is stacked into a variable/value pair.
#   data      - a data frame
#   idColumns - character vector naming the columns kept as identifiers
# Returns a data frame holding the id columns plus 'variable' (the source
# column name) and 'value' (that column's entries coerced with as.numeric).
melt<-function(data,idColumns)
{
  cols<-setdiff(colnames(data),idColumns)
  # drop=FALSE keeps the id columns a data frame even when only one is given;
  # without it cbind would build a character matrix instead of a data frame.
  results<-lapply(cols,function (x) cbind(data[,idColumns,drop=FALSE],variable=x,value=as.numeric(data[,x])))
  # Return the stacked result visibly rather than as the value of an assignment.
  Reduce(rbind,results)
}
# Lower-case the column names, then reshape to long format keyed by month and day
names(airquality) <- tolower(names(airquality))
aqm <- melt(airquality, idColumns = c("month", "day"))
# Group rows by the month:day combination; one column per variable, aggregated by mean
dMcast(aqm, month:day ~ variable, fun.aggregate = "mean", value.var = "value")
# Group rows by month only; one column per variable, aggregated by mean
dMcast(aqm, month ~ variable, fun.aggregate = "mean", value.var = "value")

# One-hot encoding
# Preserving numerics
dMcast(warpbreaks, ~ .)
# Pivoting numerics as well
dMcast(warpbreaks, ~ ., as.factors = TRUE)

\dontrun{
# Benchmark of dMcast against dcast.data.table and reshape2::dcast on a
# simulated order table with 1e7 rows. The timings and object sizes in the
# trailing comments are the figures recorded in the original documentation.
# NOTE(review): dcast.data.table and as.data.table are called unqualified;
# presumably data.table must be attached first - confirm.
orders<-data.frame(orderNum=as.factor(sample(1e6, 1e7, TRUE)),
   sku=as.factor(sample(1e3, 1e7, TRUE)),
   customer=as.factor(sample(1e4,1e7,TRUE)),
   state = sample(letters, 1e7, TRUE),
   amount=runif(1e7))
# For simple aggregations resulting in small tables, dcast.data.table (and
# reshape2) will be faster
system.time(a<-dcast.data.table(as.data.table(orders),sku~state,sum,
   value.var = 'amount')) # .5 seconds
system.time(b<-reshape2::dcast(orders,sku~state,sum,
   value.var = 'amount')) # 2.61 seconds
system.time(c<-dMcast(orders,sku~state,
   value.var = 'amount')) # 28 seconds

# However, this situation changes as the result set becomes larger
system.time(a<-dcast.data.table(as.data.table(orders),customer~sku,sum,
   value.var = 'amount')) # 4.4 seconds
system.time(b<-reshape2::dcast(orders,customer~sku,sum,
   value.var = 'amount')) # 34.7 seconds
system.time(c<-dMcast(orders,customer~sku,
   value.var = 'amount')) # 27 seconds

# More complicated:
system.time(a<-dcast.data.table(as.data.table(orders),customer~sku+state,sum,
   value.var = 'amount')) # 18.1 seconds, object size = 2084 Mb
system.time(b<-reshape2::dcast(orders,customer~sku+state,sum,
   value.var = 'amount')) # Does not return
system.time(c<-dMcast(orders,customer~sku:state,
   value.var = 'amount')) # 30.69 seconds, object size = 115.4 Mb

system.time(a<-dcast.data.table(as.data.table(orders),orderNum~sku,sum,
   value.var = 'amount')) # Does not return
system.time(c<-dMcast(orders,orderNum~sku,
   value.var = 'amount')) # 36.33 seconds, object size = 175Mb
}
}
\seealso{
\code{\link[reshape]{cast}}

\code{\link[reshape2]{dcast}}
}

