% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partydf.R
\name{partition}
\alias{partition}
\title{Partition data across workers in a cluster}
\usage{
partition(data, cluster)
}
\arguments{
\item{data}{Dataset to partition, typically grouped. When grouped, all
observations in a group will be assigned to the same worker.}

\item{cluster}{Cluster to use.}
}
\value{
A \link{party_df}.
}
\description{
Partitioning ensures that all observations in a group end up on the same
worker. To try to keep the observations on each worker balanced,
\code{partition()} uses a greedy algorithm that iteratively assigns each group to
the worker that currently has the fewest rows.
}
\examples{
library(dplyr)
cl <- default_cluster()
cluster_library(cl, "dplyr")

mtcars2 <- partition(mtcars, cl)
mtcars2 \%>\% mutate(cyl2 = 2 * cyl)
mtcars2 \%>\% filter(vs == 1)
mtcars2 \%>\% group_by(cyl) \%>\% summarise(n())
mtcars2 \%>\% select(-cyl)
}
