% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bfile_split.R
\name{bfile_split}
\alias{bfile_split}
\title{Splits a big file into several smaller files without loading it entirely in memory}
\usage{
bfile_split(
  file = NULL,
  by_nfiles,
  by_nrows,
  by_columns,
  drop_empty_files = T,
  write_sep = NA,
  write_dir = NULL,
  meta_output = NULL,
  ...
)
}
\arguments{
\item{file}{String. Name or full path to a file compatible with data.table::fread()}

\item{by_nfiles}{Numeric. Number of files with an equal number of rows to be created. Only the last one will be slightly larger, containing the remainder.}

\item{by_nrows}{Numeric. Number of rows composing the new split files. The last one may be smaller, containing only the remainder.}

\item{by_columns}{Vector of strings or numeric. Indicates either the names or index number of the columns whose combinations of unique values will be used to split the files.}

\item{drop_empty_files}{Logical. Defaults to TRUE. Used only with the 'by_columns' argument. If changed to FALSE, empty files may be created.}

\item{write_sep}{Single-character string. Will be provided to data.table::fwrite() for writing the output. If not provided, the delimiter will be guessed from the input file with the bsep() function.}

\item{write_dir}{String. Path to the output directory. By default, it will be the working directory. If the directory doesn't exist, it will be created.}

\item{meta_output}{List. Optional. Output of the bmeta() function on the same file. It indicates the names and numbers of columns and rows. If not provided, it will be calculated. It can take a while on files with several million rows.}

\item{...}{Arguments that must be passed to data.table::fread() like 'sep=' and 'dec='.}
}
\value{
Creates a number of csv files from the original larger file
}
\description{
This function helps split a big csv file into smaller csv files using one of these 3 methods:
\enumerate{
\item by_nrows: Each new file will contain a number of rows defined by the user
\item by_nfiles: The user decides the number of files created with the rows equally distributed
\item by_columns: The file will be split by the combinations of unique values in the columns chosen by the user
}

Like all other functions in the bread package, this is achieved using Unix commands
that allow opening, reading and splitting big files that wouldn't fit in memory
(The goal being to help with the 'cannot allocate vector of size' error).
}
\examples{
\donttest{
\dontrun{
file <- system.file('extdata', 'test.csv', package = 'bread')
## Splitting by number of rows, by number of files, or by column values.
bfile_split(file = file, by_nrows = 5)
bfile_split(file = file, by_nfiles = 3)
bfile_split(file = file, by_columns = c('YEAR', 'COLOR'))
## For very big files with several million rows, the bmeta() function takes
## a long time to count the rows without loading the file in memory.
## Best practice is to save the result of bmeta() in a variable and provide it
## to bfile_split()
meta <- bmeta(file = file)
bfile_split(file = file, by_nrows = 5, meta_output = meta)
## write_sep can be used to write the output files with a different delimiter than the input file
bfile_split(file = file, by_nrows = 5, write_sep = '*')
}
}
}
\keyword{allocate}
\keyword{big}
\keyword{file}
\keyword{size}
\keyword{split}
\keyword{vector}
