% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cache.R
\name{Cache}
\alias{Cache}
\title{Saves a wide variety of function call outputs to disk and optionally RAM, for recovery later}
\usage{
Cache(
  FUN,
  ...,
  notOlderThan = NULL,
  .objects = NULL,
  .cacheExtra = NULL,
  .functionName = NULL,
  outputObjects = NULL,
  algo = "xxhash64",
  cacheRepo = NULL,
  cachePath = NULL,
  length = getOption("reproducible.length", Inf),
  compareRasterFileLength,
  userTags = c(),
  omitArgs = NULL,
  classOptions = list(),
  debugCache = character(),
  sideEffect = FALSE,
  makeCopy = FALSE,
  quick = getOption("reproducible.quick", FALSE),
  verbose = getOption("reproducible.verbose", 1),
  cacheId = NULL,
  useCache = getOption("reproducible.useCache", TRUE),
  useCloud = FALSE,
  cloudFolderID = NULL,
  showSimilar = getOption("reproducible.showSimilar", FALSE),
  drv = getDrv(getOption("reproducible.drv", NULL)),
  conn = getOption("reproducible.conn", NULL)
)
}
\arguments{
\item{FUN}{Either a function (e.g., \code{rnorm}), a function call (e.g., \code{rnorm(1)}),
or an unevaluated function call (e.g., using
\code{quote}).}

\item{...}{Arguments passed to \code{FUN}, if \code{FUN} is not an expression.}

\item{notOlderThan}{A time. Load an object from the Cache if it was created after this.}

\item{.objects}{Character vector of objects to be digested. This is only applicable
if there is a list, environment (or similar) with named objects
within it. Only this/these objects will be considered for caching,
i.e., only use a subset of
the list, environment or similar objects. In the case of nested list-type
objects, this will only be applied outermost first.}

\item{.cacheExtra}{An arbitrary R object that will be included in the \code{CacheDigest},
but otherwise not passed into the \code{FUN}.}

\item{.functionName}{An arbitrary character string that provides a name that is different
than the actual function name (e.g., "rnorm") which will be used for messaging. This
can be useful when the actual function is not helpful for a user, such as \code{do.call}.}

\item{outputObjects}{Optional character vector indicating which objects to
return. This is only relevant for list, environment (or similar) objects.}

\item{algo}{The algorithms to be used; currently available choices are
    \code{md5}, \code{sha1}, \code{crc32},
    \code{sha256}, \code{sha512}, \code{xxhash32}, \code{xxhash64},
    \code{murmur32}, \code{spookyhash}, \code{blake3}, and \code{crc32c}.
    The default for \code{Cache} is \code{"xxhash64"}.}

\item{cacheRepo}{Same as \code{cachePath}, but kept for backwards compatibility.}

\item{cachePath}{A repository used for storing cached objects.
This is optional if \code{Cache} is used inside a SpaDES module.}

\item{length}{Numeric. If the element passed to Cache is a \code{Path} class
object (from e.g., \code{asPath(filename)}) or it is a \code{Raster} with
file-backing, then this will be
passed to \code{digest::digest}, essentially limiting the number of bytes
to digest (for speed). This will only be used if \code{quick = FALSE}.
Default is \code{getOption("reproducible.length")}, which is set to \code{Inf}.}

\item{compareRasterFileLength}{Being deprecated; use \code{length}.}

\item{userTags}{A character vector with descriptions of the Cache function call. These
will be added to the Cache so that this entry in the Cache can be found using
\code{userTags} e.g., via \code{\link[=showCache]{showCache()}}.}

\item{omitArgs}{Optional character string of arguments in the FUN to omit from the digest.}

\item{classOptions}{Optional list. This will pass into \code{.robustDigest} for
specific classes. Should be options that the \code{.robustDigest} knows what
to do with.}

\item{debugCache}{Character or Logical. Either \code{"complete"} or \code{"quick"} (uses
partial matching, so "c" or "q" work). \code{TRUE} is equivalent to \code{"complete"}.
If \code{"complete"}, then the returned object from the Cache
function will have two attributes, \code{debugCache1} and \code{debugCache2},
which are the entire \code{list(...)} and that same object, but after all
\code{.robustDigest} calls, at the moment that it is digested using
\code{digest}, respectively. This \code{attr(mySimOut, "debugCache2")}
can then be compared to a subsequent call and individual items within
the object \code{attr(mySimOut, "debugCache1")} can be compared.
If \code{"quick"}, then it will return the same two objects directly,
without evaluating the \code{FUN(...)}.}

\item{sideEffect}{Now deprecated. Logical or path. Determines where the function will look for
new files following function completion. See Details.
\emph{NOTE: this argument is experimental and may change in future releases.}}

\item{makeCopy}{Now deprecated. Ignored if used.}

\item{quick}{Logical or character. If \code{TRUE},
no disk-based information will be assessed, i.e., only
memory content. See Details section about \code{quick} in \code{\link[=Cache]{Cache()}}.}

\item{verbose}{Numeric, -1 silent (where possible), 0 being very quiet,
1 showing more messaging, 2 being more messaging, etc.
Default is 1. Above 3 will output much more information about the internals of
Caching, which may help diagnose Caching challenges. Can set globally with an
option, e.g., \verb{options('reproducible.verbose' = 0)} to reduce to minimal.}

\item{cacheId}{Character string. If passed, this will override the calculated hash
of the inputs, and return the result from this cacheId in the \code{cachePath}.
Setting this is equivalent to manually saving the output of this function, i.e.,
the object will be on disk, and will be recovered in subsequent calls.
This may help in some particularly finicky situations
where Cache is not correctly detecting unchanged inputs. This will guarantee
the object will be identical each time; this may be useful in operational code.}

\item{useCache}{Logical, numeric or \code{"overwrite"} or \code{"devMode"}. See details.}

\item{useCloud}{Logical. See Details.}

\item{cloudFolderID}{A googledrive dribble of a folder, e.g., using \code{drive_mkdir()}.
If left as \code{NULL}, the function will create a cloud folder with name from last
two folder levels of the \code{cachePath} path:
\code{paste0(basename(dirname(cachePath)), "_", basename(cachePath))}.
This \code{cloudFolderID} will be added to \code{options("reproducible.cloudFolderID")},
but this will not persist across sessions. If this is a character string, it will
treat this as a folder name to create or use on GoogleDrive.}

\item{showSimilar}{A logical or numeric. Useful for debugging.
If \code{TRUE} or \code{1}, then if the Cache
does not find an identical archive in the \code{cachePath}, it will report (via message)
the next most similar archive, and indicate which argument(s) is/are different.
If a number larger than \code{1}, then it will report the N most similar archived
objects.}

\item{drv}{if using a database backend, drv must be an object that
inherits from DBIDriver e.g., from package RSQLite, e.g., SQLite}

\item{conn}{an optional DBIConnection object, as returned by dbConnect().}
}
\value{
Returns the value of the
function call or the cached version (i.e., the result from a previous call
to this same cached function with identical arguments).
}
\description{
\if{html}{\figure{lifecycle-maturing.svg}{options: alt="maturing"}}

A function that can be used to wrap around other functions to cache function calls
for later use. This is normally most effective when the function to cache is
slow to run, yet the inputs and outputs are small. The benefit of caching, therefore,
will decline when the computational time of the "first" function call is fast and/or
the argument values and return objects are large. The default setting (and first
call to Cache) will always save to disk. The 2nd call to the same function will return
from disk, unless \code{options("reproducible.useMemoise" = TRUE)}, then the 2nd time
will recover the object from RAM and is normally much faster (at the expense of RAM use).
}
\details{
There are other similar functions in the R universe. This version of Cache has
been used as part of a robust continuous workflow approach. As a result, we have
tested it with many "non-standard" R objects (e.g., RasterLayer, terra objects) and
environments (which are always unique, so do not cache readily).

This version of the \code{Cache} function accommodates those four special,
though quite common, cases by:
\enumerate{
\item converting any environments into list equivalents;
\item identifying the dispatched S4 method (including those made through
inheritance) before hashing so the correct method is being cached;
\item by hashing the linked file, rather than the Raster object.
Currently, only file-backed \verb{Raster*} or \verb{terra*} objects are digested
(e.g., not \code{ff} objects, or any other R object where the data
are on disk instead of in RAM);
\item Uses \code{\link[digest:digest]{digest::digest()}} (formerly fastdigest, which does
not translate between operating systems).
This is used for file-backed objects as well.
\item Cache will save arguments passed by user in a hidden environment. Any
nested Cache functions will use arguments in this order 1) actual arguments
passed at each Cache call, 2) any inherited arguments from an outer Cache
call, 3) the default values of the Cache function. See section on \emph{Nested
Caching}.
}

\code{Cache} will add a tag to the entry in the cache database called \code{accessed},
which will assign the time that it was accessed, either read or write.
That way, cached items can be shown (using \code{showCache}) or removed (using
\code{clearCache}) selectively, based on their access dates, rather than only
by their creation dates. See example in \code{\link[=clearCache]{clearCache()}}.
}
\note{
As indicated above, several objects require pre-treatment before
caching will work as expected. The function \code{.robustDigest} accommodates this.
It is an S4 generic, meaning that developers can produce their own methods for
different classes of objects. Currently, there are methods for several types
of classes. See \code{\link[=.robustDigest]{.robustDigest()}}.
}
\section{Nested Caching}{

Commonly, Caching is nested, i.e., an outer function is wrapped in a \code{Cache}
function call, and one or more inner functions are also wrapped in a \code{Cache}
function call. A user \emph{can} always specify arguments in every Cache function
call, but this can get tedious and can be prone to errors. The normal way that
\emph{R} handles arguments is it takes the user passed arguments if any, and
default arguments for all those that have no user passed arguments. We have inserted
a middle step. The order of precedence for any given \code{Cache} function call is
\enumerate{
\item user arguments,
\item inherited arguments,
\item default arguments.
}

At this time, the top level \code{Cache} arguments will propagate to all inner functions unless
each individual \code{Cache} call has other arguments specified, i.e., "middle"
nested \code{Cache} function calls don't propagate their arguments to further "inner"
\code{Cache} function calls.  See example.

\code{userTags} is unique among all arguments: its values will be appended to the
inherited \code{userTags}.
}

\section{quick}{

The \code{quick} argument is attempting to sort out an ambiguity with character strings:
are they file paths or are they simply character strings. When \code{quick = TRUE},
\code{Cache} will treat these as character strings; when \code{quick = FALSE},
it will first attempt to treat them as file paths; if there is no file, then
it will revert to treating them as character strings. If user passes a
character vector to this, then this will behave like \code{omitArgs}:
\code{quick = "file"} will treat the argument \code{"file"} as character string.

The most often encountered situation where this ambiguity matters is in arguments about
filenames: is the filename an input pointing to an object whose content we want to
assess (e.g., a file-backed raster), or an output (as in saveRDS) and it should not
be assessed. If only run once, the output file won't exist, so it will be treated
as a character string. However, once the function has been run once, the output file
will exist, and \code{Cache(...)} will assess it, which is incorrect. In these cases,
the user is advised to use \code{quick = "TheOutputFilenameArgument"} to
specify the argument whose content on disk should not be assessed, but whose
character string should be assessed (distinguishing it from \code{omitArgs = "TheOutputFilenameArgument"}, which will not assess the file content nor the
character string).

This is relevant for objects of class \code{character}, \code{Path} and
\code{Raster} currently. For class \code{character}, it is ambiguous whether
this represents a character string or a vector of file paths. If it is known
that character strings should not be treated as paths, then \code{quick = TRUE} is appropriate, with no loss of information. If it is a file or
directory, then it will digest the file content, or \code{basename(object)}.
For class \code{Path} objects, the file's metadata (i.e., filename and file
size) will be hashed instead of the file contents if \code{quick = TRUE}. If
set to \code{FALSE} (default), the contents of the file(s) are hashed. If
\code{quick = TRUE}, \code{length} is ignored. \code{Raster} objects are
treated as paths, if they are file-backed.
}

\section{Caching Speed}{

Caching speed may become a critical aspect of a final product. For example,
if the final product is a shiny app, rerunning the entire project may need
to take less than a few seconds at most. There are 3 arguments that affect
Cache speed: \code{quick}, \code{length}, and
\code{algo}. \code{quick} is passed to \code{.robustDigest}, which currently
only affects \code{Path} and \verb{Raster*} class objects. In both cases, \code{quick}
means that little or no disk-based information will be assessed.
}

\section{Filepaths}{

If a function has a path argument, there is some ambiguity about what should be
done. Possibilities include:
\enumerate{
\item hash the string as is (this will be very system specific, meaning a
\code{Cache} call will not work if copied between systems or directories);
\item hash the \code{basename(path)};
\item hash the contents of the file.
}
If paths are passed in as is (i.e., character string), the result will not be predictable.
Instead, one should use the wrapper function \code{asPath(path)}, which sets the
class of the string to a \code{Path}, and one should decide whether one wants
to digest the content of the file (using \code{quick = FALSE}),
or just the filename (\code{quick = TRUE}). See examples.
}

\section{Stochasticity or randomness}{

In general, it is expected that caching will only be used when randomness is not
desired, e.g., \code{Cache(rnorm(1))} is unlikely to be useful in many cases. However,
\code{Cache} captures the call that is passed to it, leaving all functions unevaluated.
As a result \code{Cache(glm, x ~ y, rnorm(1))} will not work as a means of forcing
a new evaluation each time, as the \code{rnorm(1)} is not evaluated before the call
is assessed against the cache database. To force a new call each time, evaluate
the randomness prior to the Cache call, e.g., \verb{ran = rnorm(1); Cache(glm, x ~ y, ran)}.
Note this does not work for \code{glm} because \code{glm} accepts \code{...}.
Rather, this randomness should be passed to \code{.cacheExtra}, e.g.,
\code{Cache(glm, x ~ y, .cacheExtra = ran)}
}

\section{\code{drv} and \code{conn}}{

By default, \code{drv} uses an SQLite database. This can be sufficient for most cases.
However, if a user has dozens or more cores making requests to the Cache database,
it may be insufficient. A user can set up a different database backend, e.g.,
PostgreSQL that can handle multiple simultaneous read-write situations. See
\url{https://github.com/PredictiveEcology/SpaDES/wiki/Using-alternate-database-backends-for-Cache}.
}

\section{\code{useCache}}{

Logical or numeric. If \code{FALSE} or \code{0}, then the entire Caching
mechanism is bypassed and the
function is evaluated as if it was not being Cached. Default is
\code{getOption("reproducible.useCache")}, which is \code{TRUE} by default,
meaning use the Cache mechanism. This may be useful to turn all Caching on or
off in very complex scripts and nested functions. Increasing levels of numeric
values will cause deeper levels of Caching to occur (though this may not
work as expected in all cases). The following is no longer supported:
Currently, only implemented
in \code{postProcess}: to do both caching of inner \code{cropInputs}, \code{projectInputs}
and \code{maskInputs}, and caching of outer \code{postProcess}, use
\code{useCache = 2}; to skip the inner sequence of 3 functions, use \code{useCache = 1}.
For large objects, this may prevent many duplicated save to disk events.

If \code{useCache = "overwrite"}
(which can be set with \code{options("reproducible.useCache" = "overwrite")}), then the function will invoke the caching mechanism but will purge
any entry that is matched, and it will be replaced with the results of the
current call.

If \code{useCache = "devMode"}: The point of this mode is to facilitate using the Cache when
functions and datasets are continually in flux, and old Cache entries are
likely stale very often. In \code{devMode}, the cache mechanism will work as
normal if the Cache call is the first time for a function OR if it
successfully finds a copy in the cache based on the normal Cache mechanism.
It \emph{differs} from the normal Cache if the Cache call does \emph{not} find a copy
in the \code{cachePath}, but it does find an entry that matches based on
\code{userTags}. In this case, it will delete the old entry in the \code{cachePath}
(identified based on matching \code{userTags}), then continue with normal \code{Cache}.
For this to work correctly, \code{userTags} must be unique for each function call.
This should be used with caution as it is still experimental. Currently, if
\code{userTags} are not unique to a single entry in the cachePath, it will
default to the behaviour of \code{useCache = TRUE} with a message. This means
that \code{"devMode"} is most useful if used from the start of a project.
}

\section{\code{useCloud}}{

This is experimental and there are many conditions under which this is known
to not work correctly. This is a way to store all or some of the local Cache in the cloud.
Currently, the only cloud option is Google Drive, via \pkg{googledrive}.
For this to work, the user must be or be able to be authenticated
with \code{googledrive::drive_auth}. The principle behind this
\code{useCloud} is that it will be a full or partial mirror of a local Cache.
It is not intended to be used independently from a local Cache. To share
objects that are in the Cloud with another person, it requires 2 steps. 1)
share the \code{cloudFolderID$id}, which can be retrieved by
\code{getOption("reproducible.cloudFolderID")$id} after at least one Cache
call has been made. 2) The other user must then set their \code{cloudFolderID} in a
\verb{Cache(..., cloudFolderID = "the ID here")} call or
set their option manually
\verb{options("reproducible.cloudFolderID" = "the ID here")}.

If \code{TRUE}, then this Cache call will download
(if local copy doesn't exist, but cloud copy does exist), upload
(local copy does or doesn't exist and
cloud copy doesn't exist), or
will not download nor upload if object exists in both. If \code{TRUE} will be at
least 1 second slower than setting this to \code{FALSE}, and likely even slower as the
cloud folder gets large. If a user wishes to keep "high-level" control, set this to
\code{getOption("reproducible.useCloud", FALSE)} or
\code{getOption("reproducible.useCloud", TRUE)} (if the default behaviour should
be \code{FALSE} or \code{TRUE}, respectively) so it can be turned on and off with
this option. NOTE: \emph{This argument will not be passed into inner/nested Cache calls.}
}

\section{Object attributes}{

Users should be cautioned that object attributes may not be preserved, especially
in the case of objects that are file-backed, such as \code{Raster} or \code{SpatRaster} objects.
If a user needs to keep attributes, they may need to manually re-attach them to
the object after recovery. With the example of \code{SpatRaster} objects, saving
to disk requires \code{terra::wrap} if it is a memory-backed object. When running
\code{terra::unwrap} on this object, any attributes that a user had added are lost.
}

\section{\code{sideEffect}}{

This feature is now deprecated. Do not use as it is ignored.
}

\examples{
data.table::setDTthreads(2)
tmpDir <- file.path(tempdir())
opts <- options(reproducible.cachePath = tmpDir)

# Usage -- All of the calls below are equivalent; even where args are missing or
#   provided, Cache evaluates using default values, if these are specified in
#   formals(FUN)
a <- list()
b <- list(fun = rnorm)
bbb <- 1
ee <- new.env(parent = emptyenv())
ee$qq <- bbb

a[[1]] <- Cache(rnorm(1)) # no evaluation prior to Cache
a[[2]] <- Cache(rnorm, 1) # no evaluation prior to Cache
a[[3]] <- Cache(do.call, rnorm, list(1))
a[[4]] <- Cache(do.call(rnorm, list(1)))
a[[5]] <- Cache(do.call(b$fun, list(1)))
a[[6]] <- Cache(do.call, b$fun, list(1))
a[[7]] <- Cache(b$fun, 1)
a[[8]] <- Cache(b$fun(1))
# a[[9]] is skipped here; it is assigned in the base-pipe example below (R >= 4.1 only)
a[[10]] <- Cache(quote(rnorm(1)))
a[[11]] <- Cache(stats::rnorm(1))
a[[12]] <- Cache(stats::rnorm, 1)
a[[13]] <- Cache(rnorm(1, 0, get("bbb", inherits = FALSE)))
a[[14]] <- Cache(rnorm(1, 0, get("qq", inherits = FALSE, envir = ee)))
a[[15]] <- Cache(rnorm(1, bbb - bbb, get("bbb", inherits = FALSE)))
a[[16]] <- Cache(rnorm(sd = 1, 0, n = get("bbb", inherits = FALSE))) # change order
a[[17]] <- Cache(rnorm(1, sd = get("ee", inherits = FALSE)$qq), mean = 0)

# With the base pipe -- the code is stored as a character string and parsed later
#  because R 4.0 cannot parse the pipe; with R >= 4.1 (or R >= 4.2 when using
#  the _ placeholder) the pipe can be written directly in normal code
usingPipe1 <- "b$fun(1) |> Cache()"  # base pipe

# For a long pipe, need to wrap the sequence in { }, or else only the last step is cached
usingPipe2 <-
  '{"bbb" |>
      parse(text = _) |>
      eval() |>
      rnorm()} |>
    Cache()'
if (getRversion() >= "4.1") {
  a[[9]] <- eval(parse(text = usingPipe1)) # recovers cached copy
}
if (getRversion() >= "4.2") { # uses the _ placeholder; only available in R >= 4.2
  a[[18]] <- eval(parse(text = usingPipe2)) # recovers cached copy
}

length(unique(a)) == 1 #  all same

### Pipe -- have to use { } or else only the final function is Cached
if (getRversion() >= "4.1") {
  b1a <- 'sample(1e5, 1) |> rnorm() |> Cache()'
  b1b <- 'sample(1e5, 1) |> rnorm() |> Cache()'
  b2a <- '{sample(1e5, 1) |> rnorm()} |> Cache()'
  b2b <- '{sample(1e5, 1) |> rnorm()} |> Cache()'
  b1a <- eval(parse(text = b1a))
  b1b <- eval(parse(text = b1b))
  b2a <- eval(parse(text = b2a))
  b2b <- eval(parse(text = b2b))
  all.equal(b1a, b1b) # Not TRUE because the sample is run first
  all.equal(b2a, b2b) # TRUE because of {  }
}

#########################
# Advanced examples
#########################

# .cacheExtra -- add something to digest
Cache(rnorm(1), .cacheExtra = "sfessee11") # adds something other than fn args
Cache(rnorm(1), .cacheExtra = "nothing") # even though fn is same, the extra is different

# omitArgs -- remove something from digest (kind of the opposite of .cacheExtra)
Cache(rnorm(2, sd = 1), omitArgs = "sd") # removes one or more args from cache digest
Cache(rnorm(2, sd = 2), omitArgs = "sd") # b/c sd is not used, this is same as previous

# cacheId -- force the use of a digest -- can give undesired consequences
Cache(rnorm(3), cacheId = "k323431232") # sets the cacheId for this call
Cache(runif(14), cacheId = "k323431232") # recovers same as above, i.e., rnorm(3)

# Turn off Caching session-wide
opts <- options(reproducible.useCache = FALSE)
Cache(rnorm(3)) # doesn't cache
options(opts)

# showSimilar can help with debugging why a Cache call isn't picking up a cached copy
Cache(rnorm(4), showSimilar = TRUE) # shows that the argument `n` is different

###############################################
# devMode -- enables cache database to stay
#            small even when developing code
###############################################
opt <- options("reproducible.useCache" = "devMode")
clearCache(tmpDir, ask = FALSE)
centralTendency <- function(x) {
  mean(x)
}
funnyData <- c(1, 1, 1, 1, 10)
uniqueUserTags <- c("thisIsUnique", "reallyUnique")
ranNumsB <- Cache(centralTendency, funnyData, cachePath = tmpDir,
                  userTags = uniqueUserTags) # sets new value to Cache
showCache(tmpDir) # 1 unique cacheId -- cacheId is 71cd24ec3b0d0cac

# During development, we often redefine function internals
centralTendency <- function(x) {
  median(x)
}
# When we rerun, we don't want to keep the "old" cache because the function will
#   never again be defined that way. Here, because of userTags being the same,
#   it will replace the entry in the Cache, effectively overwriting it, even though
#   it has a different cacheId
ranNumsD <- Cache(centralTendency, funnyData, cachePath = tmpDir, userTags = uniqueUserTags)
showCache(tmpDir) # 1 unique artifact -- cacheId is 632cd06f30e111be

# If it finds it by cacheId, doesn't matter what the userTags are
ranNumsD <- Cache(centralTendency, funnyData, cachePath = tmpDir, userTags = "thisIsUnique")
options(opt)

#########################################
# For more in depth uses, see vignette
if (interactive())
  browseVignettes(package = "reproducible")
}
\seealso{
\code{\link[=showCache]{showCache()}}, \code{\link[=clearCache]{clearCache()}}, \code{\link[=keepCache]{keepCache()}},
\code{\link[=CacheDigest]{CacheDigest()}} to determine the digest of a given function or expression,
as used internally within \code{Cache}, \code{\link[=movedCache]{movedCache()}}, \code{\link[=.robustDigest]{.robustDigest()}}, and
for more advanced uses there are several helper functions,
e.g., \code{\link[=rmFromCache]{rmFromCache()}}, \code{\link[=CacheStorageDir]{CacheStorageDir()}}
}
\author{
Eliot McIntire
}
