\name{oclRun}
\alias{oclRun}
\title{
Run a kernel using OpenCL
}
\description{
  \code{oclRun} is used to execute code that has been compiled for
  OpenCL.
}
\usage{
oclRun(kernel, size, ..., dim = size)
}
\arguments{
  \item{kernel}{Kernel object as obtained from \code{\link{oclSimpleKernel}}}
  \item{size}{Length of the output vector}
  \item{\dots}{Additional arguments passed to the kernel}
  \item{dim}{Numeric vector describing the global work
    dimensions, i.e., the index range that the kernel will be run
    on. The kernel can use \code{get_global_id(n)} to obtain the
    \code{(n + 1)}-th dimension index and \code{get_global_size(n)} to
    get the dimension. The OpenCL standard supports only up to three
    dimensions; you can use index vectors as arguments if more
    dimensions are required. Note that \code{dim} is not necessarily the
    dimension of the result although it can be.}
}
\details{
  \code{oclRun} pushes kernel arguments, executes the kernel and
  retrieves the result. The kernel is expected to have either
  \code{__global double *} or \code{__global float *}
  type (write-only) as the first argument which will be used for the
  result and a \code{const unsigned int} second argument denoting the result
  length. All other arguments are assumed to be read-only and will be
  filled according to the \code{\dots} values.
  These can either be OpenCL buffers as generated by \code{\link{clBuffer}} for
  pointer arguments, or scalar values (vectors of length one) for scalar
  arguments. Only integer (\code{int}) and numeric (\code{double} or
  \code{float}) scalars and OpenCL buffers are supported as kernel arguments.
  The caller is responsible for matching the argument types according to the
  kernel in a way similar to \code{\link{.C}} and \code{\link{.Call}}.
}
\value{
  The resulting buffer of length \code{size}.
}
\author{
  Simon Urbanek, Aaron Puchert
}
\seealso{
  \code{\link{oclSimpleKernel}}, \code{\link{clBuffer}}
}
\examples{
library(OpenCL)

## Create a context that works in single precision
ctx <- oclContext(precision = "single")

## Kernel source: evaluates the normal density for every input element.
## The first two arguments (output buffer and its length) are the ones
## oclRun fills in; input, mu and sigma are passed by the caller.
code <- "
__kernel void dnorm(
  __global numeric* output,
 const unsigned int count,
  __global numeric* input,
 const numeric mu, const numeric sigma)
{
  size_t i = get_global_id(0);
  if(i < count)
      output[i] = exp(-0.5 * ((input[i] - mu) / sigma) * ((input[i] - mu) / sigma))
      / (sigma * sqrt( 2 * 3.14159265358979323846264338327950288 ) );
}"

## Compile the kernel and wrap the call in an ordinary R function
k.dnorm <- oclSimpleKernel(ctx, "dnorm", code)
f <- function(x, mu=0, sigma=1) {
  input <- as.clBuffer(x, ctx)
  result <- oclRun(k.dnorm, length(x), input, mu, sigma)
  as.numeric(result)
}

## the kernel above ran in single precision, so small differences
## from the reference implementation are expected
f(1:10/2) - dnorm(1:10/2)

## repeat in double precision, but only if the device lists the
## cl_khr_fp64 extension
if (!any("cl_khr_fp64" == oclInfo(attributes(ctx)$device)$exts)) {
  cat("\nSorry, your device doesn't support double-precision\n")
} else {
  k.dnorm <- oclSimpleKernel(ctx, "dnorm", code, "double")
  f <- function(x, mu=0, sigma=1) {
    ## allocate a double buffer explicitly and copy the data into it
    input <- clBuffer(ctx, length(x), "double")
    input[] <- x
    result <- oclRun(k.dnorm, length(x), input, mu, sigma)
    as.numeric(result)
  }

  ## probably not identical, but close...
  f(1:10/2) - dnorm(1:10/2)
}

## In practice, precision="best" can be used when creating the context
## in the first example: it picks "double" on devices that support it
## and "single" elsewhere
}
\keyword{interface}
