% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/Search.R
\name{Search}
\alias{Search}
\title{Full text search of Elasticsearch}
\usage{
Search(index = NULL, type = NULL, q = NULL, df = NULL,
  analyzer = NULL, default_operator = NULL, explain = NULL,
  source = NULL, fields = NULL, sort = NULL, track_scores = NULL,
  timeout = NULL, terminate_after = NULL, from = NULL, size = NULL,
  search_type = NULL, lowercase_expanded_terms = NULL,
  analyze_wildcard = NULL, version = FALSE, body = list(), raw = FALSE,
  asdf = FALSE, scroll = NULL, search_path = "_search", ...)
}
\arguments{
\item{index}{Index name}

\item{type}{Document type}

\item{q}{The query string (maps to the query_string query, see Query String Query
for more details). See \url{http://bit.ly/esquerystring} for documentation and
examples.}

\item{df}{(character) The default field to use when no field prefix is defined
within the query.}

\item{analyzer}{(character) The analyzer name to be used when analyzing the
query string.}

\item{default_operator}{(character) The default operator to be used, can be
\code{AND} or \code{OR}. Default: \code{OR}}

\item{explain}{(logical) For each hit, contain an explanation of how scoring of
the hits was computed. Default: \code{FALSE}}

\item{source}{(logical) Set to \code{FALSE} to disable retrieval of the \code{_source}
field. You can also retrieve part of the document by using \code{_source_include} &
\code{_source_exclude} (see the \code{body} documentation for more details)}

\item{fields}{(character) The selective stored fields of the document to return for
each hit. Not specifying any value will cause no fields to return.}

\item{sort}{(character) Sorting to perform. Can either be in the form of fieldName, or
\code{fieldName:asc}/\code{fieldName:desc}. The fieldName can either be an actual
field within the document, or the special \code{_score} name to indicate sorting based on
scores. There can be several sort parameters (order is important).}

\item{track_scores}{(logical) When sorting, set to TRUE in order to still track scores
and return them as part of each hit.}

\item{timeout}{(numeric) A search timeout, bounding the search request to be executed
within the specified time value and bail with the hits accumulated up to that point
when expired. Default: no timeout.}

\item{terminate_after}{(numeric) The maximum number of documents to collect for each
shard, upon reaching which the query execution will terminate early. If set, the
response will have a boolean field \code{terminated_early} to indicate whether the query
execution has actually terminated early. Default: no \code{terminate_after} limit.}

\item{from}{(character) The starting from index of the hits to return. Pass in as a
character string to avoid problems with large number conversion to scientific
notation. Default: 0}

\item{size}{(character) The number of hits to return. Pass in as a character string
to avoid problems with large number conversion to scientific notation. Default: 10.}

\item{search_type}{(character) The type of the search operation to perform. Can be
\code{query_then_fetch} (Default), \code{dfs_query_then_fetch}, \code{count},
\code{scan}, \code{query_and_fetch}, or \code{dfs_query_and_fetch}. The last two are
not intended to be specified by users.
See \url{http://bit.ly/19Am9xP} for more details on the different types of search that can
be performed.}

\item{lowercase_expanded_terms}{(logical) Should terms be automatically lowercased or not.
Default: TRUE.}

\item{analyze_wildcard}{(logical) Should wildcard and prefix queries be analyzed or not.
Default: FALSE.}

\item{version}{(logical) Print the document version with each document.}

\item{body}{Query, either a list or json.}

\item{raw}{(logical) If \code{FALSE} (default), data is parsed to a list. If \code{TRUE}, raw JSON is returned.}

\item{asdf}{(logical) If \code{TRUE}, use \code{\link[jsonlite]{fromJSON}} to parse JSON
directly to a data.frame. If \code{FALSE} (Default), list output is given.}

\item{scroll}{(character) Specify how long a consistent view of the index should
be maintained for scrolled search, e.g., "30s", "1m". See \code{\link{units-time}}.}

\item{search_path}{(character) The path to use for searching. Defaults to \code{_search},
but in some cases you may already have that in the base url set using \code{\link{connect}},
in which case you can set this to \code{NULL}}

\item{...}{Curl args passed on to \code{\link[httr]{POST}}}
}
\description{
Full text search of Elasticsearch
}
\details{
This function name has the "S" capitalized to avoid conflict with the function
\code{base::search}. I hate mixing cases, as I think it confuses users, but in this case
it seems necessary.
}
\examples{
\dontrun{
# URI string queries
Search(index="shakespeare")
Search(index="shakespeare", type="act")
Search(index="shakespeare", type="scene")
Search(index="shakespeare", type="line")

## Return certain fields
Search(index="shakespeare", fields=c('play_name','speaker'))

## sorting
Search(index="shakespeare", type="act", sort="text_entry")
Search(index="shakespeare", type="act", sort="speaker:desc", fields='speaker')
Search(index="shakespeare", type="act",
 sort=c("speaker:desc","play_name:asc"), fields=c('speaker','play_name'))

## paging
Search(index="shakespeare", size=1, fields='text_entry')$hits$hits
Search(index="shakespeare", size=1, from=1, fields='text_entry')$hits$hits

## queries
### Search in all fields
Search(index="shakespeare", type="act", q="york")

### Search in specific fields
Search(index="shakespeare", type="act", q="speaker:KING HENRY IV")$hits$total

### Exact phrase search by wrapping in quotes
Search(index="shakespeare", type="act", q='speaker:"KING HENRY IV"')$hits$total

### can specify operators between multiple words parenthetically
Search(index="shakespeare", type="act", q="speaker:(HENRY OR ARCHBISHOP)")$hits$total

### where the field line_number has no value (or is missing)
Search(index="shakespeare", q="_missing_:line_number")$hits$total

### where the field line_number has any non-null value
Search(index="shakespeare", q="_exists_:line_number")$hits$total

### wildcards, either * or ?
Search(index="shakespeare", q="*ay")$hits$total
Search(index="shakespeare", q="m?y")$hits$total

### regular expressions, wrapped in forward slashes
Search(index="shakespeare", q="text_entry:/[a-z]/")$hits$total

### fuzziness
Search(index="shakespeare", q="text_entry:ma~")$hits$total
Search(index="shakespeare", q="text_entry:the~2")$hits$total
Search(index="shakespeare", q="text_entry:the~1")$hits$total

### Proximity searches
Search(index="shakespeare", q='text_entry:"as hath"~5')$hits$total
Search(index="shakespeare", q='text_entry:"as hath"~10')$hits$total

### Ranges, here where line_id value is between 10 and 20
Search(index="shakespeare", q="line_id:[10 TO 20]")$hits$total

### Grouping
Search(index="shakespeare", q="(hath OR as) AND the")$hits$total

# Limit number of hits returned with the size parameter
Search(index="shakespeare", size=1)

# Give explanation of search in result
Search(index="shakespeare", size=1, explain=TRUE)

## terminate query after x documents found
## setting to 1 gives back one document for each shard
Search(index="shakespeare", terminate_after=1)
## or set to other number
Search(index="shakespeare", terminate_after=2)

## Get version number for each document
Search(index="shakespeare", version=TRUE, size=2)

## Get raw data
Search(index="shakespeare", type="scene", raw=TRUE)

## Curl debugging
library('httr')
out <- Search(index="shakespeare", type="line", config=verbose())



# Query DSL searches - queries sent in the body of the request
## Pass in as an R list

aggs <- list(aggs = list(stats = list(terms = list(field = "text_entry"))))
Search(index="shakespeare", body=aggs)

## or pass in as json query with newlines, easy to read
aggs <- '{
    "aggs": {
        "stats" : {
            "terms" : {
                "field" : "text_entry"
            }
        }
    }
}'
Search(index="shakespeare", body=aggs)

## or pass in collapsed json string
aggs <- '{"aggs":{"stats":{"terms":{"field":"text_entry"}}}}'
Search(index="shakespeare", body=aggs)

## Aggregations
### Histograms
aggs <- '{
    "aggs": {
        "latbuckets" : {
           "histogram" : {
               "field" : "decimalLatitude",
               "interval" : 5
           }
        }
    }
}'
Search(index="gbif", body=aggs, size=0)

### Histograms w/ more options
aggs <- '{
    "aggs": {
        "latbuckets" : {
           "histogram" : {
               "field" : "decimalLatitude",
               "interval" : 5,
               "min_doc_count" : 0,
               "extended_bounds" : {
                   "min" : -90,
                   "max" : 90
               }
           }
        }
    }
}'
Search(index="gbif", body=aggs, size=0)

### Ordering the buckets by their doc_count - descending:
aggs <- '{
    "aggs": {
        "latbuckets" : {
           "histogram" : {
               "field" : "decimalLatitude",
               "interval" : 5,
               "min_doc_count" : 0,
               "extended_bounds" : {
                   "min" : -90,
                   "max" : 90
               },
               "order" : {
                   "_count" : "desc"
               }
           }
        }
    }
}'
out <- Search(index="gbif", body=aggs, size=0)
lapply(out$aggregations$latbuckets$buckets, data.frame)

### By default, the buckets are returned as an ordered array. It is also possible to
### request the response as a hash instead keyed by the buckets keys:
aggs <- '{
    "aggs": {
        "latbuckets" : {
           "histogram" : {
               "field" : "decimalLatitude",
               "interval" : 10,
               "keyed" : true
           }
        }
    }
}'
Search(index="gbif", body=aggs, size=0)

# match query
match <- '{"query": {"match" : {"text_entry" : "Two Gentlemen"}}}'
Search(index="shakespeare", body=match)

# multi-match (multiple fields that is) query
mmatch <- '{"query": {"multi_match" : {"query" : "henry", "fields": ["text_entry","play_name"]}}}'
Search(index="shakespeare", body=mmatch)

# bool query
mmatch <- '{
 "query": {
   "bool" : {
     "must_not" : {
       "range" : {
         "speech_number" : {
           "from" : 1, "to": 5
}}}}}}'
Search(index="shakespeare", body=mmatch)

# Boosting query
boost <- '{
 "query" : {
  "boosting" : {
      "positive" : {
          "term" : {
              "play_name" : "henry"
          }
      },
      "negative" : {
          "term" : {
              "text_entry" : "thou"
          }
      },
      "negative_boost" : 0.8
    }
 }
}'
Search(index="shakespeare", body=boost)

# Fuzzy query
## fuzzy query on numerics
fuzzy <- list(query = list(fuzzy = list(speech_number = 7)))
Search(index="shakespeare", body=fuzzy)$hits$total
fuzzy <- list(query = list(fuzzy = list(speech_number = list(value = 7, fuzziness = 4))))
Search(index="shakespeare", body=fuzzy)$hits$total

### fuzzy query on date/time
fuzzy <- list(query = list(fuzzy = list(eventDate = list(value = "2014-01-29T23:00:00.000",
   fuzziness = "1d"))))
out <- Search(index="gbif", body=fuzzy, fields="eventDate")
out$hits$total
sapply(out$hits$hits, function(x) x$fields$eventDate) ## sweet!!!

# geoshape query
## not working yet
geo <- list(query = list(geo_shape = list(location = list(shape = list(type = "envelope",
   coordinates = "[[2,10],[10,20]]")))))
geo <- '{
 "query": {
   "geo_shape": {
     "location": {
       "point": {
         "type": "envelope",
         "coordinates": [[2,0],[2.93,100]]
       }
     }
   }
 }
}'
Search(index="gbifnewgeo", body=geo)

# range query
## with numeric
body <- list(query=list(range=list(decimalLongitude=list(gte=1, lte=3))))
Search('gbif', body=body)$hits$total

body <- list(query=list(range=list(decimalLongitude=list(gte=2.9, lte=10))))
Search('gbif', body=body)$hits$total

## with dates
body <- list(query=list(range=list(eventDate=list(gte="2012-01-01", lte="now"))))
Search('gbif', body=body)$hits$total

body <- list(query=list(range=list(eventDate=list(gte="2014-01-01", lte="now"))))
Search('gbif', body=body)$hits$total

# more like this query (more_like_this can be shortened to mlt)
body <- '{
 "query": {
   "more_like_this": {
     "fields": ["abstract","title"],
     "like_text": "and then",
     "min_term_freq": 1,
     "max_query_terms": 12
   }
 }
}'
Search('plos', body=body)$hits$total

body <- '{
 "query": {
   "more_like_this": {
     "fields": ["abstract","title"],
     "like_text": "cell",
     "min_term_freq": 1,
     "max_query_terms": 12
   }
 }
}'
Search('plos', body=body)$hits$total

# Highlighting
body <- '{
 "query": {
   "query_string": {
     "query" : "cell"
   }
 },
 "highlight": {
   "fields": {
     "title": {"number_of_fragments": 2}
   }
 }
}'
out <- Search('plos', 'article', body=body)
out$hits$total
sapply(out$hits$hits, function(x) x$highlight$title[[1]])

### Common terms query
body <- '{
 "query" : {
   "common": {
      "body": {
           "query": "this is",
           "cutoff_frequency": 0.01
       }
     }
  }
}'
Search('shakespeare', 'line', body=body)

## Scrolling search - instead of paging
Search('shakespeare', q="a*")$hits$total
res <- Search(index = 'shakespeare', q="a*", scroll="1m")
res <- Search(index = 'shakespeare', q="a*", scroll="1m", search_type = "scan")
scroll(scroll_id = res$`_scroll_id`)

res <- Search(index = 'shakespeare', q="a*", scroll="5m", search_type = "scan")
out <- list()
hits <- 1
while(hits != 0){
  res <- scroll(scroll_id = res$`_scroll_id`)
  hits <- length(res$hits$hits)
  if(hits > 0)
    out <- c(out, res$hits$hits)
}



# Using filters
## A bool filter
body <- '{
 "query":{
   "filtered":{
     "filter":{
        "bool": {
           "must_not" : {
               "range" : {
                   "year" : { "from" : 2011, "to" : 2012 }
               }
           }
         }
     }
   }
 }
}'
Search('gbif', body = body)$hits$total

## Geo filters - fun!
### Note that filters have many geospatial filter options, but queries have fewer, and
### require a geo_shape mapping

body <- '{
 "mappings": {
   "record": {
     "properties": {
         "location" : {"type" : "geo_point"}
      }
   }
 }
}'
index_create(index='gbifgeopoint', body=body)
path <- system.file("examples", "gbif_geopoint.json", package = "elastic")
docs_bulk(path)

### Points within a bounding box
body <- '{
 "query":{
   "filtered":{
     "filter":{
        "geo_bounding_box" : {
          "location" : {
            "top_left" : {
              "lat" : 60,
              "lon" : 14
            },
            "bottom_right" : {
              "lat" : 40,
              "lon" : 1
            }
          }
       }
     }
   }
 }
}'
out <- Search('gbifgeopoint', body = body)
out$hits$total
do.call(rbind, lapply(out$hits$hits, function(x) x$`_source`$location))

### Points within distance of a point
body <- '{
 "query":{
   "filtered":{
     "filter":{
        "geo_distance" : {
        "distance" : "200km" ,
           "location" : {
              "lon" : 4,
              "lat" : 50
            }
         }
       }
     }
   }
}'
out <- Search('gbifgeopoint', body = body)
out$hits$total
do.call(rbind, lapply(out$hits$hits, function(x) x$`_source`$location))

### Points within distance range of a point
body <- '{
 "query":{
   "filtered":{
     "filter":{
        "geo_distance_range" : {
          "from" : "200km",
          "to" : "400km",
          "location" : {
              "lon" : 4,
              "lat" : 50
           }
         }
       }
     }
   }
}'
out <- Search('gbifgeopoint', body = body)
out$hits$total
do.call(rbind, lapply(out$hits$hits, function(x) x$`_source`$location))

### Points within a polygon
body <- '{
 "query":{
   "filtered":{
     "filter":{
        "geo_polygon" : {
          "location" : {
             "points" : [
               [80.0, -20.0], [-80.0, -20.0], [-80.0, 60.0], [40.0, 60.0], [80.0, -20.0]
             ]
           }
         }
       }
     }
   }
}'
out <- Search('gbifgeopoint', body = body)
out$hits$total
do.call(rbind, lapply(out$hits$hits, function(x) x$`_source`$location))

### Geoshape filters using queries instead of filters
#### Get data with geojson type location data loaded first
body <- '{
 "mappings": {
   "record": {
     "properties": {
         "location" : {"type" : "geo_shape"}
      }
   }
 }
}'
index_create(index='geoshape', body=body)
path <- system.file("examples", "gbif_geoshape.json", package = "elastic")
docs_bulk(path)

#### Get data with a square envelope, w/ one point defining the upper left corner and
#### the other defining the lower right corner
body <- '{
 "query":{
   "geo_shape" : {
     "location" : {
         "shape" : {
           "type": "envelope",
            "coordinates": [[-30, 50],[30, 0]]
         }
       }
     }
   }
}'
out <- Search('geoshape', body = body)
out$hits$total

#### Get data with a circle, w/ point defining center, and radius
body <- '{
 "query":{
   "geo_shape" : {
     "location" : {
         "shape" : {
           "type": "circle",
           "coordinates": [-10, 45],
           "radius": "2000km"
         }
       }
     }
   }
}'
out <- Search('geoshape', body = body)
out$hits$total

#### Use a polygon, w/ points defining the vertices (first and last point are the same)
body <- '{
 "query":{
   "geo_shape" : {
     "location" : {
         "shape" : {
           "type": "polygon",
           "coordinates":  [
              [ [80.0, -20.0], [-80.0, -20.0], [-80.0, 60.0], [40.0, 60.0], [80.0, -20.0] ]
           ]
         }
       }
     }
   }
}'
out <- Search('geoshape', body = body)
out$hits$total

# Missing filter
body <- '{
 "query":{
   "constant_score" : {
     "filter" : {
       "missing" : { "field" : "play_name" }
     }
   }
 }
}'
Search("shakespeare", body = body)

# prefix filter
body <- '{
 "query":{
   "filtered" : {
     "filter" : {
       "prefix" : { "speaker" : "we" }
     }
   }
 }
}'
Search("shakespeare", body = body)$hits$total

# ids filter
body <- '{
 "query":{
   "filtered" : {
     "filter" : {
       "ids" : {
         "values": ["1","2","3","10","2000"]
       }
     }
   }
 }
}'
Search("shakespeare", body = body)$hits$total

# combined prefix and ids filters
body <- '{
 "query":{
   "filtered" : {
     "filter" : {
       "or": [{
         "ids" : {
           "values": ["1","2","3","10","2000"]
         }
       },
       {
         "prefix" : {
           "speaker" : "we"
         }
       }]
     }
   }
 }
}'
Search("shakespeare", body = body)$hits$total

body <- '{
 "query":{
   "filtered" : {
     "filter" : {
       "ids" : {
         "values": ["1","2","3","10","2000"]
       }
     },
     "filter" : {
       "prefix" : {
         "speaker" : "we"
       }
     }
   }
 }
}'
Search("shakespeare", body = body)$hits$total
}
}
\references{
\url{http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search.html}
}
\seealso{
\code{\link{Search_uri}} \code{\link{scroll}}
}

