Add Search() function, two new aggregations (#7)

This commit implements a Search() function, which allow for running
search requests with both a query and aggregations. This function is
meant to more accurately implement the structure of search requests
accepted by ElasticSearch's Search API.

The Query() and Aggregate() functions are still included by the library,
but now simply call Search() internally, making them simple shortcuts.

Two new aggregations are also added: "terms" and "top_hits". These are
implemented a bit differently than previously implemented ones. The
structs and methods for ElasticSearch queries and aggregations will
eventually be auto-generated from a specification file, and will look
more like the new implementations of these new aggregations.
This commit is contained in:
Ido Perlmuter 2020-04-06 09:03:42 +00:00 committed by GitHub
parent d07b4e2126
commit 8e5dea816b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 473 additions and 210 deletions

View File

@ -63,27 +63,19 @@ func main() {
}
// run a boolean search query
qRes, err := esquery.Query(
res, err := esquery.Search().
Query(
esquery.
Bool().
Must(esquery.Term("title", "Go and Stuff")).
Filter(esquery.Term("tag", "tech")),
).Run(
es,
es.Search.WithContext(context.TODO()),
es.Search.WithIndex("test"),
)
if err != nil {
log.Fatalf("Failed searching for stuff: %s", err)
}
defer qRes.Body.Close()
// run an aggregation
aRes, err := esquery.Aggregate(
).
Aggs(
esquery.Avg("average_score", "score"),
esquery.Max("max_score", "score"),
).Run(
).
Size(20).
Run(
es,
es.Search.WithContext(context.TODO()),
es.Search.WithIndex("test"),
@ -92,7 +84,7 @@ func main() {
log.Fatalf("Failed searching for stuff: %s", err)
}
defer aRes.Body.Close()
defer res.Body.Close()
// ...
}
@ -163,6 +155,8 @@ The following aggregations are currently supported:
| `"percentiles"` | `Percentiles()` |
| `"stats"` | `Stats()` |
| `"string_stats"` | `StringStats()` |
| `"top_hits"` | `TopHits()` |
| `"terms"` | `TermsAgg()` |
#### Custom Queries and Aggregations

View File

@ -1,99 +0,0 @@
package esquery
import (
"bytes"
"encoding/json"
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
)
// AggregationRequest represents a complete request of type "aggregations"
// (a.k.a "aggs") to ElasticSearch's search API. It simply wraps a map of named
// aggregations, which are values of a type that implements the Mappable
// interface.
type AggregationRequest struct {
Aggs map[string]Mappable
}
// Aggregation is an interface that each aggregation type must implement. It
// is simply an extension of the Mappable interface to include a Named function,
// which returns the name of the aggregation.
type Aggregation interface {
Mappable
Name() string
}
// Aggregate generates a search request of type "aggs", represented by a
// *AggregationRequest object. It receives a variadic amount of values that
// implement the Aggregation interface, whether provided internally by the
// library or custom aggregations provided by consuming code.
func Aggregate(aggs ...Aggregation) *AggregationRequest {
req := &AggregationRequest{
Aggs: make(map[string]Mappable),
}
for _, agg := range aggs {
req.Aggs[agg.Name()] = agg
}
return req
}
// Map implements the Mappable interface. It converts the "aggs" request into a
// (potentially nested) map[string]interface{}.
func (req *AggregationRequest) Map() map[string]interface{} {
m := make(map[string]interface{})
for name, agg := range req.Aggs {
m[name] = agg.Map()
}
return map[string]interface{}{
"aggs": m,
}
}
// MarshalJSON implements the json.Marshaler interface, it simply encodes the
// map representation of the request (provided by the Map method) as JSON.
func (req *AggregationRequest) MarshalJSON() ([]byte, error) {
return json.Marshal(req.Map())
}
// Run executes the request using the provided ElasticSearch client. Zero or
// more search options can be provided as well. It returns the standard Response
// type of the official Go client.
func (req *AggregationRequest) Run(
api *elasticsearch.Client,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
var b bytes.Buffer
err = json.NewEncoder(&b).Encode(req.Map())
if err != nil {
return nil, err
}
opts := append([]func(*esapi.SearchRequest){api.Search.WithBody(&b)}, o...)
return api.Search(opts...)
}
// RunSearch is the same as the Run method, except that it accepts a value of
// type esapi.Search (usually this is the Search field of an elasticsearch.Client
// object). Since the ElasticSearch client does not provide an interface type
// for its API (which would allow implementation of mock clients), this provides
// a workaround. The Search function in the ES client is actually a field of a
// function type.
func (req *AggregationRequest) RunSearch(
search esapi.Search,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
var b bytes.Buffer
err = json.NewEncoder(&b).Encode(req.Map())
if err != nil {
return nil, err
}
opts := append([]func(*esapi.SearchRequest){search.WithBody(&b)}, o...)
return search(opts...)
}

86
aggs_bucket.go Normal file
View File

@ -0,0 +1,86 @@
package esquery
//----------------------------------------------------------------------------//
// TermsAggregation represents an aggregation of type "terms", as described in
// https://www.elastic.co/guide/en/elasticsearch/reference/current/
// search-aggregations-bucket-terms-aggregation.html
type TermsAggregation struct {
name string
field string
size *uint64
shardSize *float64
showTermDoc *bool
aggs []Aggregation
}
// TermsAgg creates a new aggregation of type "terms". The method name includes
// the "Agg" suffix to prevent conflict with the "terms" query.
func TermsAgg(name, field string) *TermsAggregation {
return &TermsAggregation{
name: name,
field: field,
}
}
// Name returns the name of the aggregation.
func (agg *TermsAggregation) Name() string {
return agg.name
}
// Size sets the number of term buckets to return.
func (agg *TermsAggregation) Size(size uint64) *TermsAggregation {
agg.size = &size
return agg
}
// ShardSize sets how many terms to request from each shard.
func (agg *TermsAggregation) ShardSize(size float64) *TermsAggregation {
agg.shardSize = &size
return agg
}
// ShowTermDocCountError sets whether to show an error value for each term
// returned by the aggregation which represents the worst case error in the
// document count.
func (agg *TermsAggregation) ShowTermDocCountError(b bool) *TermsAggregation {
agg.showTermDoc = &b
return agg
}
// Aggs sets sub-aggregations for the aggregation.
func (agg *TermsAggregation) Aggs(aggs ...Aggregation) *TermsAggregation {
agg.aggs = aggs
return agg
}
// Map returns a map representation of the aggregation, thus implementing the
// Mappable interface.
func (agg *TermsAggregation) Map() map[string]interface{} {
innerMap := map[string]interface{}{
"field": agg.field,
}
if agg.size != nil {
innerMap["size"] = *agg.size
}
if agg.shardSize != nil {
innerMap["shard_size"] = *agg.shardSize
}
if agg.showTermDoc != nil {
innerMap["show_term_doc_count_error"] = *agg.showTermDoc
}
outerMap := map[string]interface{}{
"terms": innerMap,
}
if len(agg.aggs) > 0 {
subAggs := make(map[string]map[string]interface{})
for _, sub := range agg.aggs {
subAggs[sub.Name()] = sub.Map()
}
outerMap["aggs"] = subAggs
}
return outerMap
}

View File

@ -395,3 +395,82 @@ func (agg *StringStatsAgg) Map() map[string]interface{} {
agg.apiName: structs.Map(agg),
}
}
// ---------------------------------------------------------------------------//
// TopHitsAgg represents an aggregation of type "top_hits", as described
// in https://www.elastic.co/guide/en/elasticsearch/reference/
// current/search-aggregations-metrics-top-hits-aggregation.html
type TopHitsAgg struct {
name string
from uint64
size uint64
sort []map[string]interface{}
source Source
}
// TopHits creates an aggregation of type "top_hits".
func TopHits(name string) *TopHitsAgg {
return &TopHitsAgg{
name: name,
}
}
// Name returns the name of the aggregation.
func (agg *TopHitsAgg) Name() string {
return agg.name
}
// From sets an offset from the first result to return.
func (agg *TopHitsAgg) From(offset uint64) *TopHitsAgg {
agg.from = offset
return agg
}
// Size sets the maximum number of top matching hits to return per bucket (the
// default is 3).
func (agg *TopHitsAgg) Size(size uint64) *TopHitsAgg {
agg.size = size
return agg
}
// Sort sets how the top matching hits should be sorted. By default the hits are
// sorted by the score of the main query.
func (agg *TopHitsAgg) Sort(name string, order Order) *TopHitsAgg {
agg.sort = append(agg.sort, map[string]interface{}{
name: map[string]interface{}{
"order": order,
},
})
return agg
}
// SourceIncludes sets the keys to return from the top matching documents.
func (agg *TopHitsAgg) SourceIncludes(keys ...string) *TopHitsAgg {
agg.source.includes = keys
return agg
}
// Map returns a map representation of the aggregation, thus implementing the
// Mappable interface.
func (agg *TopHitsAgg) Map() map[string]interface{} {
innerMap := make(map[string]interface{})
if agg.from > 0 {
innerMap["from"] = agg.from
}
if agg.size > 0 {
innerMap["size"] = agg.size
}
if len(agg.sort) > 0 {
innerMap["sort"] = agg.sort
}
if len(agg.source.includes) > 0 {
innerMap["_source"] = agg.source.Map()
}
return map[string]interface{}{
"top_hits": innerMap,
}
}

28
common.go Normal file
View File

@ -0,0 +1,28 @@
package esquery
// Source represents the "_source" option which is commonly accepted in ES
// queries. Currently, only the "includes" option is supported.
type Source struct {
includes []string
}
// Map returns a map representation of the Source object.
func (source Source) Map() map[string]interface{} {
return map[string]interface{}{
"includes": source.includes,
}
}
// Sort represents a list of keys to sort by.
type Sort []map[string]interface{}
// Order is the ordering for a sort key (ascending, descending).
type Order string
const (
// OrderAsc represents sorting in ascending order.
OrderAsc Order = "asc"
// OrderDesc represents sorting in descending order.
OrderDesc Order = "desc"
)

View File

@ -31,7 +31,7 @@ func (m *CustomQueryMap) Run(
api *elasticsearch.Client,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
return Query(m).Run(api, o...)
return Search().Query(m).Run(api, o...)
}
//----------------------------------------------------------------------------//

8
es.go
View File

@ -105,3 +105,11 @@ package esquery
type Mappable interface {
Map() map[string]interface{}
}
// Aggregation is an interface that each aggregation type must implement. It
// is simply an extension of the Mappable interface to include a Named function,
// which returns the name of the aggregation.
type Aggregation interface {
Mappable
Name() string
}

View File

@ -1,77 +0,0 @@
package esquery
import (
"bytes"
"encoding/json"
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
)
// QueryRequest represents a complete request of type "query" to ElasticSearch's
// search API. It simply wraps a value of a type that implements the Mappable
// interface.
type QueryRequest struct {
Query Mappable
}
// Query generates a search request of type "query", represented by a
// *QueryRequest object. It receives any query type that implements the
// Mappable interface, whether provided internally by the library or custom
// types provided by consuming code.
func Query(q Mappable) *QueryRequest {
return &QueryRequest{q}
}
// Map implements the Mappable interface. It converts the "query" request into a
// (potentially nested) map[string]interface{}.
func (req *QueryRequest) Map() map[string]interface{} {
return map[string]interface{}{
"query": req.Query.Map(),
}
}
// MarshalJSON implements the json.Marshaler interface, it simply encodes the
// map representation of the query (provided by the Map method) as JSON.
func (req *QueryRequest) MarshalJSON() ([]byte, error) {
return json.Marshal(req.Map())
}
// Run executes the request using the provided ElasticSearch client. Zero or
// more search options can be provided as well. It returns the standard Response
// type of the official Go client.
func (req *QueryRequest) Run(
api *elasticsearch.Client,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
var b bytes.Buffer
err = json.NewEncoder(&b).Encode(req.Map())
if err != nil {
return nil, err
}
opts := append([]func(*esapi.SearchRequest){api.Search.WithBody(&b)}, o...)
return api.Search(opts...)
}
// RunSearch is the same as the Run method, except that it accepts a value of
// type esapi.Search (usually this is the Search field of an elasticsearch.Client
// object). Since the ElasticSearch client does not provide an interface type
// for its API (which would allow implementation of mock clients), this provides
// a workaround. The Search function in the ES client is actually a field of a
// function type.
func (req *QueryRequest) RunSearch(
search esapi.Search,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
var b bytes.Buffer
err = json.NewEncoder(&b).Encode(req.Map())
if err != nil {
return nil, err
}
opts := append([]func(*esapi.SearchRequest){search.WithBody(&b)}, o...)
return search(opts...)
}

147
search.go Normal file
View File

@ -0,0 +1,147 @@
package esquery
import (
"bytes"
"encoding/json"
"fmt"
"time"
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
)
// SearchRequest represents a request to ElasticSearch's Search API, described
// in https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html.
// Not all features of the search API are currently supported, but a request can
// currently include a query, aggregations, and more.
type SearchRequest struct {
query Mappable
aggs []Aggregation
from *uint64
size *uint64
explain *bool
timeout *time.Duration
}
// Search creates a new SearchRequest object, to be filled via method chaining.
func Search() *SearchRequest {
return &SearchRequest{}
}
// Query sets a query for the request.
func (req *SearchRequest) Query(q Mappable) *SearchRequest {
req.query = q
return req
}
// Aggs sets one or more aggregations for the request.
func (req *SearchRequest) Aggs(aggs ...Aggregation) *SearchRequest {
req.aggs = aggs
return req
}
// From sets a document offset to start from.
func (req *SearchRequest) From(offset uint64) *SearchRequest {
req.from = &offset
return req
}
// Size sets the number of hits to return. The default - according to the ES
// documentation - is 10.
func (req *SearchRequest) Size(size uint64) *SearchRequest {
req.size = &size
return req
}
// Explain sets whether the ElasticSearch API should return an explanation for
// how each hit's score was calculated.
func (req *SearchRequest) Explain(b bool) *SearchRequest {
req.explain = &b
return req
}
// Timeout sets a timeout for the request.
func (req *SearchRequest) Timeout(dur time.Duration) *SearchRequest {
req.timeout = &dur
return req
}
// Map implements the Mappable interface. It converts the request to into a
// nested map[string]interface{}, as expected by the go-elasticsearch library.
func (req *SearchRequest) Map() map[string]interface{} {
m := make(map[string]interface{})
if req.query != nil {
m["query"] = req.query.Map()
}
if len(req.aggs) > 0 {
aggs := make(map[string]interface{})
for _, agg := range req.aggs {
aggs[agg.Name()] = agg.Map()
}
m["aggs"] = aggs
}
if req.size != nil {
m["size"] = *req.size
}
if req.from != nil {
m["from"] = *req.from
}
if req.explain != nil {
m["explain"] = *req.explain
}
if req.timeout != nil {
m["timeout"] = fmt.Sprintf("%.0fs", req.timeout.Seconds())
}
return m
}
// MarshalJSON implements the json.Marshaler interface. It returns a JSON
// representation of the map generated by the SearchRequest's Map method.
func (req *SearchRequest) MarshalJSON() ([]byte, error) {
return json.Marshal(req.Map())
}
// Run executes the request using the provided ElasticSearch client. Zero or
// more search options can be provided as well. It returns the standard Response
// type of the official Go client.
func (req *SearchRequest) Run(
api *elasticsearch.Client,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
return req.RunSearch(api.Search, o...)
}
// RunSearch is the same as the Run method, except that it accepts a value of
// type esapi.Search (usually this is the Search field of an elasticsearch.Client
// object). Since the ElasticSearch client does not provide an interface type
// for its API (which would allow implementation of mock clients), this provides
// a workaround. The Search function in the ES client is actually a field of a
// function type.
func (req *SearchRequest) RunSearch(
search esapi.Search,
o ...func(*esapi.SearchRequest),
) (res *esapi.Response, err error) {
var b bytes.Buffer
err = json.NewEncoder(&b).Encode(req.Map())
if err != nil {
return nil, err
}
opts := append([]func(*esapi.SearchRequest){search.WithBody(&b)}, o...)
return search(opts...)
}
// Query is a shortcut for creating a SearchRequest with only a query. It is
// mostly included to maintain the API provided by esquery in early releases.
func Query(q Mappable) *SearchRequest {
return Search().Query(q)
}
// Aggregate is a shortcut for creating a SearchRequest with aggregations. It is
// mostly included to maintain the API provided by esquery in early releases.
func Aggregate(aggs ...Aggregation) *SearchRequest {
return Search().Aggs(aggs...)
}

97
search_test.go Normal file
View File

@ -0,0 +1,97 @@
package esquery
import (
"testing"
"time"
)
func TestSearchMaps(t *testing.T) {
runMapTests(t, []mapTest{
{
"a simple match_all query with a size and no aggs",
Search().Query(MatchAll()).Size(20),
map[string]interface{}{
"query": map[string]interface{}{
"match_all": map[string]interface{}{},
},
"size": 20,
},
},
{
"a complex query with an aggregation and various other options",
Search().
Query(
Bool().
Must(
Range("date").
Gt("some time in the past").
Lte("now").
Relation(RangeContains).
TimeZone("Asia/Jerusalem").
Boost(2.3),
Match("author").
Query("some guy").
Analyzer("analyzer?").
Fuzziness("fuzz"),
).
Boost(3.1),
).
Aggs(
Sum("total_score", "score"),
StringStats("tag_stats", "tags").
ShowDistribution(true),
).
Size(30).
From(5).
Explain(true).
Timeout(time.Duration(20000000000)),
map[string]interface{}{
"query": map[string]interface{}{
"bool": map[string]interface{}{
"must": []map[string]interface{}{
{
"range": map[string]interface{}{
"date": map[string]interface{}{
"gt": "some time in the past",
"lte": "now",
"relation": "CONTAINS",
"time_zone": "Asia/Jerusalem",
"boost": 2.3,
},
},
},
{
"match": map[string]interface{}{
"author": map[string]interface{}{
"query": "some guy",
"analyzer": "analyzer?",
"fuzziness": "fuzz",
},
},
},
},
"boost": 3.1,
},
},
"aggs": map[string]interface{}{
"total_score": map[string]interface{}{
"sum": map[string]interface{}{
"field": "score",
},
},
"tag_stats": map[string]interface{}{
"string_stats": map[string]interface{}{
"field": "tags",
"show_distribution": true,
},
},
},
"size": 30,
"from": 5,
"explain": true,
"timeout": "20s",
},
},
})
}