Query the latest document of each type on Elasticsearch
First, please note that I had to change your mapping for the headline field to `string`, as in your sample documents headlines are `string`s and not `object`s.
So, a query like the following one would retrieve what you expect:
curl -XPOST "$ELASTICSEARCH_ENDPOINT/news/_search" -d '{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "term": {
          "user": "John"            <--- filter for user=John
        }
      }
    }
  },
  "aggs": {
    "sources": {
      "terms": {
        "field": "source"           <--- aggregate by source
      },
      "aggs": {
        "latest": {
          "top_hits": {
            "size": 1,              <--- only take the first...
            "_source": [            <--- only the date and headline
              "headline",
              "timestamp"
            ],
            "sort": {
              "timestamp": "desc"   <--- ...and only the latest hit
            }
          }
        }
      }
    }
  }
}'
That will yield something like this:
{
  ...
  "aggregations" : {
    "sources" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ {
        "key" : "CNN",
        "doc_count" : 2,
        "latest" : {
          "hits" : {
            "total" : 2,
            "max_score" : null,
            "hits" : [ {
              "_index" : "news",
              "_type" : "news",
              "_id" : "AU7Sh3VDGDddn2ZNuDVl",
              "_score" : null,
              "_source":{ "headline": "More great news", "timestamp": "2015-07-28T00:08:23.000" },
              "sort" : [ 1438042103000 ]
            } ]
          }
        }
      }, {
        "key" : "ESPN",
        "doc_count" : 2,
        "latest" : {
          "hits" : {
            "total" : 2,
            "max_score" : null,
            "hits" : [ {
              "_index" : "news",
              "_type" : "news",
              "_id" : "AU7Sh3VDGDddn2ZNuDVn",
              "_score" : null,
              "_source":{ "headline": "More sports news", "timestamp": "2015-07-28T00:10:35.000" },
              "sort" : [ 1438042235000 ]
            } ]
          }
        }
      } ]
    }
  }
}