Terms Aggregation for nested field in Elastic Search (tag: elasticsearch)

Terms Aggregation for nested field in Elastic Search


As I've said, your issue is that your text is analyzed, and Elasticsearch always aggregates at the token level. To fix that, your field values have to be indexed as single tokens. There are two options:

  • do not analyze them (index the field as not_analyzed)
  • index them using a keyword analyzer plus a lowercase filter (for case-insensitive aggregations)

These are the settings to create a custom keyword analyzer with a lowercase filter and ASCII folding of accented characters (ö => o and ß => ss), together with the mappings that add sub-fields (raw and keyword) to your fields so they can be used for aggregation:

PUT /test{  "settings": {    "analysis": {      "analyzer": {        "my_analyzer_keyword": {          "type": "custom",          "tokenizer": "keyword",          "filter": [            "asciifolding",            "lowercase"          ]        }      }    }  },  "mappings": {    "data": {      "properties": {        "products_filter": {          "type": "nested",          "properties": {            "filter_name": {              "type": "string",              "analyzer": "standard",              "fields": {                "raw": {                  "type": "string",                  "index": "not_analyzed"                },                "keyword": {                  "type": "string",                  "analyzer": "my_analyzer_keyword"                }              }            },            "filter_value": {              "type": "string",              "analyzer": "standard",              "fields": {                "raw": {                  "type": "string",                  "index": "not_analyzed"                },                "keyword": {                  "type": "string",                  "analyzer": "my_analyzer_keyword"                }              }            }          }        }      }    }  }}

The test document you've given us:

PUT /test/data/1{  "products_filter": [    {      "filter_name": "Rahmengröße",      "filter_value": "33,5 cm"    },    {      "filter_name": "color",      "filter_value": "gelb"    },    {      "filter_name": "Rahmengröße",      "filter_value": "39,5 cm"    },    {      "filter_name": "Rahmengröße",      "filter_value": "45,5 cm"    }  ]}

This is the query to aggregate using the raw field:

GET /test/_search{  "size": 0,  "aggs": {    "Nesting": {      "nested": {        "path": "products_filter"      },      "aggs": {        "raw_names": {          "terms": {            "field": "products_filter.filter_name.raw",            "size": 0          },          "aggs": {            "raw_values": {              "terms": {                "field": "products_filter.filter_value.raw",                "size": 0              }            }          }        }      }    }  }}

It returns the expected result (buckets with filter names and sub-buckets with their values):

{  "took": 1,  "timed_out": false,  "_shards": {    "total": 5,    "successful": 5,    "failed": 0  },  "hits": {    "total": 1,    "max_score": 0,    "hits": []  },  "aggregations": {    "Nesting": {      "doc_count": 4,      "raw_names": {        "doc_count_error_upper_bound": 0,        "sum_other_doc_count": 0,        "buckets": [          {            "key": "Rahmengröße",            "doc_count": 3,            "raw_values": {              "doc_count_error_upper_bound": 0,              "sum_other_doc_count": 0,              "buckets": [                {                  "key": "33,5 cm",                  "doc_count": 1                },                {                  "key": "39,5 cm",                  "doc_count": 1                },                {                  "key": "45,5 cm",                  "doc_count": 1                }              ]            }          },          {            "key": "color",            "doc_count": 1,            "raw_values": {              "doc_count_error_upper_bound": 0,              "sum_other_doc_count": 0,              "buckets": [                {                  "key": "gelb",                  "doc_count": 1                }              ]            }          }        ]      }    }  }}

Alternatively, you could use the field with the keyword analyzer (and some normalization) to get more generic, case-insensitive results:

GET /test/_search{  "size": 0,  "aggs": {    "Nesting": {      "nested": {        "path": "products_filter"      },      "aggs": {        "keyword_names": {          "terms": {            "field": "products_filter.filter_name.keyword",            "size": 0          },          "aggs": {            "keyword_values": {              "terms": {                "field": "products_filter.filter_value.keyword",                "size": 0              }            }          }        }      }    }  }}

That's the result (note that the filter name key is now normalized, e.g. "Rahmengröße" becomes "rahmengrosse"):

{  "took": 1,  "timed_out": false,  "_shards": {    "total": 5,    "successful": 5,    "failed": 0  },  "hits": {    "total": 1,    "max_score": 0,    "hits": []  },  "aggregations": {    "Nesting": {      "doc_count": 4,      "keyword_names": {        "doc_count_error_upper_bound": 0,        "sum_other_doc_count": 0,        "buckets": [          {            "key": "rahmengrosse",            "doc_count": 3,            "keyword_values": {              "doc_count_error_upper_bound": 0,              "sum_other_doc_count": 0,              "buckets": [                {                  "key": "33,5 cm",                  "doc_count": 1                },                {                  "key": "39,5 cm",                  "doc_count": 1                },                {                  "key": "45,5 cm",                  "doc_count": 1                }              ]            }          },          {            "key": "color",            "doc_count": 1,            "keyword_values": {              "doc_count_error_upper_bound": 0,              "sum_other_doc_count": 0,              "buckets": [                {                  "key": "gelb",                  "doc_count": 1                }              ]            }          }        ]      }    }  }}