Elasticsearch Analyzer and Tokenizer for Emails (tag: elasticsearch)

ElasticSearch Analyzer and Tokenizer for Emails


Index settings (custom email analyzer) and mapping:

PUT /test
{
  "settings": {
    "analysis": {
      "filter": {
        "email": {
          "type": "pattern_capture",
          "preserve_original": true,
          "patterns": [
            "([^@]+)",
            "(\\p{L}+)",
            "(\\d+)",
            "@(.+)",
            "([^-@]+)"
          ]
        }
      },
      "analyzer": {
        "email": {
          "tokenizer": "uax_url_email",
          "filter": [
            "email",
            "lowercase",
            "unique"
          ]
        }
      }
    }
  },
  "mappings": {
    "emails": {
      "properties": {
        "email": {
          "type": "string",
          "analyzer": "email"
        }
      }
    }
  }
}

Test data:

POST /test/emails/_bulk
{"index":{"_id":"1"}}
{"email": "john.doe@gmail.com"}
{"index":{"_id":"2"}}
{"email": "john.doe@gmail.com, john.doe@outlook.com"}
{"index":{"_id":"3"}}
{"email": "hello-john.doe@outlook.com"}
{"index":{"_id":"4"}}
{"email": "john.doe@outlook.com"}
{"index":{"_id":"5"}}
{"email": "john@yahoo.com"}

Query to be used:

GET /test/emails/_search
{
  "query": {
    "term": {
      "email": "john.doe@gmail.com"
    }
  }
}