Django haystack EdgeNgramField giving different results from Elasticsearch
After a deep look into the code I found that the search generated by haystack was:
{ "query":{ "filtered":{ "filter":{ "fquery":{ "query":{ "query_string":{ "query": "django_ct:(csi.geoname)" } }, "_cache":false } }, "query":{ "query_string":{ "query": "name_auto:(mid)", "default_operator":"or", "default_field":"text", "auto_generate_phrase_queries":true, "analyze_wildcard":true } } } }, "from":0, "size":6}
Running this query directly in Elasticsearch gave me the same 6 objects that haystack was showing... but if I added to the "query_string"
"analyzer": "standard"
it worked as desired. So the idea was to be able to setup a different search analyzer for the field.
Based on the link in @user954994's answer and the explanation in this post, what I finally did to make it work was:
- I created my custom elasticsearch backend, adding a new custom analyzer based on the standard one.
- I added a custom EdgeNgramField that makes it possible to set a specific analyzer for indexing (index_analyzer) and another analyzer for searching (search_analyzer).
So, my new settings are:
# Elasticsearch index settings: custom analyzers plus the tokenizers and
# token filters they are built from.
ELASTICSEARCH_INDEX_SETTINGS = {
    "settings": {
        "analysis": {
            "analyzer": {
                # Index-time analyzers built on the n-gram token filters below.
                "ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "lowercase",
                    "filter": ["haystack_ngram"],
                },
                "edgengram_analyzer": {
                    "type": "custom",
                    "tokenizer": "lowercase",
                    "filter": ["haystack_edgengram"],
                },
                # Standard-based analyzer without any n-gram filter; the index
                # definition uses it as the search-time analyzer.
                "suggest_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "standard",
                        "lowercase",
                        "asciifolding",
                    ],
                },
            },
            "tokenizer": {
                "haystack_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 3,
                    "max_gram": 15,
                },
                "haystack_edgengram_tokenizer": {
                    "type": "edgeNGram",
                    "min_gram": 2,
                    "max_gram": 15,
                    "side": "front",
                },
            },
            "filter": {
                "haystack_ngram": {
                    "type": "nGram",
                    "min_gram": 3,
                    "max_gram": 15,
                },
                "haystack_edgengram": {
                    "type": "edgeNGram",
                    "min_gram": 2,
                    "max_gram": 15,
                },
            },
        },
    },
}
My new custom build_schema method looks as follows:
def build_schema(self, fields):
    """Build the index mapping, honouring per-field analyzer overrides.

    Starts from the mapping produced by the parent backend and then, for
    every field:

    * sets ``analyzer`` on indexed string fields that are neither facets
      nor (edge) n-gram fields, using the field's ``analyzer`` attribute
      or ``self.DEFAULT_ANALYZER``;
    * when the field declares both ``index_analyzer`` and
      ``search_analyzer``, writes those two keys and removes the single
      ``analyzer`` key so that it does not sit alongside them.

    :param fields: mapping of field name to field instance.
    :returns: ``(content_field_name, mapping)`` tuple.
    """
    # NOTE: ``ConfigurableElasticBackend`` must be the class this method is
    # defined on for the two-argument ``super`` call to resolve correctly.
    content_field_name, mapping = super(
        ConfigurableElasticBackend, self).build_schema(fields)

    for field_class in fields.values():
        field_mapping = mapping[field_class.index_fieldname]

        index_analyzer = getattr(field_class, 'index_analyzer', None)
        search_analyzer = getattr(field_class, 'search_analyzer', None)
        field_analyzer = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

        if field_mapping['type'] == 'string' and field_class.indexed:
            if (not hasattr(field_class, 'facet_for')
                    and field_class.field_type not in ('ngram', 'edge_ngram')):
                field_mapping['analyzer'] = field_analyzer

        if index_analyzer and search_analyzer:
            field_mapping['index_analyzer'] = index_analyzer
            field_mapping['search_analyzer'] = search_analyzer
            # The inherited mapping may not carry an 'analyzer' key at all,
            # so remove it tolerantly instead of using ``del``, which would
            # raise KeyError in that case.
            field_mapping.pop('analyzer', None)

        mapping[field_class.index_fieldname] = field_mapping

    return (content_field_name, mapping)
And after rebuilding the index, my mapping looks like this:
modelresult: { _boost: { name: "boost", null_value: 1 }, properties: { django_ct: { type: "string" }, django_id: { type: "string" }, name_auto: { type: "string", store: true, term_vector: "with_positions_offsets", index_analyzer: "edgengram_analyzer", search_analyzer: "suggest_analyzer" } }}
Now everything is working as expected!
UPDATE:
Below you'll find the code to clarify this part:
- I created my custom elasticsearch backend, adding a new custom analyzer based on the standard one.
- I added a custom EdgeNgramField that makes it possible to set a specific analyzer for indexing (index_analyzer) and another analyzer for searching (search_analyzer).
In my app's search_backends.py:
from django.conf import settings
from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend
from haystack.backends.elasticsearch_backend import ElasticsearchSearchEngine
from haystack.fields import EdgeNgramField as BaseEdgeNgramField


# Custom Backend
class CustomElasticBackend(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable index settings and analyzers.

    Reads two optional Django settings:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` -- when set, replaces
      ``DEFAULT_SETTINGS`` on this backend instance.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` -- analyzer applied to string
      fields that do not name their own; falls back to ``"snowball"``.
    """

    DEFAULT_ANALYZER = None

    def __init__(self, connection_alias, **connection_options):
        super(CustomElasticBackend, self).__init__(
            connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', None)
        self.DEFAULT_ANALYZER = getattr(
            settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', "snowball")

        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

    def build_schema(self, fields):
        """Build the index mapping, honouring per-field analyzer overrides.

        Starts from the parent backend's mapping. Indexed string fields
        that are neither facets nor (edge) n-gram fields get an
        ``analyzer`` (the field's own or ``DEFAULT_ANALYZER``). A field
        that declares both ``index_analyzer`` and ``search_analyzer`` gets
        those two keys instead of the single ``analyzer`` key.

        :param fields: mapping of field name to field instance.
        :returns: ``(content_field_name, mapping)`` tuple.
        """
        content_field_name, mapping = super(
            CustomElasticBackend, self).build_schema(fields)

        for field_class in fields.values():
            field_mapping = mapping[field_class.index_fieldname]

            index_analyzer = getattr(field_class, 'index_analyzer', None)
            search_analyzer = getattr(field_class, 'search_analyzer', None)
            field_analyzer = getattr(
                field_class, 'analyzer', self.DEFAULT_ANALYZER)

            if field_mapping['type'] == 'string' and field_class.indexed:
                if (not hasattr(field_class, 'facet_for')
                        and field_class.field_type not in ('ngram', 'edge_ngram')):
                    field_mapping['analyzer'] = field_analyzer

            if index_analyzer and search_analyzer:
                field_mapping['index_analyzer'] = index_analyzer
                field_mapping['search_analyzer'] = search_analyzer
                # The inherited mapping may have no 'analyzer' key; ``del``
                # would raise KeyError there, so remove it tolerantly.
                field_mapping.pop('analyzer', None)

            mapping[field_class.index_fieldname] = field_mapping

        return (content_field_name, mapping)


class CustomElasticSearchEngine(ElasticsearchSearchEngine):
    """Search engine that plugs in :class:`CustomElasticBackend`."""

    backend = CustomElasticBackend


# Custom field
class CustomFieldMixin(object):
    """Field mixin that accepts optional analyzer keyword arguments.

    ``analyzer``, ``index_analyzer`` and ``search_analyzer`` are removed
    from the keyword arguments and stored on the instance (``None`` when
    absent); everything else is forwarded to the next ``__init__``.
    """

    def __init__(self, *args, **kwargs):
        self.analyzer = kwargs.pop('analyzer', None)
        self.index_analyzer = kwargs.pop('index_analyzer', None)
        self.search_analyzer = kwargs.pop('search_analyzer', None)
        # Forward positional arguments too, so the mixin does not narrow
        # the constructor signature of the field it is combined with.
        super(CustomFieldMixin, self).__init__(*args, **kwargs)


class CustomEdgeNgramField(CustomFieldMixin, BaseEdgeNgramField):
    """EdgeNgramField that can carry separate index and search analyzers."""
    pass
My index definition goes something like:
class MyIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index with a template-based document field and an
    autocomplete field that uses different index and search analyzers."""

    # Primary document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)

    # Built from the model's ``name`` attribute; indexed with the
    # edge n-gram analyzer and queried with the suggest analyzer.
    name_auto = CustomEdgeNgramField(
        model_attr='name',
        index_analyzer="edgengram_analyzer",
        search_analyzer="suggest_analyzer",
    )
And finally, the settings of course use the custom backend in the haystack connection definition:
# Haystack connection definition; the default connection uses the custom
# search engine class from my_app.search_backends.
HAYSTACK_CONNECTIONS = {
    "default": {
        "ENGINE": "my_app.search_backends.CustomElasticSearchEngine",
        "URL": "http://localhost:9200",
        "INDEX_NAME": "index",
    },
}
Well, I had a similar problem and my strategy was to make a custom backend.
The complete instructions can be found on:
http://www.wellfireinteractive.com/blog/custom-haystack-elasticsearch-backend/
It works for me!
Hope this helps.