Creating a Gin Index with Trigram (gin_trgm_ops) in Django model
I had a similar problem, trying to use the pg_tgrm
extension to support efficient contains
and icontains
Django field lookups.
There may be a more elegant way, but defining a new index type like this worked for me:
from django.contrib.postgres.indexes import GinIndexclass TrigramIndex(GinIndex): def get_sql_create_template_values(self, model, schema_editor, using): fields = [model._meta.get_field(field_name) for field_name, order in self.fields_orders] tablespace_sql = schema_editor._get_index_tablespace_sql(model, fields) quote_name = schema_editor.quote_name columns = [ ('%s %s' % (quote_name(field.column), order)).strip() + ' gin_trgm_ops' for field, (field_name, order) in zip(fields, self.fields_orders) ] return { 'table': quote_name(model._meta.db_table), 'name': quote_name(self.name), 'columns': ', '.join(columns), 'using': using, 'extra': tablespace_sql, }
The method get_sql_create_template_values
is copied from Index.get_sql_create_template_values()
, with just one modification: the addition of + ' gin_trgm_ops'
.
For your use case, you would then define the index on name_txt
using this TrigramIndex
instead of a GinIndex
. Then run makemigrations
, which will produce a migration that generates the required CREATE INDEX
SQL.
UPDATE:
I see you're also doing a query using icontains
:
result.exclude(name_txt__icontains = 'sp.')
The Postgresql backend will turn that into something like this:
UPPER("NCBI_names"."name_txt"::text) LIKE UPPER('sp.')
and then the trigram index won't be used because of the UPPER()
.
I had the same problem, and ended up subclassing the database backend to work around it:
from django.db.backends.postgresql import base, operationsclass DatabaseFeatures(base.DatabaseFeatures): passclass DatabaseOperations(operations.DatabaseOperations): def lookup_cast(self, lookup_type, internal_type=None): lookup = '%s' # Cast text lookups to text to allow things like filter(x__contains=4) if lookup_type in ('iexact', 'contains', 'icontains', 'startswith', 'istartswith', 'endswith', 'iendswith', 'regex', 'iregex'): if internal_type in ('IPAddressField', 'GenericIPAddressField'): lookup = "HOST(%s)" else: lookup = "%s::text" return lookupclass DatabaseWrapper(base.DatabaseWrapper): """ Override the defaults where needed to allow use of trigram index """ ops_class = DatabaseOperations def __init__(self, *args, **kwargs): self.operators.update({ 'icontains': 'ILIKE %s', 'istartswith': 'ILIKE %s', 'iendswith': 'ILIKE %s', }) self.pattern_ops.update({ 'icontains': "ILIKE '%%' || {} || '%%'", 'istartswith': "ILIKE {} || '%%'", 'iendswith': "ILIKE '%%' || {}", }) super(DatabaseWrapper, self).__init__(*args, **kwargs)
I found a 12/2020 article that uses the newest version of Django ORM as such:
class Author(models.Model): first_name = models.CharField(max_length=100) last_name = models.CharField(max_length=100) class Meta: indexes = [ GinIndex( name='review_author_ln_gin_idx', fields=['last_name'], opclasses=['gin_trgm_ops'], ) ]
If, like the original poster, you were looking to create an index that works with icontains, you'll have to index the UPPER() of the column, which requires special handling from OpClass:
from django.db.models.functions import Upperfrom django.contrib.postgres.indexes import GinIndex, OpClassclass Author(models.Model): indexes = [ GinIndex( OpClass(Upper('last_name'), name='gin_trgm_ops'), name='review_author_ln_gin_idx', ) ]
Inspired from an old article on this subject, I landed to a current one which gives the following solution for a GistIndex
:
Update:From Django-1.11 things seem to be simpler, as this answer and django docs sugest:
from django.contrib.postgres.indexes import GinIndexclass MyModel(models.Model): the_field = models.CharField(max_length=512, db_index=True) class Meta: indexes = [GinIndex(fields=['the_field'])]
From Django-2.2, an attribute opclasses
will be available in class Index(fields=(), name=None, db_tablespace=None, opclasses=())
for this purpose.
from django.contrib.postgres.indexes import GistIndexclass GistIndexTrgrmOps(GistIndex): def create_sql(self, model, schema_editor): # - this Statement is instantiated by the _create_index_sql() # method of django.db.backends.base.schema.BaseDatabaseSchemaEditor. # using sql_create_index template from # django.db.backends.postgresql.schema.DatabaseSchemaEditor # - the template has original value: # "CREATE INDEX %(name)s ON %(table)s%(using)s (%(columns)s)%(extra)s" statement = super().create_sql(model, schema_editor) # - however, we want to use a GIST index to accelerate trigram # matching, so we want to add the gist_trgm_ops index operator # class # - so we replace the template with: # "CREATE INDEX %(name)s ON %(table)s%(using)s (%(columns)s gist_trgrm_ops)%(extra)s" statement.template =\ "CREATE INDEX %(name)s ON %(table)s%(using)s (%(columns)s gist_trgm_ops)%(extra)s" return statement
Which you can then use in your model class like this:
class YourModel(models.Model): some_field = models.TextField(...) class Meta: indexes = [ GistIndexTrgrmOps(fields=['some_field']) ]
To make Django 2.2 use the index for icontains
and similar searches:
Subclass GinIndex to make an case insensitive index (uppercasing all field values):
from django.contrib.postgres.indexes import GinIndexclass UpperGinIndex(GinIndex): def create_sql(self, model, schema_editor, using=''): statement = super().create_sql(model, schema_editor, using=using) quote_name = statement.parts['columns'].quote_name def upper_quoted(column): return f'UPPER({quote_name(column)})' statement.parts['columns'].quote_name = upper_quoted return statement
Add the index to your model like this, including kwarg name
which is required when using opclasses
:
class MyModel(Model): name = TextField(...) class Meta: indexes = [ UpperGinIndex(fields=['name'], name='mymodel_name_gintrgm', opclasses=['gin_trgm_ops']) ]
Generate the migration and edit the generated file:
# Generated by Django 2.2.3 on 2019-07-15 10:46from django.contrib.postgres.operations import TrigramExtension # <<< add thisfrom django.db import migrationsimport myapp.modelsclass Migration(migrations.Migration): operations = [ TrigramExtension(), # <<< add this migrations.AddIndex( model_name='mymodel', index=myapp.models.UpperGinIndex(fields=['name'], name='mymodel_name_gintrgm', opclasses=['gin_trgm_ops']), ), ]