Create a Full Text Search index with SQLAlchemy on PostgreSQL
You could create index using Index
in __table_args__
. Also I use a function to create ts_vector
to make it more tidy and reusable if more than one field is required. Something like below:
from sqlalchemy.dialects import postgresqldef create_tsvector(*args): exp = args[0] for e in args[1:]: exp += ' ' + e return func.to_tsvector('english', exp)class Person(db.Model): id = db.Column(db.Integer, primary_key=True) name = db.Column(db.String) __ts_vector__ = create_tsvector( cast(func.coalesce(name, ''), postgresql.TEXT) ) __table_args__ = ( Index( 'idx_person_fts', __ts_vector__, postgresql_using='gin' ) )
Update:A sample query using index (corrected based on comments):
people = Person.query.filter(Person.__ts_vector__.match(expressions, postgresql_regconfig='english')).all()
The answer from @sharez is really useful (especially if you need to concatenate columns in your index). For anyone looking to create a tsvector GIN index on a single column, you can simplify the original answer approach with something like:
from sqlalchemy import Column, Index, Integer, Stringfrom sqlalchemy.ext.declarative import declarative_basefrom sqlalchemy.sql import funcBase = declarative_base()class Example(Base): __tablename__ = 'examples' id = Column(Integer, primary_key=True) textsearch = Column(String) __table_args__ = ( Index( 'ix_examples_tsv', func.to_tsvector('english', textsearch), postgresql_using='gin' ), )
Note that the comma following Index(...)
in __table_args__
is not a style choice, the value of __table_args__
must be a tuple, dictionary, or None
.
If you do need to create a tsvector GIN index on multiple columns, here is another way to get there using text()
.
from sqlalchemy import Column, Index, Integer, String, textfrom sqlalchemy.ext.declarative import declarative_basefrom sqlalchemy.sql import funcBase = declarative_base()def to_tsvector_ix(*columns): s = " || ' ' || ".join(columns) return func.to_tsvector('english', text(s))class Example(Base): __tablename__ = 'examples' id = Column(Integer, primary_key=True) atext = Column(String) btext = Column(String) __table_args__ = ( Index( 'ix_examples_tsv', to_tsvector_ix('atext', 'btext'), postgresql_using='gin' ), )
It has been answered already by @sharez and @benvc. I needed to make it work with weights though. This is how I did it based on their answers :
from sqlalchemy import Column, func, Index, Stringfrom sqlalchemy.ext.declarative import declarative_basefrom sqlalchemy.sql.operators import opCONFIG = 'english'Base = declarative_base()def create_tsvector(*args): field, weight = args[0] exp = func.setweight(func.to_tsvector(CONFIG, field), weight) for field, weight in args[1:]: exp = op(exp, '||', func.setweight(func.to_tsvector(CONFIG, field), weight)) return expclass Example(Base): __tablename__ = 'example' foo = Column(String) bar = Column(String) __ts_vector__ = create_tsvector( (foo, 'A'), (bar, 'B') ) __table_args__ = ( Index('my_index', __ts_vector__, postgresql_using='gin'), )