Full-Text Search

tsvector & tsquery Basics

tsvector stores preprocessed document text; tsquery represents a search query.

-- Convert text to tsvector
SELECT to_tsvector('english', 'The quick brown fox jumps over the lazy dog');
-- 'brown':3 'dog':9 'fox':4 'jump':5 'lazi':8 'quick':2

-- Parse a tsquery
SELECT to_tsquery('english', 'quick & fox');        -- AND
SELECT to_tsquery('english', 'quick | fox');        -- OR
SELECT to_tsquery('english', '!dog');               -- NOT
SELECT to_tsquery('english', 'quick <-> fox');      -- followed by
SELECT plainto_tsquery('english', 'quick fox');     -- simple phrase
SELECT phraseto_tsquery('english', 'quick brown fox'); -- exact phrase

-- Match test: @@ operator
SELECT to_tsvector('english', 'the quick brown fox')
  @@ to_tsquery('english', 'quick & fox');  -- true

Table Setup & GIN Index

-- Add tsvector column
ALTER TABLE articles ADD COLUMN search_vec tsvector;

-- Populate from multiple columns with weights
UPDATE articles
SET search_vec =
  setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
  setweight(to_tsvector('english', COALESCE(body,  '')), 'B');

-- Auto-update with trigger
CREATE FUNCTION articles_search_trigger() RETURNS trigger AS $$
BEGIN
  NEW.search_vec :=
    setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
    setweight(to_tsvector('english', COALESCE(NEW.body, '')), 'B');
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trig_articles_search
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION articles_search_trigger();

-- GIN index for fast @@ queries
CREATE INDEX idx_articles_search ON articles USING GIN (search_vec);

Searching & Ranking

-- Basic search
SELECT id, title
FROM articles
WHERE search_vec @@ to_tsquery('english', 'postgres & index')
ORDER BY ts_rank(search_vec, to_tsquery('english', 'postgres & index')) DESC
LIMIT 10;

-- ts_rank_cd (cover density ranking, better for longer texts)
SELECT id, title,
  ts_rank_cd(search_vec, q) AS rank
FROM articles,
  to_tsquery('english', 'full & text') AS q
WHERE search_vec @@ q
ORDER BY rank DESC
LIMIT 10;

-- Normalization flags for ts_rank
-- 1: divide by doc length
-- 2: divide by log(doc length)
-- 32: divide by rank+1 (smooth)
SELECT ts_rank(search_vec, q, 1) AS rank_normalized
FROM articles, to_tsquery('english', 'database') q
WHERE search_vec @@ q;

Highlighting Results

-- ts_headline: wrap matched terms in HTML tags
SELECT
  title,
  ts_headline(
    'english',
    body,
    to_tsquery('english', 'postgres & performance'),
    'StartSel=<mark>, StopSel=</mark>, MaxWords=50, MinWords=20'
  ) AS excerpt
FROM articles
WHERE search_vec @@ to_tsquery('english', 'postgres & performance');

-- Default highlighting uses <b> tags
SELECT ts_headline('english',
  'PostgreSQL is a powerful open source database system',
  to_tsquery('powerful & database'));

Configuration & Dictionaries

-- List available text search configs
SELECT cfgname FROM pg_ts_config;

-- List available dictionaries
SELECT dictname FROM pg_ts_dict;

-- See how a config processes a word
SELECT * FROM ts_debug('english', 'running');

-- Create a custom text search config
CREATE TEXT SEARCH CONFIGURATION my_english (COPY = english);
ALTER TEXT SEARCH CONFIGURATION my_english
  ALTER MAPPING FOR asciiword WITH my_dict, english_stem;

-- Unaccent extension for accent-insensitive search
CREATE EXTENSION unaccent;
SELECT to_tsvector('french', unaccent('รชtre ร '))