全文搜索
tsvector 与 tsquery 基础
tsvector 存储预处理的文档文本;tsquery 表示搜索查询。
-- Convert text to tsvector
SELECT to_tsvector('english', 'The quick brown fox jumps over the lazy dog');
-- 'brown':3 'dog':9 'fox':4 'jump':5 'lazi':8 'quick':2
-- Parse a tsquery
SELECT to_tsquery('english', 'quick & fox'); -- AND
SELECT to_tsquery('english', 'quick | fox'); -- OR
SELECT to_tsquery('english', '!dog'); -- NOT
SELECT to_tsquery('english', 'quick <-> fox'); -- followed by
SELECT plainto_tsquery('english', 'quick fox'); -- simple phrase
SELECT phraseto_tsquery('english', 'quick brown fox'); -- exact phrase
-- Match test: @@ operator
SELECT to_tsvector('english', 'the quick brown fox')
@@ to_tsquery('english', 'quick & fox'); -- true
表配置与 GIN 索引
-- Add tsvector column
ALTER TABLE articles ADD COLUMN search_vec tsvector;
-- Populate from multiple columns with weights
UPDATE articles
SET search_vec =
setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(body, '')), 'B');
-- Auto-update with trigger
CREATE FUNCTION articles_search_trigger() RETURNS trigger AS $$
BEGIN
NEW.search_vec :=
setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(NEW.body, '')), 'B');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trig_articles_search
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION articles_search_trigger();
-- GIN index for fast @@ queries
CREATE INDEX idx_articles_search ON articles USING GIN (search_vec);
搜索与排名
-- Basic search
SELECT id, title
FROM articles
WHERE search_vec @@ to_tsquery('english', 'postgres & index')
ORDER BY ts_rank(search_vec, to_tsquery('english', 'postgres & index')) DESC
LIMIT 10;
-- ts_rank_cd (cover density ranking, better for longer texts)
SELECT id, title,
ts_rank_cd(search_vec, q) AS rank
FROM articles,
to_tsquery('english', 'full & text') AS q
WHERE search_vec @@ q
ORDER BY rank DESC
LIMIT 10;
-- Normalization flags for ts_rank
-- 1: divide by doc length
-- 2: divide by log(doc length)
-- 32: divide by rank+1 (smooth)
SELECT ts_rank(search_vec, q, 1) AS rank_normalized
FROM articles, to_tsquery('english', 'database') q
WHERE search_vec @@ q;
高亮显示结果
-- ts_headline: wrap matched terms in HTML tags
SELECT
title,
ts_headline(
'english',
body,
to_tsquery('english', 'postgres & performance'),
'StartSel=<mark>, StopSel=</mark>, MaxWords=50, MinWords=20'
) AS excerpt
FROM articles
WHERE search_vec @@ to_tsquery('english', 'postgres & performance');
-- Default highlighting uses <b> tags
SELECT ts_headline('english',
'PostgreSQL is a powerful open source database system',
to_tsquery('powerful & database'));
配置与词典
-- List available text search configs
SELECT cfgname FROM pg_ts_config;
-- List available dictionaries
SELECT dictname FROM pg_ts_dict;
-- See how a config processes a word
SELECT * FROM ts_debug('english', 'running');
-- Create a custom text search config
CREATE TEXT SEARCH CONFIGURATION my_english (COPY = english);
ALTER TEXT SEARCH CONFIGURATION my_english
ALTER MAPPING FOR asciiword WITH my_dict, english_stem;
-- Unaccent extension for accent-insensitive search
CREATE EXTENSION unaccent;
SELECT to_tsvector('french', unaccent('être à'))