Документ взят из кэша поисковой машины. Адрес
оригинального документа
: http://www.sai.msu.su/~megera/postgres/fts/doc/fts-intdict-xmp.html
Дата изменения: Unknown Дата индексирования: Sun Apr 13 07:50:24 2008 Кодировка: Поисковые слова: п п п п п п п п п п п п п п п п п р п р п р п р п р п р п р п р п р п р п р п р п р п п р п п р п п р п п р п п р п п р п п р п п р п |
Full-Text Search in PostgreSQL: A Gentle Introduction | ||||
---|---|---|---|---|
Prev | Fast Backward | Fast Forward | Next |
The motivation for this dictionary is to control the indexing of integers (signed and unsigned) and, consequently, to minimize the number of unique words, which in turn greatly affects search performance.
Dictionary accepts two init options:
The MAXLEN parameter specifies the maximum length of a number considered a 'good' integer. The default value is 6.
The REJECTLONG parameter specifies whether a 'long' integer should be indexed or treated as a stop word. If REJECTLONG=FALSE (default), then the dictionary returns the leading part of the integer, truncated to MAXLEN characters. If REJECTLONG=TRUE, then the dictionary treats the integer as a stop word.
A similar idea can be applied to the indexing of decimal numbers, for example with a DecDict dictionary. That dictionary accepts two init options: the MAXLENFRAC parameter specifies the maximum length of the fraction part considered a 'good' decimal (the default value is 3), and the REJECTLONG parameter specifies whether a decimal number with a 'long' fraction part should be indexed or treated as a stop word. If REJECTLONG=FALSE (default), then the dictionary returns the decimal number with its fraction part truncated to MAXLENFRAC digits. If REJECTLONG=TRUE, then the dictionary treats the number as a stop word. Notice that REJECTLONG=FALSE allows indexing of 'shortened' numbers, so search results will contain documents with the original 'garbage' numbers.
Examples:
=# select lexize('intdict', 11234567890);
  lexize
----------
 {112345}
Now, we want to ignore long integers.
=# ALTER FULLTEXT DICTIONARY intdict SET OPTION 'MAXLEN=6, REJECTLONG=TRUE';
=# select lexize('intdict', 11234567890);
 lexize
--------
 {}
Create the contrib/dict_intdict directory with the files dict_tmpl.c, Makefile, and dict_intdict.sql.in, then run:
make && make install
psql DBNAME < dict_intdict.sql
This is a dict_tmpl.c file.
/*
 * dict_tmpl.c
 *
 * Example full-text search dictionary for integers ("intdict").
 *
 * The dictionary limits how many distinct integer lexemes get indexed:
 * a token longer than MAXLEN characters is either truncated to its first
 * MAXLEN characters or rejected as a stop word, depending on REJECTLONG.
 */
#include "postgres.h"

#include <ctype.h>
#include <errno.h>
#include <limits.h>

#include "utils/builtins.h"
#include "fmgr.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

#include "utils/ts_locale.h"
#include "utils/ts_public.h"
#include "utils/ts_utils.h"

/* Per-dictionary state built by dinit_intdict and read by dlexize_intdict. */
typedef struct
{
	int			maxlen;			/* longest token kept as-is (always >= 1) */
	bool		rejectlong;		/* true: drop long tokens; false: truncate */
} DictInt;

/*
 * Allocate and fill a DictInt.
 *
 * NOTE(review): malloc (not palloc) is kept from the original code,
 * presumably so the state outlives the calling memory context - confirm.
 * Allocation happens only after all options have been validated, so an
 * elog(ERROR) during option parsing can no longer leak this block.
 */
static DictInt *
new_dictint(int maxlen, bool rejectlong)
{
	DictInt    *d = malloc(sizeof *d);

	if (!d)
		elog(ERROR, "No memory");

	d->maxlen = maxlen;
	d->rejectlong = rejectlong;
	return d;
}

/*
 * Parse the MAXLEN option value.
 *
 * Raises an error unless the value is a decimal integer in [1, INT_MAX]
 * (trailing whitespace is tolerated). A zero or negative MAXLEN must never
 * reach dlexize_intdict, which uses it as an index into the token buffer.
 */
static int
parse_maxlen(const char *value)
{
	char	   *end;
	long		v;

	errno = 0;
	v = strtol(value, &end, 10);

	if (end != value)
	{
		while (*end != '\0' && isspace((unsigned char) *end))
			end++;
	}

	if (end == value || *end != '\0' || errno == ERANGE ||
		v < 1 || v > INT_MAX)
		elog(ERROR, "Invalid MAXLEN value: %s (must be an integer >= 1)",
			 value);

	return (int) v;
}

PG_FUNCTION_INFO_V1(dinit_intdict);
Datum		dinit_intdict(PG_FUNCTION_ARGS);

/*
 * Dictionary INIT function.
 *
 * Argument 0 is an optional text value holding comma-separated key=value
 * options. Recognised keys (case-insensitive):
 *   MAXLEN     - integer >= 1, default 6
 *   REJECTLONG - "true" or "false", default false
 * Any other key or value raises an error.
 *
 * Returns a pointer to a newly allocated DictInt.
 */
Datum
dinit_intdict(PG_FUNCTION_ARGS)
{
	int			maxlen = 6;
	bool		rejectlong = false;
	Map		   *cfg;
	Map		   *pcfg;
	text	   *in;

	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
	{
		/* no options: use the defaults */
		PG_RETURN_POINTER(new_dictint(maxlen, rejectlong));
	}

	in = PG_GETARG_TEXT_P(0);
	parse_keyvalpairs(in, &cfg);
	PG_FREE_IF_COPY(in, 0);

	/* the option array is terminated by an entry whose key is NULL */
	for (pcfg = cfg; pcfg->key; pcfg++)
	{
		if (strcasecmp("MAXLEN", pcfg->key) == 0)
		{
			maxlen = parse_maxlen(pcfg->value);
		}
		else if (strcasecmp("REJECTLONG", pcfg->key) == 0)
		{
			if (strcasecmp("true", pcfg->value) == 0)
				rejectlong = true;
			else if (strcasecmp("false", pcfg->value) == 0)
				rejectlong = false;
			else
				elog(ERROR, "Unknown value: %s => %s",
					 pcfg->key, pcfg->value);
		}
		else
		{
			elog(ERROR, "Unknown option: %s => %s",
				 pcfg->key, pcfg->value);
		}

		pfree(pcfg->key);
		pfree(pcfg->value);
	}

	pfree(cfg);

	PG_RETURN_POINTER(new_dictint(maxlen, rejectlong));
}

PG_FUNCTION_INFO_V1(dlexize_intdict);
Datum		dlexize_intdict(PG_FUNCTION_ARGS);

/*
 * Dictionary LEXIZE function.
 *
 * Argument 0: DictInt state produced by dinit_intdict.
 * Argument 1: pointer to the token characters.
 * Argument 2: token length.
 *
 * Returns a palloc'd array of two TSLexeme entries; an entry with a NULL
 * lexeme terminates the array. The result is:
 *   - the token itself when it is no longer than maxlen;
 *   - its first maxlen characters when it is longer and rejectlong is false;
 *   - an empty array when it is longer and rejectlong is true.
 */
Datum
dlexize_intdict(PG_FUNCTION_ARGS)
{
	DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	char	   *txt = pnstrdup(in, len);

	/* zeroed so that TSLexeme fields not set below are never garbage */
	TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);

	res[1].lexeme = NULL;

	if (len > d->maxlen)
	{
		if (d->rejectlong)
		{
			/* stop word: return an empty array */
			pfree(txt);
			res[0].lexeme = NULL;
		}
		else
		{
			/* cut the integer; 1 <= maxlen < len, so the index is in bounds */
			txt[d->maxlen] = '\0';
			res[0].lexeme = txt;
		}
	}
	else
	{
		res[0].lexeme = txt;
	}

	PG_RETURN_POINTER(res);
}
This is a Makefile:
# Makefile for the dict_intdict example dictionary.
# Intended to live in contrib/dict_intdict inside the PostgreSQL source tree.
# One statement per line: on a single line the first assignment would
# swallow everything that follows, including the include directives.

subdir = contrib/dict_intdict
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# Shared library name and the object file it is built from.
MODULE_big = dict_intdict
OBJS = dict_tmpl.o

# SQL install script (the accompanying source file is dict_intdict.sql.in).
DATA_built = dict_intdict.sql

# No documentation files.
DOCS =

include $(top_srcdir)/contrib/contrib-global.mk
This is a dict_intdict.sql.in:
-- Install script for the intdict dictionary: registers the two C entry
-- points and creates the dictionary with its default options.
SET search_path = public;

BEGIN;

-- INIT function: receives the option string, returns the dictionary state.
CREATE OR REPLACE FUNCTION dinit_intdict(internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

-- LEXIZE function: declared strict.
CREATE OR REPLACE FUNCTION dlexize_intdict(internal, internal, internal, internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C'
    WITH (isstrict);

CREATE FULLTEXT DICTIONARY intdict
    LEXIZE 'dlexize_intdict'
    INIT 'dinit_intdict'
    OPTION 'MAXLEN=6,REJECTLONG=false';

COMMENT ON FULLTEXT DICTIONARY intdict IS 'Dictionary for Integers';

END;