Документ взят из кэша поисковой машины. Адрес
оригинального документа
: http://www.sai.msu.su/~megera/postgres/fts/doc/fts-parser-xmp-src.html
Дата изменения: Unknown Дата индексирования: Sun Apr 13 07:53:22 2008 Кодировка: Поисковые слова: tail |
Full-Text Search in PostgreSQL: A Gentle Introduction | ||||
---|---|---|---|---|
Prev | Fast Backward | Appendix B. FTS Parser Example | Fast Forward | Next |
Parser sources was adapted to 8.3 release from original tutorial by Valli parser HOWTO.
To compile an example just do
make make install psql regression < test_parser.sql
This is a test_parser.c
#ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; #endif /* * types */ /* self-defined type */ typedef struct { char * buffer; /* text to parse */ int len; /* length of the text in buffer */ int pos; /* position of the parser */ } ParserState; /* copy-paste from wparser.h of tsearch2 */ typedef struct { int lexid; char *alias; char *descr; } LexDescr; /* * prototypes */ PG_FUNCTION_INFO_V1(testprs_start); Datum testprs_start(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(testprs_getlexeme); Datum testprs_getlexeme(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(testprs_end); Datum testprs_end(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(testprs_lextype); Datum testprs_lextype(PG_FUNCTION_ARGS); /* * functions */ Datum testprs_start(PG_FUNCTION_ARGS) { ParserState *pst = (ParserState *) palloc(sizeof(ParserState)); pst->buffer = (char *) PG_GETARG_POINTER(0); pst->len = PG_GETARG_INT32(1); pst->pos = 0; PG_RETURN_POINTER(pst); } Datum testprs_getlexeme(PG_FUNCTION_ARGS) { ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); char **t = (char **) PG_GETARG_POINTER(1); int *tlen = (int *) PG_GETARG_POINTER(2); int type; *tlen = pst->pos; *t = pst->buffer + pst->pos; if ((pst->buffer)[pst->pos] == ' ') { /* blank type */ type = 12; /* go to the next non-white-space character */ while (((pst->buffer)[pst->pos] == ' ') && (pst->pos < pst->len)) { (pst->pos)++; } } else { /* word type */ type = 3; /* go to the next white-space character */ while (((pst->buffer)[pst->pos] != ' ') && (pst->pos < pst->len)) { (pst->pos)++; } } *tlen = pst->pos - *tlen; /* we are finished if (*tlen == 0) */ if (*tlen == 0) type=0; PG_RETURN_INT32(type); } Datum testprs_end(PG_FUNCTION_ARGS) { ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); pfree(pst); PG_RETURN_VOID(); } Datum testprs_lextype(PG_FUNCTION_ARGS) { /* Remarks: - we have to return the blanks for headline reason - we use the same lexids like Teodor in the default word parser; in this way we can reuse the headline function of the default word parser. */ LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1)); /* there are only two types in this parser */ descr[0].lexid = 3; descr[0].alias = pstrdup("word"); descr[0].descr = pstrdup("Word"); descr[1].lexid = 12; descr[1].alias = pstrdup("blank"); descr[1].descr = pstrdup("Space symbols"); descr[2].lexid = 0; PG_RETURN_POINTER(descr); }
This is a Makefile
override CPPFLAGS := -I. $(CPPFLAGS) MODULE_big = test_parser OBJS = test_parser.o DATA_built = test_parser.sql DATA = DOCS = README.test_parser REGRESS = test_parser ifdef USE_PGXS PGXS := $(shell pg_config --pgxs) include $(PGXS) else subdir = contrib/test_parser top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif
This is a test_parser.sql.in
SET search_path = public; BEGIN; CREATE FUNCTION testprs_start(internal,int4) RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE 'C' with (isstrict); CREATE FUNCTION testprs_getlexeme(internal,internal,internal) RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE 'C' with (isstrict); CREATE FUNCTION testprs_end(internal) RETURNS void AS 'MODULE_PATHNAME' LANGUAGE 'C' with (isstrict); CREATE FUNCTION testprs_lextype(internal) RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE 'C' with (isstrict); CREATE FULLTEXT PARSER testparser START 'testprs_start' GETTOKEN 'testprs_getlexeme' END 'testprs_end' LEXTYPES 'testprs_lextype' ; CREATE FULLTEXT CONFIGURATION testcfg PARSER 'testparser' LOCALE NULL; CREATE FULLTEXT MAPPING ON testcfg FOR word WITH simple; END;