| Документ взят из кэша поисковой машины. Адрес
оригинального документа
: http://www.sai.msu.su/~megera/postgres/fts/doc/fts-parser-xmp-src.html Дата изменения: Unknown Дата индексирования: Sun Apr 13 07:53:22 2008 Кодировка: Поисковые слова: asteroid | 
| Full-Text Search in PostgreSQL: A Gentle Introduction | ||||
|---|---|---|---|---|
| Prev | Fast Backward | Appendix B. FTS Parser Example | Fast Forward | Next | 
The parser sources were adapted for the 8.3 release from Valli's original parser HOWTO tutorial.
To compile the example, just run:
make
make install
psql regression < test_parser.sql
This is test_parser.c:
/*
 * Emit the module magic block, but only when the macro is defined.
 * NOTE(review): this listing shows no #include lines; PG_MODULE_MAGIC, Datum,
 * palloc() and the PG_* macros used below presumably come from PostgreSQL's
 * postgres.h and fmgr.h, which would have to be included before this point --
 * confirm against the original test_parser.c.
 */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/*
 * types
 */
/*
 * ParserState - private state of this example parser.
 *
 * Created by testprs_start, advanced by testprs_getlexeme and released by
 * testprs_end.
 */
typedef struct ParserState
{
  /* text to parse */
  char *buffer;
  /* length of the text in buffer */
  int len;
  /* current position of the parser inside buffer */
  int pos;
} ParserState;
/*
 * LexDescr - description of one token type reported by the parser.
 *
 * Copy-pasted from wparser.h of tsearch2.
 */
typedef struct LexDescr
{
  /* numeric id of the token type */
  int lexid;
  /* short name of the token type */
  char *alias;
  /* human-readable description of the token type */
  char *descr;
} LexDescr;
/*
 * prototypes
 */
/*
 * Each SQL-callable function gets a C prototype followed by its
 * PG_FUNCTION_INFO_V1 declaration.
 */

/* parser start */
Datum testprs_start(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_start);

/* fetch next token */
Datum testprs_getlexeme(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_getlexeme);

/* parser shutdown */
Datum testprs_end(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_end);

/* list of token types */
Datum testprs_lextype(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_lextype);
/*
 * functions
 */
/*
 * testprs_start - set up the parser state for one input text.
 *
 * Argument 0 is a pointer to the text to parse, argument 1 is its length.
 * Returns a freshly palloc'd ParserState positioned at the start of the text.
 */
Datum testprs_start(PG_FUNCTION_ARGS)
{
  char        *text   = (char *) PG_GETARG_POINTER(0);
  int         textlen = PG_GETARG_INT32(1);
  ParserState *state  = (ParserState *) palloc(sizeof(*state));

  state->buffer = text;
  state->len = textlen;
  /* parsing always begins at the first byte */
  state->pos = 0;

  PG_RETURN_POINTER(state);
}
/*
 * testprs_getlexeme - return the next token from the parser state.
 *
 * Arguments:
 *   0: ParserState * created by testprs_start
 *   1: char **       receives a pointer to the token start inside the buffer
 *   2: int *         receives the token length
 *
 * Returns the token type: 12 for a run of spaces, 3 for a run of non-space
 * characters (the ids listed by testprs_lextype), or 0 when the token is
 * empty, i.e. the input is exhausted.
 *
 * The position is always compared against the text length BEFORE the buffer
 * is dereferenced, so the byte just past the end of the text is never read.
 */
Datum testprs_getlexeme(PG_FUNCTION_ARGS)
{
  ParserState *pst     = (ParserState *) PG_GETARG_POINTER(0);
  char        **t      = (char **) PG_GETARG_POINTER(1);
  int         *tlen    = (int *) PG_GETARG_POINTER(2);
  int         startpos = pst->pos;
  int         type;

  *t = pst->buffer + pst->pos;

  if ((pst->pos < pst->len) && ((pst->buffer)[pst->pos] == ' ')) {
    /* blank type */
    type = 12;
    /* go to the next non-white-space character */
    while ((pst->pos < pst->len) && ((pst->buffer)[pst->pos] == ' ')) {
      (pst->pos)++;
    }
  } else {
    /* word type */
    type = 3;
    /* go to the next white-space character */
    while ((pst->pos < pst->len) && ((pst->buffer)[pst->pos] != ' ')) {
      (pst->pos)++;
    }
  }

  *tlen = pst->pos - startpos;

  /* we are finished if (*tlen == 0) */
  if (*tlen == 0) {
    type = 0;
  }

  PG_RETURN_INT32(type);
}
/*
 * testprs_end - release the parser state.
 *
 * Argument 0 is the ParserState allocated by testprs_start; it is freed here.
 */
Datum testprs_end(PG_FUNCTION_ARGS)
{
  ParserState *state = (ParserState *) PG_GETARG_POINTER(0);

  pfree(state);

  PG_RETURN_VOID();
}
Datum testprs_lextype(PG_FUNCTION_ARGS)
{
  /*
    Remarks:
    - we have to return the blanks for headline reason
    - we use the same lexids like Teodor in the default
      word parser; in this way we can reuse the headline
      function of the default word parser.
  */
  LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
  /* there are only two types in this parser */
  descr[0].lexid = 3;
  descr[0].alias = pstrdup("word");
  descr[0].descr = pstrdup("Word");
  descr[1].lexid = 12;
  descr[1].alias = pstrdup("blank");
  descr[1].descr = pstrdup("Space symbols");
  descr[2].lexid = 0;
  PG_RETURN_POINTER(descr);
}
This is a Makefile
# Makefile for the test_parser example module
override CPPFLAGS := -I. $(CPPFLAGS)

MODULE_big = test_parser
OBJS = test_parser.o
DATA_built = test_parser.sql
DATA =
DOCS = README.test_parser
REGRESS = test_parser

# Use PGXS when USE_PGXS is set; otherwise use the contrib build files
# relative to top_builddir.
ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs)
include $(PGXS)
else
subdir = contrib/test_parser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
This is a test_parser.sql.in
-- Installation script for the example parser: registers the four C functions,
-- then creates a parser, a configuration and a mapping that use them.
-- Everything runs inside a single transaction (BEGIN ... END).
SET search_path = public;
BEGIN;
-- Parser start function. The C code reads argument 0 as a pointer to the text
-- and argument 1 as its length, and returns a pointer to its parser state.
-- NOTE(review): 'MODULE_PATHNAME' is presumably replaced with the library path
-- when test_parser.sql is built from this .sql.in file -- confirm.
CREATE FUNCTION testprs_start(internal,int4)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Next-token function. Arguments: parser state, output pointer for the token
-- start, output pointer for the token length.
-- NOTE(review): declared RETURNS internal here, while the C function returns
-- the token type through PG_RETURN_INT32.
CREATE FUNCTION testprs_getlexeme(internal,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Parser end function: frees the parser state passed as its argument.
CREATE FUNCTION testprs_end(internal)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Returns the array describing the token types ("word" = 3, "blank" = 12).
CREATE FUNCTION testprs_lextype(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Tie the four functions together as a parser named testparser.
CREATE FULLTEXT PARSER testparser 
        START     'testprs_start'
        GETTOKEN  'testprs_getlexeme'
        END       'testprs_end'
        LEXTYPES  'testprs_lextype'
;
-- Create a configuration that uses the new parser.
CREATE FULLTEXT CONFIGURATION  testcfg  PARSER  'testparser' LOCALE  NULL;
-- Map the "word" token type to "simple" (presumably the built-in simple
-- dictionary -- confirm). No mapping is created for "blank" tokens.
CREATE FULLTEXT MAPPING ON testcfg FOR word WITH simple;
END;