Документ взят из кэша поисковой машины. Адрес оригинального документа : http://www.sai.msu.su/~megera/postgres/fts/doc/fts-parser-xmp-src.html
Дата изменения: Unknown
Дата индексирования: Sun Apr 13 07:53:22 2008
Кодировка:

Поисковые слова: vallis
Parser sources

B.1. Parser sources

Parser sources was adapted to 8.3 release from original tutorial by Valli parser HOWTO.

To compile an example just do

make 
make install
psql regression < test_parser.sql

This is a test_parser.c


#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/*
 * types
 */

/* self-defined type */
typedef struct {
  char *  buffer; /* text to parse */
  int     len;    /* length of the text in buffer */
  int     pos;    /* position of the parser */
} ParserState;

/* copy-paste from wparser.h of tsearch2 */
typedef struct {
  int     lexid;
  char    *alias;
  char    *descr;
} LexDescr;

/*
 * prototypes
 */
PG_FUNCTION_INFO_V1(testprs_start);
Datum testprs_start(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_getlexeme);
Datum testprs_getlexeme(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_end);
Datum testprs_end(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_lextype);
Datum testprs_lextype(PG_FUNCTION_ARGS);

/*
 * functions
 */
Datum testprs_start(PG_FUNCTION_ARGS)
{
  ParserState *pst = (ParserState *) palloc(sizeof(ParserState));
  pst->buffer = (char *) PG_GETARG_POINTER(0);
  pst->len = PG_GETARG_INT32(1);
  pst->pos = 0;

  PG_RETURN_POINTER(pst);
}

Datum testprs_getlexeme(PG_FUNCTION_ARGS)
{
  ParserState *pst   = (ParserState *) PG_GETARG_POINTER(0);
  char        **t    = (char **) PG_GETARG_POINTER(1);
  int         *tlen  = (int *) PG_GETARG_POINTER(2);
  int         type;

  *tlen = pst->pos;
  *t = pst->buffer +  pst->pos;

  if ((pst->buffer)[pst->pos] == ' ') {
    /* blank type */
    type = 12;
    /* go to the next non-white-space character */
    while (((pst->buffer)[pst->pos] == ' ') && (pst->pos < pst->len)) {
      (pst->pos)++;
    }
  } else {
    /* word type */
    type = 3;
    /* go to the next white-space character */
    while (((pst->buffer)[pst->pos] != ' ') && (pst->pos < pst->len)) {
      (pst->pos)++;
    }
  }

  *tlen = pst->pos - *tlen;

  /* we are finished if (*tlen == 0) */
  if (*tlen == 0) type=0;

  PG_RETURN_INT32(type);
}
Datum testprs_end(PG_FUNCTION_ARGS)
{
  ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
  pfree(pst);
  PG_RETURN_VOID();
}

Datum testprs_lextype(PG_FUNCTION_ARGS)
{
  /*
    Remarks:
    - we have to return the blanks for headline reason
    - we use the same lexids like Teodor in the default
      word parser; in this way we can reuse the headline
      function of the default word parser.
  */
  LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));

  /* there are only two types in this parser */
  descr[0].lexid = 3;
  descr[0].alias = pstrdup("word");
  descr[0].descr = pstrdup("Word");
  descr[1].lexid = 12;
  descr[1].alias = pstrdup("blank");
  descr[1].descr = pstrdup("Space symbols");
  descr[2].lexid = 0;

  PG_RETURN_POINTER(descr);
}

This is a Makefile

override CPPFLAGS := -I. $(CPPFLAGS)

MODULE_big = test_parser
OBJS = test_parser.o

DATA_built = test_parser.sql
DATA =
DOCS = README.test_parser
REGRESS = test_parser


ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs)
include $(PGXS)
else
subdir = contrib/test_parser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

This is a test_parser.sql.in

SET search_path = public;

BEGIN;

CREATE FUNCTION testprs_start(internal,int4)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);

CREATE FUNCTION testprs_getlexeme(internal,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);

CREATE FUNCTION testprs_end(internal)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);

CREATE FUNCTION testprs_lextype(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);


CREATE FULLTEXT PARSER testparser 
        START     'testprs_start'
        GETTOKEN  'testprs_getlexeme'
        END       'testprs_end'
        LEXTYPES  'testprs_lextype'
;

CREATE FULLTEXT CONFIGURATION  testcfg  PARSER  'testparser' LOCALE  NULL;
CREATE FULLTEXT MAPPING ON testcfg FOR word WITH simple;

END;