postgrespro
diff --git a/‎contrib/tsearch2/Makefile
Lines changed: 4 additions & 4 deletions b/‎contrib/tsearch2/Makefile
Lines changed: 4 additions & 4 deletions
diff --git a/‎contrib/tsearch2/common.c
Lines changed: 21 additions & 0 deletions b/‎contrib/tsearch2/common.c
Lines changed: 21 additions & 0 deletions
diff --git a/‎contrib/tsearch2/common.h
Lines changed: 2 additions & 0 deletions b/‎contrib/tsearch2/common.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎contrib/tsearch2/dict.c
Lines changed: 50 additions & 24 deletions b/‎contrib/tsearch2/dict.c
Lines changed: 50 additions & 24 deletions
diff --git a/‎contrib/tsearch2/dict.h
Lines changed: 55 additions & 5 deletions b/‎contrib/tsearch2/dict.h
Lines changed: 55 additions & 5 deletions
@@ -1,13 +1,13 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.15 2006/05/31 14:05:31 teodor Exp $
 
 MODULE_big = tsearch2
 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
-       dict_snowball.o dict_ispell.o dict_syn.o \
+       dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \
        wparser.o wparser_def.o \
        ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
        tsvector_op.o rank.o ts_stat.o \
        query_util.o query_support.o query_rewrite.o query_gist.o \
-       ts_locale.o ginidx.o
+       ts_locale.o ts_lexize.o ginidx.o
 
 SUBDIRS     := snowball ispell wordparser
 SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)
@@ -16,7 +16,7 @@ OBJS	+= $(SUBDIROBJS)
 
 PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
 
-DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
+DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus
 DATA_built = tsearch2.sql untsearch2.sql
 DOCS = README.tsearch2
 REGRESS = tsearch2
 
@@ -5,6 +5,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_namespace.h"
 #include "utils/syscache.h"
+#include "miscadmin.h"
 
 #include "ts_cfg.h"
 #include "dict.h"
@@ -163,3 +164,23 @@ get_oidnamespace(Oid funcoid)
 
 	return nspoid;
 }
+
+    /* if path is relative, take it as relative to share dir; an absolute path is returned unchanged, otherwise a newly palloc'd string is returned */
+char *
+to_absfilename(char *filename) {
+	if (!is_absolute_path(filename)) {
+		char        sharepath[MAXPGPATH];
+		char       *absfn;
+#ifdef  WIN32
+		char    delim = '\\';
+#else
+		char    delim = '/';
+#endif
+		get_share_path(my_exec_path, sharepath);	/* fills sharepath with the share dir location */
+		absfn = palloc(strlen(sharepath) + strlen(filename) + 2);	/* +2: one delimiter and the terminating NUL */
+		sprintf(absfn, "%s%c%s", sharepath, delim, filename);
+		filename = absfn;	/* the caller receives the newly built path */
+	}
+
+	return filename;
+}
@@ -16,6 +16,8 @@ text	   *mtextdup(text *in);
 
 int			text_cmp(text *a, text *b);
 
+char * to_absfilename(char *filename);
+
 #define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 #define ARRNELEMS(x)  ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
 
 
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.12 2006/05/31 14:05:31 teodor Exp $ */
 
 /*
  * interface functions to dictionary
@@ -50,16 +50,19 @@ init_dict(Oid id, DictInfo * dict)
 		Datum		opt;
 		Oid			oid = InvalidOid;
 
+		/* setup dictlexize method */
+		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
+		if (isnull || oid == InvalidOid)
+			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
+		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
+
+		/* setup and call dictinit method, optionally */
 		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
 		if (!(isnull || oid == InvalidOid))
 		{
 			opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
 			dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
 		}
-		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
-		if (isnull || oid == InvalidOid)
-			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
-		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
 		dict->dict_id = id;
 	}
 	else
@@ -98,6 +101,29 @@ comparedict(const void *a, const void *b)
 	return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1;
 }
 
+static void
+insertdict(Oid id) {	/* add dictionary id to DList, keeping DList.list ordered by comparedict */
+	DictInfo	newdict;	/* initialized locally, copied into DList only afterwards */
+
+	if (DList.len == DList.reallen)	/* no free slot left: grow the array */
+	{
+		DictInfo   *tmp;
+		int			reallen = (DList.reallen) ? 2 * DList.reallen : 16;	/* double the capacity, or start at 16 */
+
+		tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);	/* via tmp, so DList.list is not overwritten on failure */
+		if (!tmp)
+			ts_error(ERROR, "No memory");
+		DList.reallen = reallen;
+		DList.list = tmp;
+	}
+	init_dict(id, &newdict);	/* NOTE(review): presumably may raise an error; DList.len is not yet incremented here -- confirm */
+
+	DList.list[DList.len] = newdict;
+	DList.len++;
+
+	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);	/* restore the ordering after the append */
+}
+
 DictInfo *
 finddict(Oid id)
 {
@@ -117,23 +143,8 @@ finddict(Oid id)
 			return DList.last_dict;
 	}
 
-	/* last chance */
-	if (DList.len == DList.reallen)
-	{
-		DictInfo   *tmp;
-		int			reallen = (DList.reallen) ? 2 * DList.reallen : 16;
-
-		tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
-		if (!tmp)
-			ts_error(ERROR, "No memory");
-		DList.reallen = reallen;
-		DList.list = tmp;
-	}
-	DList.last_dict = &(DList.list[DList.len]);
-	init_dict(id, DList.last_dict);
-
-	DList.len++;
-	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
+	/* insert new dictionary */ 
+	insertdict(id);
 	return finddict(id); /* qsort changed order!! */ ;
 }
 
@@ -190,17 +201,32 @@ lexize(PG_FUNCTION_ARGS)
 			   *ptr;
 	Datum	   *da;
 	ArrayType  *a;
+	DictSubState	dstate = { false, false, NULL };
 
 	SET_FUNCOID();
 	dict = finddict(PG_GETARG_OID(0));
 
 	ptr = res = (TSLexeme *) DatumGetPointer(
-										  FunctionCall3(&(dict->lexize_info),
+										FunctionCall4(&(dict->lexize_info),
+										PointerGetDatum(dict->dictionary),
+										PointerGetDatum(VARDATA(in)),
+										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+										PointerGetDatum(&dstate)
+														)
+		);
+
+	if (dstate.getnext)  {
+		dstate.isend = true;	
+		ptr = res = (TSLexeme *) DatumGetPointer(
+										FunctionCall4(&(dict->lexize_info),
 										   PointerGetDatum(dict->dictionary),
 												PointerGetDatum(VARDATA(in)),
-										Int32GetDatum(VARSIZE(in) - VARHDRSZ)
+										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+										PointerGetDatum(&dstate)
 														)
 		);
+	}
+
 	PG_FREE_IF_COPY(in, 1);
 	if (!res)
 	{
 
@@ -1,9 +1,10 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
 
 #ifndef __DICT_H__
 #define __DICT_H__
 #include "postgres.h"
 #include "fmgr.h"
+#include "ts_cfg.h"
 
 typedef struct
 {
@@ -29,6 +30,11 @@ DictInfo   *finddict(Oid id);
 Oid			name2id_dict(text *name);
 void		reset_dict(void);
 
+typedef struct {	/* state handed to a dictionary's lexize method as its fourth argument */
+	bool isend; /* in: tells the lexize method that the end of the text has been reached */
+	bool getnext; /* out: set when the dictionary wants the next lexeme */
+	void	*private;  /* internal dictionary state kept between calls with getnext == true */
+} DictSubState;
 
 /* simple parser of cfg string */
 typedef struct
@@ -45,17 +51,61 @@ typedef struct
 	/*
 	 * number of variant of split word , for example Word 'fotballklubber'
 	 * (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
-	 * ball, klubb ). So, dictionary should return: nvariant	lexeme 1
-	 * fotball 1	   klubb 2		 fot 2		 ball 2		  klubb
-	 *
+	 * ball, klubb ). So, dictionary should return: 
+	 * nvariant	lexeme 
+	 *   1 		fotball 
+	 *   1	   	klubb 
+	 *	 2		fot 
+	 *	 2		ball 
+	 *   2		klubb
 	 */
 	uint16		nvariant;
 
-	/* currently unused */
 	uint16		flags;
 
 	/* C-string */
 	char	   *lexeme;
 }	TSLexeme;
 
+#define TSL_ADDPOS		0x01
+
+
+/*
+ * Lexize subsystem
+ */
+
+typedef struct ParsedLex {	/* node of a singly linked list (see next) */
+    int     	type;	/* NOTE(review): presumably the parser's token type -- confirm */
+    char    	*lemm;	/* NOTE(review): presumably the token text -- confirm */
+    int     	lenlemm;	/* NOTE(review): presumably the length of lemm -- confirm */
+	bool		resfollow;
+    struct ParsedLex *next;	/* following node in the list */
+} ParsedLex;
+
+typedef struct ListParsedLex {	/* head and tail pointers of a ParsedLex list */
+	ParsedLex	*head;
+	ParsedLex	*tail;
+} ListParsedLex;
+
+typedef struct {	/* state object taken by LexizeInit, LexizeAddLemm and LexizeExec */
+    TSCfgInfo       *cfg;
+    Oid             curDictId;
+    int             posDict;
+    DictSubState    dictState;
+    ParsedLex       *curSub;
+	ListParsedLex	towork;   /* current list to work on */
+	ListParsedLex	waste;    /* list of lexemes that have already been lexized */
+
+	/* fields to store the last variant to lexize (basically for a thesaurus
+	   or a similar dictionary, which wants several lexemes) */	
+	   
+	ParsedLex		*lastRes;
+	TSLexeme		*tmpRes;
+} LexizeData;
+
+
+void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
+void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
+TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
+
 #endif