Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 25bd9ce

Browse files
committed
Add matchorig, matchsynonyms, and keepsynonyms options to contrib/dict_xsyn.
Sergey Karpov
1 parent 23dc89d commit 25bd9ce

File tree

4 files changed

+282
-52
lines changed

4 files changed

+282
-52
lines changed

contrib/dict_xsyn/dict_xsyn.c

+69-43
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2007-2009, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.6 2009/01/01 17:23:32 momjian Exp $
9+
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.7 2009/08/05 18:06:49 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -33,7 +33,10 @@ typedef struct
3333
int len;
3434
Syn *syn;
3535

36+
bool matchorig;
3637
bool keeporig;
38+
bool matchsynonyms;
39+
bool keepsynonyms;
3740
} DictSyn;
3841

3942

@@ -88,34 +91,45 @@ read_dictionary(DictSyn *d, char *filename)
8891
{
8992
char *value;
9093
char *key;
91-
char *end = NULL;
94+
char *pos;
95+
char *end;
9296

9397
if (*line == '\0')
9498
continue;
9599

96100
value = lowerstr(line);
97101
pfree(line);
98102

99-
key = find_word(value, &end);
100-
if (!key)
103+
pos = value;
104+
while ((key = find_word(pos, &end)) != NULL)
101105
{
102-
pfree(value);
103-
continue;
104-
}
106+
/* Enlarge syn structure if full */
107+
if (cur == d->len)
108+
{
109+
d->len = (d->len > 0) ? 2 * d->len : 16;
110+
if (d->syn)
111+
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
112+
else
113+
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
114+
}
105115

106-
if (cur == d->len)
107-
{
108-
d->len = (d->len > 0) ? 2 * d->len : 16;
109-
if (d->syn)
110-
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
111-
else
112-
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
113-
}
116+
/* Save first word only if we will match it */
117+
if (pos != value || d->matchorig)
118+
{
119+
d->syn[cur].key = pnstrdup(key, end - key);
120+
d->syn[cur].value = pstrdup(value);
114121

115-
d->syn[cur].key = pnstrdup(key, end - key);
116-
d->syn[cur].value = value;
122+
cur++;
123+
}
124+
125+
pos = end;
117126

118-
cur++;
127+
/* Don't bother scanning synonyms if we will not match them */
128+
if (!d->matchsynonyms)
129+
break;
130+
}
131+
132+
pfree(value);
119133
}
120134

121135
tsearch_readline_end(&trst);
@@ -133,23 +147,40 @@ dxsyn_init(PG_FUNCTION_ARGS)
133147
List *dictoptions = (List *) PG_GETARG_POINTER(0);
134148
DictSyn *d;
135149
ListCell *l;
150+
char *filename = NULL;
136151

137152
d = (DictSyn *) palloc0(sizeof(DictSyn));
138153
d->len = 0;
139154
d->syn = NULL;
155+
d->matchorig = true;
140156
d->keeporig = true;
157+
d->matchsynonyms = false;
158+
d->keepsynonyms = true;
141159

142160
foreach(l, dictoptions)
143161
{
144162
DefElem *defel = (DefElem *) lfirst(l);
145163

146-
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
164+
if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
165+
{
166+
d->matchorig = defGetBoolean(defel);
167+
}
168+
else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
147169
{
148170
d->keeporig = defGetBoolean(defel);
149171
}
172+
else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
173+
{
174+
d->matchsynonyms = defGetBoolean(defel);
175+
}
176+
else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
177+
{
178+
d->keepsynonyms = defGetBoolean(defel);
179+
}
150180
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
151181
{
152-
read_dictionary(d, defGetString(defel));
182+
/* we can't read the rules before parsing all options! */
183+
filename = defGetString(defel);
153184
}
154185
else
155186
{
@@ -160,6 +191,9 @@ dxsyn_init(PG_FUNCTION_ARGS)
160191
}
161192
}
162193

194+
if (filename)
195+
read_dictionary(d, filename);
196+
163197
PG_RETURN_POINTER(d);
164198
}
165199

@@ -194,41 +228,33 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
194228

195229
/* Parse string of synonyms and return array of words */
196230
{
197-
char *value = pstrdup(found->value);
198-
int value_length = strlen(value);
199-
char *pos = value;
231+
char *value = found->value;
232+
char *syn;
233+
char *pos;
234+
char *end;
200235
int nsyns = 0;
201-
bool is_first = true;
202236

203-
res = palloc(0);
237+
res = palloc(sizeof(TSLexeme));
204238

205-
while (pos < value + value_length)
239+
pos = value;
240+
while ((syn = find_word(pos, &end)) != NULL)
206241
{
207-
char *end;
208-
char *syn = find_word(pos, &end);
209-
210-
if (!syn)
211-
break;
212-
*end = '\0';
213-
214242
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
215-
res[nsyns].lexeme = NULL;
216243

217-
/* first word is added to result only if KEEPORIG flag is set */
218-
if (d->keeporig || !is_first)
244+
/* The first word is output only if keeporig=true */
245+
if (pos != value || d->keeporig)
219246
{
220-
res[nsyns].lexeme = pstrdup(syn);
221-
res[nsyns + 1].lexeme = NULL;
222-
247+
res[nsyns].lexeme = pnstrdup(syn, end - syn);
223248
nsyns++;
224249
}
225250

226-
is_first = false;
251+
pos = end;
227252

228-
pos = end + 1;
253+
/* Stop if we are not to output the synonyms */
254+
if (!d->keepsynonyms)
255+
break;
229256
}
230-
231-
pfree(value);
257+
res[nsyns].lexeme = NULL;
232258
}
233259

234260
PG_RETURN_POINTER(res);

contrib/dict_xsyn/expected/dict_xsyn.out

+128-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,76 @@
55
SET client_min_messages = warning;
66
\set ECHO none
77
RESET client_min_messages;
8-
--configuration
9-
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
8+
-- default configuration - match first word and return it along with all synonyms
9+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1010
--lexize
1111
SELECT ts_lexize('xsyn', 'supernova');
12+
ts_lexize
13+
--------------------------
14+
{supernova,sn,sne,1987a}
15+
(1 row)
16+
17+
SELECT ts_lexize('xsyn', 'sn');
18+
ts_lexize
19+
-----------
20+
21+
(1 row)
22+
23+
SELECT ts_lexize('xsyn', 'grb');
24+
ts_lexize
25+
-----------
26+
27+
(1 row)
28+
29+
-- the same, but return only synonyms
30+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
31+
SELECT ts_lexize('xsyn', 'supernova');
32+
ts_lexize
33+
----------------
34+
{sn,sne,1987a}
35+
(1 row)
36+
37+
SELECT ts_lexize('xsyn', 'sn');
38+
ts_lexize
39+
-----------
40+
41+
(1 row)
42+
43+
SELECT ts_lexize('xsyn', 'grb');
44+
ts_lexize
45+
-----------
46+
47+
(1 row)
48+
49+
-- match any word and return all words
50+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn', 'supernova');
52+
ts_lexize
53+
--------------------------
54+
{supernova,sn,sne,1987a}
55+
(1 row)
56+
57+
SELECT ts_lexize('xsyn', 'sn');
58+
ts_lexize
59+
--------------------------
60+
{supernova,sn,sne,1987a}
61+
(1 row)
62+
63+
SELECT ts_lexize('xsyn', 'grb');
64+
ts_lexize
65+
-----------
66+
67+
(1 row)
68+
69+
-- match any word and return all words except first one
70+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
71+
SELECT ts_lexize('xsyn', 'supernova');
72+
ts_lexize
73+
----------------
74+
{sn,sne,1987a}
75+
(1 row)
76+
77+
SELECT ts_lexize('xsyn', 'sn');
1278
ts_lexize
1379
----------------
1480
{sn,sne,1987a}
@@ -20,3 +86,63 @@ SELECT ts_lexize('xsyn', 'grb');
2086

2187
(1 row)
2288

89+
-- match any synonym but not first word, and return first word instead
90+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
91+
SELECT ts_lexize('xsyn', 'supernova');
92+
ts_lexize
93+
-----------
94+
95+
(1 row)
96+
97+
SELECT ts_lexize('xsyn', 'sn');
98+
ts_lexize
99+
-------------
100+
{supernova}
101+
(1 row)
102+
103+
SELECT ts_lexize('xsyn', 'grb');
104+
ts_lexize
105+
-----------
106+
107+
(1 row)
108+
109+
-- do not match or return anything
110+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
111+
SELECT ts_lexize('xsyn', 'supernova');
112+
ts_lexize
113+
-----------
114+
115+
(1 row)
116+
117+
SELECT ts_lexize('xsyn', 'sn');
118+
ts_lexize
119+
-----------
120+
121+
(1 row)
122+
123+
SELECT ts_lexize('xsyn', 'grb');
124+
ts_lexize
125+
-----------
126+
127+
(1 row)
128+
129+
-- match any word but return nothing
130+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
131+
SELECT ts_lexize('xsyn', 'supernova');
132+
ts_lexize
133+
-----------
134+
{}
135+
(1 row)
136+
137+
SELECT ts_lexize('xsyn', 'sn');
138+
ts_lexize
139+
-----------
140+
{}
141+
(1 row)
142+
143+
SELECT ts_lexize('xsyn', 'grb');
144+
ts_lexize
145+
-----------
146+
147+
(1 row)
148+

contrib/dict_xsyn/sql/dict_xsyn.sql

+39-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,46 @@ SET client_min_messages = warning;
88
\set ECHO all
99
RESET client_min_messages;
1010

11-
--configuration
12-
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
11+
-- default configuration - match first word and return it along with all synonyms
12+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1313

1414
--lexize
1515
SELECT ts_lexize('xsyn', 'supernova');
16+
SELECT ts_lexize('xsyn', 'sn');
17+
SELECT ts_lexize('xsyn', 'grb');
18+
19+
-- the same, but return only synonyms
20+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
21+
SELECT ts_lexize('xsyn', 'supernova');
22+
SELECT ts_lexize('xsyn', 'sn');
23+
SELECT ts_lexize('xsyn', 'grb');
24+
25+
-- match any word and return all words
26+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
27+
SELECT ts_lexize('xsyn', 'supernova');
28+
SELECT ts_lexize('xsyn', 'sn');
29+
SELECT ts_lexize('xsyn', 'grb');
30+
31+
-- match any word and return all words except first one
32+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
33+
SELECT ts_lexize('xsyn', 'supernova');
34+
SELECT ts_lexize('xsyn', 'sn');
35+
SELECT ts_lexize('xsyn', 'grb');
36+
37+
-- match any synonym but not first word, and return first word instead
38+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
39+
SELECT ts_lexize('xsyn', 'supernova');
40+
SELECT ts_lexize('xsyn', 'sn');
41+
SELECT ts_lexize('xsyn', 'grb');
42+
43+
-- do not match or return anything
44+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
45+
SELECT ts_lexize('xsyn', 'supernova');
46+
SELECT ts_lexize('xsyn', 'sn');
47+
SELECT ts_lexize('xsyn', 'grb');
48+
49+
-- match any word but return nothing
50+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn', 'supernova');
52+
SELECT ts_lexize('xsyn', 'sn');
1653
SELECT ts_lexize('xsyn', 'grb');

0 commit comments

Comments
 (0)