7
7
*
8
8
*
9
9
* IDENTIFICATION
10
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
10
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.6 2007/10/23 00:51:23 tgl Exp $
11
11
*
12
12
*-------------------------------------------------------------------------
13
13
*/
22
22
23
23
typedef struct
24
24
{
25
- WordEntry entry ; /* should be first ! */
25
+ WordEntry entry ; /* must be first! */
26
26
WordEntryPos * pos ;
27
27
int poslen ; /* number of elements in pos */
28
28
} WordEntryIN ;
29
29
30
+
31
+ /* Compare two WordEntryPos values for qsort */
30
32
static int
31
33
comparePos (const void * a , const void * b )
32
34
{
33
- int apos = WEP_GETPOS (* (WordEntryPos * ) a );
34
- int bpos = WEP_GETPOS (* (WordEntryPos * ) b );
35
+ int apos = WEP_GETPOS (* (const WordEntryPos * ) a );
36
+ int bpos = WEP_GETPOS (* (const WordEntryPos * ) b );
35
37
36
38
if (apos == bpos )
37
39
return 0 ;
@@ -53,17 +55,18 @@ uniquePos(WordEntryPos * a, int l)
53
55
if (l <= 1 )
54
56
return l ;
55
57
56
- res = a ;
57
58
qsort ((void * ) a , l , sizeof (WordEntryPos ), comparePos );
58
59
60
+ res = a ;
59
61
ptr = a + 1 ;
60
62
while (ptr - a < l )
61
63
{
62
64
if (WEP_GETPOS (* ptr ) != WEP_GETPOS (* res ))
63
65
{
64
66
res ++ ;
65
67
* res = * ptr ;
66
- if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS (* res ) == MAXENTRYPOS - 1 )
68
+ if (res - a >= MAXNUMPOS - 1 ||
69
+ WEP_GETPOS (* res ) == MAXENTRYPOS - 1 )
67
70
break ;
68
71
}
69
72
else if (WEP_GETWEIGHT (* ptr ) > WEP_GETWEIGHT (* res ))
@@ -74,12 +77,13 @@ uniquePos(WordEntryPos * a, int l)
74
77
return res + 1 - a ;
75
78
}
76
79
80
+ /* Compare two WordEntryIN values for qsort */
77
81
static int
78
82
compareentry (const void * va , const void * vb , void * arg )
79
83
{
84
+ const WordEntryIN * a = (const WordEntryIN * ) va ;
85
+ const WordEntryIN * b = (const WordEntryIN * ) vb ;
80
86
char * BufferStr = (char * ) arg ;
81
- WordEntryIN * a = (WordEntryIN * ) va ;
82
- WordEntryIN * b = (WordEntryIN * ) vb ;
83
87
84
88
if (a -> entry .len == b -> entry .len )
85
89
{
@@ -91,82 +95,78 @@ compareentry(const void *va, const void *vb, void *arg)
91
95
return (a -> entry .len > b -> entry .len ) ? 1 : -1 ;
92
96
}
93
97
98
+ /*
99
+ * Sort an array of WordEntryIN, remove duplicates.
100
+ * *outbuflen receives the amount of space needed for strings and positions.
101
+ */
94
102
static int
95
103
uniqueentry (WordEntryIN * a , int l , char * buf , int * outbuflen )
96
104
{
105
+ int buflen ;
97
106
WordEntryIN * ptr ,
98
107
* res ;
99
108
100
109
Assert (l >= 1 );
101
110
102
- if (l == 1 )
103
- {
104
- if (a -> entry .haspos )
105
- {
106
- a -> poslen = uniquePos (a -> pos , a -> poslen );
107
- * outbuflen = SHORTALIGN (a -> entry .len ) + (a -> poslen + 1 ) * sizeof (WordEntryPos );
108
- }
109
- else
110
- * outbuflen = a -> entry .len ;
111
+ if (l > 1 )
112
+ qsort_arg ((void * ) a , l , sizeof (WordEntryIN ), compareentry ,
113
+ (void * ) buf );
111
114
112
- return l ;
113
- }
115
+ buflen = 0 ;
114
116
res = a ;
115
-
116
117
ptr = a + 1 ;
117
- qsort_arg ((void * ) a , l , sizeof (WordEntryIN ), compareentry , (void * ) buf );
118
-
119
118
while (ptr - a < l )
120
119
{
121
120
if (!(ptr -> entry .len == res -> entry .len &&
122
- strncmp (& buf [ptr -> entry .pos ], & buf [res -> entry .pos ], res -> entry .len ) == 0 ))
121
+ strncmp (& buf [ptr -> entry .pos ], & buf [res -> entry .pos ],
122
+ res -> entry .len ) == 0 ))
123
123
{
124
+ /* done accumulating data into *res, count space needed */
125
+ buflen += res -> entry .len ;
124
126
if (res -> entry .haspos )
125
127
{
126
- * outbuflen += SHORTALIGN (res -> entry .len );
127
128
res -> poslen = uniquePos (res -> pos , res -> poslen );
128
- * outbuflen += res -> poslen * sizeof (WordEntryPos );
129
+ buflen = SHORTALIGN (buflen );
130
+ buflen += res -> poslen * sizeof (WordEntryPos ) + sizeof (uint16 );
129
131
}
130
- else
131
- * outbuflen += res -> entry .len ;
132
132
res ++ ;
133
133
memcpy (res , ptr , sizeof (WordEntryIN ));
134
134
}
135
135
else if (ptr -> entry .haspos )
136
136
{
137
137
if (res -> entry .haspos )
138
138
{
139
+ /* append ptr's positions to res's positions */
139
140
int newlen = ptr -> poslen + res -> poslen ;
140
141
141
- /* Append res to pos */
142
-
143
- res -> pos = (WordEntryPos * ) repalloc (res -> pos , newlen * sizeof (WordEntryPos ));
144
- memcpy (& res -> pos [res -> poslen ],
145
- ptr -> pos , ptr -> poslen * sizeof (WordEntryPos ));
142
+ res -> pos = (WordEntryPos * )
143
+ repalloc (res -> pos , newlen * sizeof (WordEntryPos ));
144
+ memcpy (& res -> pos [res -> poslen ], ptr -> pos ,
145
+ ptr -> poslen * sizeof (WordEntryPos ));
146
146
res -> poslen = newlen ;
147
147
pfree (ptr -> pos );
148
148
}
149
149
else
150
150
{
151
+ /* just give ptr's positions to pos */
151
152
res -> entry .haspos = 1 ;
152
153
res -> pos = ptr -> pos ;
154
+ res -> poslen = ptr -> poslen ;
153
155
}
154
156
}
155
157
ptr ++ ;
156
158
}
157
159
158
- /* add last item */
159
-
160
+ /* count space needed for last item */
161
+ buflen += res -> entry . len ;
160
162
if (res -> entry .haspos )
161
163
{
162
- * outbuflen += SHORTALIGN (res -> entry .len );
163
-
164
164
res -> poslen = uniquePos (res -> pos , res -> poslen );
165
- * outbuflen += res -> poslen * sizeof (WordEntryPos );
165
+ buflen = SHORTALIGN (buflen );
166
+ buflen += res -> poslen * sizeof (WordEntryPos ) + sizeof (uint16 );
166
167
}
167
- else
168
- * outbuflen += res -> entry .len ;
169
168
169
+ * outbuflen = buflen ;
170
170
return res + 1 - a ;
171
171
}
172
172
@@ -193,6 +193,8 @@ tsvectorin(PG_FUNCTION_ARGS)
193
193
int toklen ;
194
194
WordEntryPos * pos ;
195
195
int poslen ;
196
+ char * strbuf ;
197
+ int stroff ;
196
198
197
199
/*
198
200
* Tokens are appended to tmpbuf, cur is a pointer
@@ -212,27 +214,26 @@ tsvectorin(PG_FUNCTION_ARGS)
212
214
213
215
while (gettoken_tsvector (state , & token , & toklen , & pos , & poslen , NULL ))
214
216
{
215
-
216
217
if (toklen >= MAXSTRLEN )
217
218
ereport (ERROR ,
218
- (errcode (ERRCODE_SYNTAX_ERROR ),
219
+ (errcode (ERRCODE_PROGRAM_LIMIT_EXCEEDED ),
219
220
errmsg ("word is too long (%ld bytes, max %ld bytes)" ,
220
221
(long ) toklen ,
221
- (long ) MAXSTRLEN )));
222
-
222
+ (long ) (MAXSTRLEN - 1 ))));
223
223
224
224
if (cur - tmpbuf > MAXSTRPOS )
225
225
ereport (ERROR ,
226
- (errcode (ERRCODE_SYNTAX_ERROR ),
227
- errmsg ("position value is too large " )));
226
+ (errcode (ERRCODE_PROGRAM_LIMIT_EXCEEDED ),
227
+ errmsg ("string is too long for tsvector " )));
228
228
229
229
/*
230
230
* Enlarge buffers if needed
231
231
*/
232
232
if (len >= arrlen )
233
233
{
234
234
arrlen *= 2 ;
235
- arr = (WordEntryIN * ) repalloc ((void * ) arr , sizeof (WordEntryIN ) * arrlen );
235
+ arr = (WordEntryIN * )
236
+ repalloc ((void * ) arr , sizeof (WordEntryIN ) * arrlen );
236
237
}
237
238
while ((cur - tmpbuf ) + toklen >= buflen )
238
239
{
@@ -254,7 +255,11 @@ tsvectorin(PG_FUNCTION_ARGS)
254
255
arr [len ].poslen = poslen ;
255
256
}
256
257
else
258
+ {
257
259
arr [len ].entry .haspos = 0 ;
260
+ arr [len ].pos = NULL ;
261
+ arr [len ].poslen = 0 ;
262
+ }
258
263
len ++ ;
259
264
}
260
265
@@ -264,40 +269,45 @@ tsvectorin(PG_FUNCTION_ARGS)
264
269
len = uniqueentry (arr , len , tmpbuf , & buflen );
265
270
else
266
271
buflen = 0 ;
272
+
273
+ if (buflen > MAXSTRPOS )
274
+ ereport (ERROR ,
275
+ (errcode (ERRCODE_PROGRAM_LIMIT_EXCEEDED ),
276
+ errmsg ("string is too long for tsvector" )));
277
+
267
278
totallen = CALCDATASIZE (len , buflen );
268
279
in = (TSVector ) palloc0 (totallen );
269
-
270
280
SET_VARSIZE (in , totallen );
271
281
in -> size = len ;
272
- cur = STRPTR (in );
273
282
inarr = ARRPTR (in );
283
+ strbuf = STRPTR (in );
284
+ stroff = 0 ;
274
285
for (i = 0 ; i < len ; i ++ )
275
286
{
276
- memcpy (( void * ) cur , ( void * ) & tmpbuf [arr [i ].entry .pos ], arr [i ].entry .len );
277
- arr [i ].entry .pos = cur - STRPTR ( in ) ;
278
- cur += SHORTALIGN ( arr [i ].entry .len ) ;
287
+ memcpy (strbuf + stroff , & tmpbuf [arr [i ].entry .pos ], arr [i ].entry .len );
288
+ arr [i ].entry .pos = stroff ;
289
+ stroff += arr [i ].entry .len ;
279
290
if (arr [i ].entry .haspos )
280
291
{
281
- uint16 tmplen ;
282
-
283
- if (arr [i ].poslen > 0xFFFF )
292
+ if (arr [i ].poslen > 0xFFFF )
284
293
elog (ERROR , "positions array too long" );
285
294
286
- tmplen = (uint16 ) arr [i ].poslen ;
287
-
288
- /* Copy length to output struct */
289
- memcpy (cur , & tmplen , sizeof (uint16 ));
290
- cur += sizeof (uint16 );
295
+ /* Copy number of positions */
296
+ stroff = SHORTALIGN (stroff );
297
+ * (uint16 * ) (strbuf + stroff ) = (uint16 ) arr [i ].poslen ;
298
+ stroff += sizeof (uint16 );
291
299
292
300
/* Copy positions */
293
- memcpy (cur , arr [i ].pos , ( arr [i ].poslen ) * sizeof (WordEntryPos ));
294
- cur += arr [i ].poslen * sizeof (WordEntryPos );
301
+ memcpy (strbuf + stroff , arr [i ].pos , arr [i ].poslen * sizeof (WordEntryPos ));
302
+ stroff += arr [i ].poslen * sizeof (WordEntryPos );
295
303
296
304
pfree (arr [i ].pos );
297
305
}
298
306
inarr [i ] = arr [i ].entry ;
299
307
}
300
308
309
+ Assert ((strbuf + stroff - (char * ) in ) == totallen );
310
+
301
311
PG_RETURN_TSVECTOR (in );
302
312
}
303
313
@@ -495,11 +505,12 @@ tsvectorrecv(PG_FUNCTION_ARGS)
495
505
496
506
datalen += lex_len ;
497
507
498
- if (i > 0 && WordEntryCMP (& vec -> entries [i ], & vec -> entries [i - 1 ], STRPTR (vec )) <= 0 )
508
+ if (i > 0 && WordEntryCMP (& vec -> entries [i ],
509
+ & vec -> entries [i - 1 ],
510
+ STRPTR (vec )) <= 0 )
499
511
elog (ERROR , "lexemes are misordered" );
500
512
501
513
/* Receive positions */
502
-
503
514
if (npos > 0 )
504
515
{
505
516
uint16 j ;
0 commit comments