Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1ea47dd

Browse files
committed
Fix shared tsvector/tsquery input code so that we don't say "syntax error in tsvector" when we are really parsing a tsquery.
Report the bogus input, too. Make styles of some related error messages more consistent.
1 parent dfc6f13 commit 1ea47dd

File tree

4 files changed

+81
-68
lines changed

4 files changed

+81
-68
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.7 2007/09/11 16:01:40 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.8 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state,
141141
{
142142
ereport(ERROR,
143143
(errcode(ERRCODE_SYNTAX_ERROR),
144-
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
144+
errmsg("syntax error in tsquery: \"%s\"",
145145
state->buffer)));
146146
}
147147
else if (!t_isspace(state->buf))
@@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state,
159159
else
160160
ereport(ERROR,
161161
(errcode(ERRCODE_SYNTAX_ERROR),
162-
errmsg("no operand in tsearch query: \"%s\"",
162+
errmsg("no operand in tsquery: \"%s\"",
163163
state->buffer)));
164164
}
165165
break;
@@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
232232
if (distance >= MAXSTRPOS)
233233
ereport(ERROR,
234234
(errcode(ERRCODE_SYNTAX_ERROR),
235-
errmsg("value is too big in tsearch query: \"%s\"",
235+
errmsg("value is too big in tsquery: \"%s\"",
236236
state->buffer)));
237237
if (lenval >= MAXSTRLEN)
238238
ereport(ERROR,
239239
(errcode(ERRCODE_SYNTAX_ERROR),
240-
errmsg("operand is too long in tsearch query: \"%s\"",
240+
errmsg("operand is too long in tsquery: \"%s\"",
241241
state->buffer)));
242242

243243
tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
@@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
264264
if (lenval >= MAXSTRLEN)
265265
ereport(ERROR,
266266
(errcode(ERRCODE_SYNTAX_ERROR),
267-
errmsg("word is too long in tsearch query: \"%s\"",
267+
errmsg("word is too long in tsquery: \"%s\"",
268268
state->buffer)));
269269

270270
INIT_CRC32(valcrc);
@@ -372,7 +372,7 @@ makepol(TSQueryParserState state,
372372
default:
373373
ereport(ERROR,
374374
(errcode(ERRCODE_SYNTAX_ERROR),
375-
errmsg("syntax error in tsearch query: \"%s\"",
375+
errmsg("syntax error in tsquery: \"%s\"",
376376
state->buffer)));
377377
}
378378
}
@@ -478,7 +478,7 @@ parse_tsquery(char *buf,
478478
state.polstr = NIL;
479479

480480
/* init value parser's state */
481-
state.valstate = init_tsvector_parser(NULL, true);
481+
state.valstate = init_tsvector_parser(state.buffer, true, true);
482482

483483
/* init list of operand */
484484
state.sumlen = 0;

src/backend/utils/adt/tsvector.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS)
204204

205205
pg_verifymbstr(buf, strlen(buf), false);
206206

207-
state = init_tsvector_parser(buf, false);
207+
state = init_tsvector_parser(buf, false, false);
208208

209209
arrlen = 64;
210210
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS)
224224
if (cur - tmpbuf > MAXSTRPOS)
225225
ereport(ERROR,
226226
(errcode(ERRCODE_SYNTAX_ERROR),
227-
errmsg("position value too large")));
227+
errmsg("position value is too large")));
228228

229229
/*
230230
* Enlarge buffers if needed
@@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
496496
datalen += lex_len;
497497

498498
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
499-
elog(ERROR, "lexemes are unordered");
499+
elog(ERROR, "lexemes are misordered");
500500

501501
/* Receive positions */
502502

@@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
523523
{
524524
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
525525
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
526-
elog(ERROR, "position information is unordered");
526+
elog(ERROR, "position information is misordered");
527527
}
528528

529529
datalen += (npos + 1) * sizeof(WordEntry);

src/backend/utils/adt/tsvector_parser.c

Lines changed: 63 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.2 2007/10/21 22:29:56 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -20,35 +20,49 @@
2020
#include "tsearch/ts_utils.h"
2121
#include "utils/memutils.h"
2222

23+
24+
/*
25+
* Private state of tsvector parser. Note that tsquery also uses this code to
26+
* parse its input, hence the boolean flags. The two flags are both true or
27+
* both false in current usage, but we keep them separate for clarity.
28+
* is_tsquery affects *only* the content of error messages.
29+
*/
2330
struct TSVectorParseStateData
2431
{
25-
char *prsbuf;
26-
char *word; /* buffer to hold the current word */
27-
int len; /* size in bytes allocated for 'word' */
28-
bool oprisdelim;
32+
char *prsbuf; /* next input character */
33+
char *bufstart; /* whole string (used only for errors) */
34+
char *word; /* buffer to hold the current word */
35+
int len; /* size in bytes allocated for 'word' */
36+
int eml; /* max bytes per character */
37+
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
38+
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
2939
};
3040

41+
3142
/*
3243
* Initializes parser for the input string. If oprisdelim is set, the
3344
* following characters are treated as delimiters in addition to whitespace:
3445
* ! | & ( )
3546
*/
3647
TSVectorParseState
37-
init_tsvector_parser(char *input, bool oprisdelim)
48+
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
3849
{
3950
TSVectorParseState state;
4051

4152
state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
4253
state->prsbuf = input;
54+
state->bufstart = input;
4355
state->len = 32;
4456
state->word = (char *) palloc(state->len);
57+
state->eml = pg_database_encoding_max_length();
4558
state->oprisdelim = oprisdelim;
59+
state->is_tsquery = is_tsquery;
4660

4761
return state;
4862
}
4963

5064
/*
51-
* Reinitializes parser for parsing 'input', instead of previous input.
65+
* Reinitializes parser to parse 'input', instead of previous input.
5266
*/
5367
void
5468
reset_tsvector_parser(TSVectorParseState state, char *input)
@@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state)
6680
pfree(state);
6781
}
6882

83+
/* increase the size of 'word' if needed to hold one more character */
6984
#define RESIZEPRSBUF \
7085
do { \
71-
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
86+
int clen = curpos - state->word; \
87+
if ( clen + state->eml >= state->len ) \
7288
{ \
73-
int clen = curpos - state->word; \
7489
state->len *= 2; \
75-
state->word = (char*)repalloc( (void*)state->word, state->len ); \
90+
state->word = (char *) repalloc(state->word, state->len); \
7691
curpos = state->word + clen; \
7792
} \
7893
} while (0)
7994

80-
8195
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
8296

83-
/* Fills the output parameters, and returns true */
97+
/* Fills gettoken_tsvector's output parameters, and returns true */
8498
#define RETURN_TOKEN \
8599
do { \
86100
if (pos_ptr != NULL) \
@@ -111,18 +125,34 @@ do { \
111125
#define WAITPOSDELIM 7
112126
#define WAITCHARCMPLX 8
113127

128+
#define PRSSYNTAXERROR prssyntaxerror(state)
129+
130+
static void
131+
prssyntaxerror(TSVectorParseState state)
132+
{
133+
ereport(ERROR,
134+
(errcode(ERRCODE_SYNTAX_ERROR),
135+
state->is_tsquery ?
136+
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
137+
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
138+
}
139+
140+
114141
/*
115-
* Get next token from string being parsed. Returns false if
116-
* end of input string is reached, otherwise strval, lenval, pos_ptr
117-
* and poslen output parameters are filled in:
142+
* Get next token from string being parsed. Returns true if successful,
143+
* false if end of input string is reached. On success, these output
144+
* parameters are filled in:
118145
*
119-
* *strval token
120-
* *lenval length of*strval
146+
* *strval pointer to token
147+
* *lenval length of *strval
121148
* *pos_ptr pointer to a palloc'd array of positions and weights
122149
* associated with the token. If the caller is not interested
123150
* in the information, NULL can be supplied. Otherwise
124151
* the caller is responsible for pfreeing the array.
125152
* *poslen number of elements in *pos_ptr
153+
* *endptr scan resumption point
154+
*
155+
* Pass NULL for unwanted output parameters.
126156
*/
127157
bool
128158
gettoken_tsvector(TSVectorParseState state,
@@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state,
155185
oldstate = WAITENDWORD;
156186
}
157187
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
158-
ereport(ERROR,
159-
(errcode(ERRCODE_SYNTAX_ERROR),
160-
errmsg("syntax error in tsvector")));
188+
PRSSYNTAXERROR;
161189
else if (!t_isspace(state->prsbuf))
162190
{
163191
COPYCHAR(curpos, state->prsbuf);
@@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state,
170198
if (*(state->prsbuf) == '\0')
171199
ereport(ERROR,
172200
(errcode(ERRCODE_SYNTAX_ERROR),
173-
errmsg("there is no escaped character")));
201+
errmsg("there is no escaped character: \"%s\"",
202+
state->bufstart)));
174203
else
175204
{
176205
RESIZEPRSBUF;
@@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state,
192221
{
193222
RESIZEPRSBUF;
194223
if (curpos == state->word)
195-
ereport(ERROR,
196-
(errcode(ERRCODE_SYNTAX_ERROR),
197-
errmsg("syntax error in tsvector")));
224+
PRSSYNTAXERROR;
198225
*(curpos) = '\0';
199226
RETURN_TOKEN;
200227
}
201228
else if (t_iseq(state->prsbuf, ':'))
202229
{
203230
if (curpos == state->word)
204-
ereport(ERROR,
205-
(errcode(ERRCODE_SYNTAX_ERROR),
206-
errmsg("syntax error in tsvector")));
231+
PRSSYNTAXERROR;
207232
*(curpos) = '\0';
208233
if (state->oprisdelim)
209234
RETURN_TOKEN;
@@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state,
229254
oldstate = WAITENDCMPLX;
230255
}
231256
else if (*(state->prsbuf) == '\0')
232-
ereport(ERROR,
233-
(errcode(ERRCODE_SYNTAX_ERROR),
234-
errmsg("syntax error in tsvector")));
257+
PRSSYNTAXERROR;
235258
else
236259
{
237260
RESIZEPRSBUF;
@@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
253276
RESIZEPRSBUF;
254277
*(curpos) = '\0';
255278
if (curpos == state->word)
256-
ereport(ERROR,
257-
(errcode(ERRCODE_SYNTAX_ERROR),
258-
errmsg("syntax error in tsvector")));
279+
PRSSYNTAXERROR;
259280
if (state->oprisdelim)
260281
{
261282
/* state->prsbuf+=pg_mblen(state->prsbuf); */
@@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state,
290311
}
291312
npos++;
292313
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
314+
/* we cannot get here in tsquery, so no need for 2 errmsgs */
293315
if (WEP_GETPOS(pos[npos - 1]) == 0)
294316
ereport(ERROR,
295317
(errcode(ERRCODE_SYNTAX_ERROR),
296-
errmsg("wrong position info in tsvector")));
318+
errmsg("wrong position info in tsvector: \"%s\"",
319+
state->bufstart)));
297320
WEP_SETWEIGHT(pos[npos - 1], 0);
298321
statecode = WAITPOSDELIM;
299322
}
300323
else
301-
ereport(ERROR,
302-
(errcode(ERRCODE_SYNTAX_ERROR),
303-
errmsg("syntax error in tsvector")));
324+
PRSSYNTAXERROR;
304325
}
305326
else if (statecode == WAITPOSDELIM)
306327
{
@@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state,
309330
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
310331
{
311332
if (WEP_GETWEIGHT(pos[npos - 1]))
312-
ereport(ERROR,
313-
(errcode(ERRCODE_SYNTAX_ERROR),
314-
errmsg("syntax error in tsvector")));
333+
PRSSYNTAXERROR;
315334
WEP_SETWEIGHT(pos[npos - 1], 3);
316335
}
317336
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
318337
{
319338
if (WEP_GETWEIGHT(pos[npos - 1]))
320-
ereport(ERROR,
321-
(errcode(ERRCODE_SYNTAX_ERROR),
322-
errmsg("syntax error in tsvector")));
339+
PRSSYNTAXERROR;
323340
WEP_SETWEIGHT(pos[npos - 1], 2);
324341
}
325342
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
326343
{
327344
if (WEP_GETWEIGHT(pos[npos - 1]))
328-
ereport(ERROR,
329-
(errcode(ERRCODE_SYNTAX_ERROR),
330-
errmsg("syntax error in tsvector")));
345+
PRSSYNTAXERROR;
331346
WEP_SETWEIGHT(pos[npos - 1], 1);
332347
}
333348
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
334349
{
335350
if (WEP_GETWEIGHT(pos[npos - 1]))
336-
ereport(ERROR,
337-
(errcode(ERRCODE_SYNTAX_ERROR),
338-
errmsg("syntax error in tsvector")));
351+
PRSSYNTAXERROR;
339352
WEP_SETWEIGHT(pos[npos - 1], 0);
340353
}
341354
else if (t_isspace(state->prsbuf) ||
342355
*(state->prsbuf) == '\0')
343356
RETURN_TOKEN;
344357
else if (!t_isdigit(state->prsbuf))
345-
ereport(ERROR,
346-
(errcode(ERRCODE_SYNTAX_ERROR),
347-
errmsg("syntax error in tsvector")));
358+
PRSSYNTAXERROR;
348359
}
349360
else /* internal error */
350361
elog(ERROR, "internal error in gettoken_tsvector");

0 commit comments

Comments
 (0)