Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 78212f2

Browse files
committed
Convert tsqueryin and tsvectorin to report errors softly.
This is slightly tedious because the adjustments cascade through a couple of levels of subroutines, but it's not very hard. I chose to avoid changing function signatures more than absolutely necessary, by passing the escontext pointer in existing structs where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in soft-error mode, since they're not errors and we surely don't want them to be shown to the user anyway. Maybe that whole behavior should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
1 parent: eb8312a; commit: 78212f2

File tree

8 files changed: +196 / -52 lines changed

src/backend/tsearch/to_tsany.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,8 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
594594
query = parse_tsquery(text_to_cstring(in),
595595
pushval_morph,
596596
PointerGetDatum(&data),
597-
0);
597+
0,
598+
NULL);
598599

599600
PG_RETURN_TSQUERY(query);
600601
}
@@ -630,7 +631,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
630631
query = parse_tsquery(text_to_cstring(in),
631632
pushval_morph,
632633
PointerGetDatum(&data),
633-
P_TSQ_PLAIN);
634+
P_TSQ_PLAIN,
635+
NULL);
634636

635637
PG_RETURN_POINTER(query);
636638
}
@@ -667,7 +669,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
667669
query = parse_tsquery(text_to_cstring(in),
668670
pushval_morph,
669671
PointerGetDatum(&data),
670-
P_TSQ_PLAIN);
672+
P_TSQ_PLAIN,
673+
NULL);
671674

672675
PG_RETURN_TSQUERY(query);
673676
}
@@ -704,7 +707,8 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
704707
query = parse_tsquery(text_to_cstring(in),
705708
pushval_morph,
706709
PointerGetDatum(&data),
707-
P_TSQ_WEB);
710+
P_TSQ_WEB,
711+
NULL);
708712

709713
PG_RETURN_TSQUERY(query);
710714
}

src/backend/utils/adt/tsquery.c

+76-24
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "libpq/pqformat.h"
1818
#include "miscadmin.h"
19+
#include "nodes/miscnodes.h"
1920
#include "tsearch/ts_locale.h"
2021
#include "tsearch/ts_type.h"
2122
#include "tsearch/ts_utils.h"
@@ -58,10 +59,16 @@ typedef enum
5859
/*
5960
* get token from query string
6061
*
61-
* *operator is filled in with OP_* when return values is PT_OPR,
62-
* but *weight could contain a distance value in case of phrase operator.
63-
* *strval, *lenval and *weight are filled in when return value is PT_VAL
62+
* All arguments except "state" are output arguments.
6463
*
64+
* If return value is PT_OPR, then *operator is filled with an OP_* code
65+
* and *weight will contain a distance value in case of phrase operator.
66+
*
67+
* If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
68+
* are filled.
69+
*
70+
* If PT_ERR is returned then a soft error has occurred. If state->escontext
71+
* isn't already filled then this should be reported as a generic parse error.
6572
*/
6673
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
6774
int *lenval, char **strval,
@@ -93,6 +100,9 @@ struct TSQueryParserStateData
93100

94101
/* state for value's parser */
95102
TSVectorParseState valstate;
103+
104+
/* context object for soft errors - must match valstate's escontext */
105+
Node *escontext;
96106
};
97107

98108
/*
@@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
194204
if (ptr == endptr)
195205
return false;
196206
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
197-
ereport(ERROR,
207+
ereturn(pstate->escontext, false,
198208
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
199209
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
200210
MAXENTRYPOS)));
@@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
301311
}
302312
else if (t_iseq(state->buf, ':'))
303313
{
304-
ereport(ERROR,
305-
(errcode(ERRCODE_SYNTAX_ERROR),
306-
errmsg("syntax error in tsquery: \"%s\"",
307-
state->buffer)));
314+
/* generic syntax error message is fine */
315+
return PT_ERR;
308316
}
309317
else if (!t_isspace(state->buf))
310318
{
@@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
320328
state->state = WAITOPERATOR;
321329
return PT_VAL;
322330
}
331+
else if (SOFT_ERROR_OCCURRED(state->escontext))
332+
{
333+
/* gettoken_tsvector reported a soft error */
334+
return PT_ERR;
335+
}
323336
else if (state->state == WAITFIRSTOPERAND)
324337
{
325338
return PT_END;
326339
}
327340
else
328-
ereport(ERROR,
341+
ereturn(state->escontext, PT_ERR,
329342
(errcode(ERRCODE_SYNTAX_ERROR),
330343
errmsg("no operand in tsquery: \"%s\"",
331344
state->buffer)));
@@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
354367
*operator = OP_PHRASE;
355368
return PT_OPR;
356369
}
370+
else if (SOFT_ERROR_OCCURRED(state->escontext))
371+
{
372+
/* parse_phrase_operator reported a soft error */
373+
return PT_ERR;
374+
}
357375
else if (t_iseq(state->buf, ')'))
358376
{
359377
state->buf++;
@@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
438456
state->state = WAITOPERATOR;
439457
return PT_VAL;
440458
}
459+
else if (SOFT_ERROR_OCCURRED(state->escontext))
460+
{
461+
/* gettoken_tsvector reported a soft error */
462+
return PT_ERR;
463+
}
441464
else if (state->state == WAITFIRSTOPERAND)
442465
{
443466
return PT_END;
@@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
529552
QueryOperand *tmp;
530553

531554
if (distance >= MAXSTRPOS)
532-
ereport(ERROR,
555+
ereturn(state->escontext,,
533556
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
534557
errmsg("value is too big in tsquery: \"%s\"",
535558
state->buffer)));
536559
if (lenval >= MAXSTRLEN)
537-
ereport(ERROR,
560+
ereturn(state->escontext,,
538561
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
539562
errmsg("operand is too long in tsquery: \"%s\"",
540563
state->buffer)));
@@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool
562585
pg_crc32 valcrc;
563586

564587
if (lenval >= MAXSTRLEN)
565-
ereport(ERROR,
588+
ereturn(state->escontext,,
566589
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
567590
errmsg("word is too long in tsquery: \"%s\"",
568591
state->buffer)));
@@ -686,11 +709,17 @@ makepol(TSQueryParserState state,
686709
return;
687710
case PT_ERR:
688711
default:
689-
ereport(ERROR,
690-
(errcode(ERRCODE_SYNTAX_ERROR),
691-
errmsg("syntax error in tsquery: \"%s\"",
692-
state->buffer)));
712+
/* don't overwrite a soft error saved by gettoken function */
713+
if (!SOFT_ERROR_OCCURRED(state->escontext))
714+
errsave(state->escontext,
715+
(errcode(ERRCODE_SYNTAX_ERROR),
716+
errmsg("syntax error in tsquery: \"%s\"",
717+
state->buffer)));
718+
return;
693719
}
720+
/* detect soft error in pushval or recursion */
721+
if (SOFT_ERROR_OCCURRED(state->escontext))
722+
return;
694723
}
695724

696725
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
769798

770799

771800
/*
801+
* Parse the tsquery stored in "buf".
802+
*
772803
* Each value (operand) in the query is passed to pushval. pushval can
773804
* transform the simple value to an arbitrarily complex expression using
774805
* pushValue and pushOperator. It must push a single value with pushValue,
@@ -778,19 +809,27 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup)
778809
*
779810
* opaque is passed on to pushval as is, pushval can use it to store its
780811
* private state.
812+
*
813+
* The pushval function can record soft errors via escontext.
814+
* Callers must check SOFT_ERROR_OCCURRED to detect that.
815+
*
816+
* A bitmask of flags (see ts_utils.h) and an error context object
817+
* can be provided as well. If a soft error occurs, NULL is returned.
781818
*/
782819
TSQuery
783820
parse_tsquery(char *buf,
784821
PushFunction pushval,
785822
Datum opaque,
786-
int flags)
823+
int flags,
824+
Node *escontext)
787825
{
788826
struct TSQueryParserStateData state;
789827
int i;
790828
TSQuery query;
791829
int commonlen;
792830
QueryItem *ptr;
793831
ListCell *cell;
832+
bool noisy;
794833
bool needcleanup;
795834
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
796835

@@ -808,15 +847,19 @@ parse_tsquery(char *buf,
808847
else
809848
state.gettoken = gettoken_query_standard;
810849

850+
/* emit nuisance NOTICEs only if not doing soft errors */
851+
noisy = !(escontext && IsA(escontext, ErrorSaveContext));
852+
811853
/* init state */
812854
state.buffer = buf;
813855
state.buf = buf;
814856
state.count = 0;
815857
state.state = WAITFIRSTOPERAND;
816858
state.polstr = NIL;
859+
state.escontext = escontext;
817860

818861
/* init value parser's state */
819-
state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
862+
state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
820863

821864
/* init list of operand */
822865
state.sumlen = 0;
@@ -829,19 +872,23 @@ parse_tsquery(char *buf,
829872

830873
close_tsvector_parser(state.valstate);
831874

875+
if (SOFT_ERROR_OCCURRED(escontext))
876+
return NULL;
877+
832878
if (state.polstr == NIL)
833879
{
834-
ereport(NOTICE,
835-
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
836-
state.buffer)));
880+
if (noisy)
881+
ereport(NOTICE,
882+
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
883+
state.buffer)));
837884
query = (TSQuery) palloc(HDRSIZETQ);
838885
SET_VARSIZE(query, HDRSIZETQ);
839886
query->size = 0;
840887
return query;
841888
}
842889

843890
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
844-
ereport(ERROR,
891+
ereturn(escontext, NULL,
845892
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
846893
errmsg("tsquery is too large")));
847894
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
@@ -889,7 +936,7 @@ parse_tsquery(char *buf,
889936
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
890937
*/
891938
if (needcleanup)
892-
query = cleanup_tsquery_stopwords(query);
939+
query = cleanup_tsquery_stopwords(query, noisy);
893940

894941
return query;
895942
}
@@ -908,8 +955,13 @@ Datum
908955
tsqueryin(PG_FUNCTION_ARGS)
909956
{
910957
char *in = PG_GETARG_CSTRING(0);
958+
Node *escontext = fcinfo->context;
911959

912-
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
960+
PG_RETURN_TSQUERY(parse_tsquery(in,
961+
pushval_asis,
962+
PointerGetDatum(NULL),
963+
0,
964+
escontext));
913965
}
914966

915967
/*

src/backend/utils/adt/tsquery_cleanup.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ calcstrlen(NODE *node)
383383
* Remove QI_VALSTOP (stopword) nodes from TSQuery.
384384
*/
385385
TSQuery
386-
cleanup_tsquery_stopwords(TSQuery in)
386+
cleanup_tsquery_stopwords(TSQuery in, bool noisy)
387387
{
388388
int32 len,
389389
lenstr,
@@ -403,8 +403,9 @@ cleanup_tsquery_stopwords(TSQuery in)
403403
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
404404
if (root == NULL)
405405
{
406-
ereport(NOTICE,
407-
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
406+
if (noisy)
407+
ereport(NOTICE,
408+
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
408409
out = palloc(HDRSIZETQ);
409410
out->size = 0;
410411
SET_VARSIZE(out, HDRSIZETQ);

src/backend/utils/adt/tsvector.c

+11-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "postgres.h"
1616

1717
#include "libpq/pqformat.h"
18+
#include "nodes/miscnodes.h"
1819
#include "tsearch/ts_locale.h"
1920
#include "tsearch/ts_utils.h"
2021
#include "utils/builtins.h"
@@ -178,6 +179,7 @@ Datum
178179
tsvectorin(PG_FUNCTION_ARGS)
179180
{
180181
char *buf = PG_GETARG_CSTRING(0);
182+
Node *escontext = fcinfo->context;
181183
TSVectorParseState state;
182184
WordEntryIN *arr;
183185
int totallen;
@@ -201,7 +203,7 @@ tsvectorin(PG_FUNCTION_ARGS)
201203
char *cur;
202204
int buflen = 256; /* allocated size of tmpbuf */
203205

204-
state = init_tsvector_parser(buf, 0);
206+
state = init_tsvector_parser(buf, 0, escontext);
205207

206208
arrlen = 64;
207209
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@@ -210,14 +212,14 @@ tsvectorin(PG_FUNCTION_ARGS)
210212
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
211213
{
212214
if (toklen >= MAXSTRLEN)
213-
ereport(ERROR,
215+
ereturn(escontext, (Datum) 0,
214216
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
215217
errmsg("word is too long (%ld bytes, max %ld bytes)",
216218
(long) toklen,
217219
(long) (MAXSTRLEN - 1))));
218220

219221
if (cur - tmpbuf > MAXSTRPOS)
220-
ereport(ERROR,
222+
ereturn(escontext, (Datum) 0,
221223
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
222224
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
223225
(long) (cur - tmpbuf), (long) MAXSTRPOS)));
@@ -261,13 +263,17 @@ tsvectorin(PG_FUNCTION_ARGS)
261263

262264
close_tsvector_parser(state);
263265

266+
/* Did gettoken_tsvector fail? */
267+
if (SOFT_ERROR_OCCURRED(escontext))
268+
PG_RETURN_NULL();
269+
264270
if (len > 0)
265271
len = uniqueentry(arr, len, tmpbuf, &buflen);
266272
else
267273
buflen = 0;
268274

269275
if (buflen > MAXSTRPOS)
270-
ereport(ERROR,
276+
ereturn(escontext, (Datum) 0,
271277
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
272278
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
273279

@@ -285,6 +291,7 @@ tsvectorin(PG_FUNCTION_ARGS)
285291
stroff += arr[i].entry.len;
286292
if (arr[i].entry.haspos)
287293
{
294+
/* This should be unreachable because of MAXNUMPOS restrictions */
288295
if (arr[i].poslen > 0xFFFF)
289296
elog(ERROR, "positions array too long");
290297

0 commit comments

Comments
 (0)