Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit db4f21e

Browse files
committed
Redesign interrupt/cancel API for regex engine.
Previously, a PostgreSQL-specific callback checked by the regex engine had a way to trigger a special error code REG_CANCEL if it detected that the next call to CHECK_FOR_INTERRUPTS() would certainly throw via ereport(). A later proposed bugfix aims to move some complex logic out of signal handlers, so that it won't run until the next CHECK_FOR_INTERRUPTS(), which makes the above design impossible unless we split CHECK_FOR_INTERRUPTS() into two phases, one to run logic and another to ereport(). We may develop such a system in the future, but for the regex code it is no longer necessary. An earlier commit moved regex memory management over to our MemoryContext system. Given that the purpose of the two-phase interrupt checking was to free memory before throwing, something we don't need to worry about anymore, it seems simpler to inject CHECK_FOR_INTERRUPTS() directly into cancelation points, and just let it throw. Since the plan is to keep PostgreSQL-specific concerns separate from the main regex engine code (with a view to bein able to stay in sync with other projects), do this with a new macro INTERRUPT(), customizable in regcustom.h and defaulting to nothing. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
1 parent 6db75ed commit db4f21e

File tree

13 files changed

+18
-104
lines changed

13 files changed

+18
-104
lines changed

src/backend/regex/regc_locale.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -475,11 +475,7 @@ range(struct vars *v, /* context */
475475
}
476476
addchr(cv, cc);
477477
}
478-
if (CANCEL_REQUESTED(v->re))
479-
{
480-
ERR(REG_CANCEL);
481-
return NULL;
482-
}
478+
INTERRUPT(v->re);
483479
}
484480

485481
return cv;

src/backend/regex/regc_nfa.c

+8-40
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,7 @@ newstate(struct nfa *nfa)
143143
* compilation, since no code path will go very long without making a new
144144
* state or arc.
145145
*/
146-
if (CANCEL_REQUESTED(nfa->v->re))
147-
{
148-
NERR(REG_CANCEL);
149-
return NULL;
150-
}
146+
INTERRUPT(nfa->v->re);
151147

152148
/* first, recycle anything that's on the freelist */
153149
if (nfa->freestates != NULL)
@@ -297,11 +293,7 @@ newarc(struct nfa *nfa,
297293
* compilation, since no code path will go very long without making a new
298294
* state or arc.
299295
*/
300-
if (CANCEL_REQUESTED(nfa->v->re))
301-
{
302-
NERR(REG_CANCEL);
303-
return;
304-
}
296+
INTERRUPT(nfa->v->re);
305297

306298
/* check for duplicate arc, using whichever chain is shorter */
307299
if (from->nouts <= to->nins)
@@ -825,11 +817,7 @@ moveins(struct nfa *nfa,
825817
* Because we bypass newarc() in this code path, we'd better include a
826818
* cancel check.
827819
*/
828-
if (CANCEL_REQUESTED(nfa->v->re))
829-
{
830-
NERR(REG_CANCEL);
831-
return;
832-
}
820+
INTERRUPT(nfa->v->re);
833821

834822
sortins(nfa, oldState);
835823
sortins(nfa, newState);
@@ -929,11 +917,7 @@ copyins(struct nfa *nfa,
929917
* Because we bypass newarc() in this code path, we'd better include a
930918
* cancel check.
931919
*/
932-
if (CANCEL_REQUESTED(nfa->v->re))
933-
{
934-
NERR(REG_CANCEL);
935-
return;
936-
}
920+
INTERRUPT(nfa->v->re);
937921

938922
sortins(nfa, oldState);
939923
sortins(nfa, newState);
@@ -1000,11 +984,7 @@ mergeins(struct nfa *nfa,
1000984
* Because we bypass newarc() in this code path, we'd better include a
1001985
* cancel check.
1002986
*/
1003-
if (CANCEL_REQUESTED(nfa->v->re))
1004-
{
1005-
NERR(REG_CANCEL);
1006-
return;
1007-
}
987+
INTERRUPT(nfa->v->re);
1008988

1009989
/* Sort existing inarcs as well as proposed new ones */
1010990
sortins(nfa, s);
@@ -1125,11 +1105,7 @@ moveouts(struct nfa *nfa,
11251105
* Because we bypass newarc() in this code path, we'd better include a
11261106
* cancel check.
11271107
*/
1128-
if (CANCEL_REQUESTED(nfa->v->re))
1129-
{
1130-
NERR(REG_CANCEL);
1131-
return;
1132-
}
1108+
INTERRUPT(nfa->v->re);
11331109

11341110
sortouts(nfa, oldState);
11351111
sortouts(nfa, newState);
@@ -1226,11 +1202,7 @@ copyouts(struct nfa *nfa,
12261202
* Because we bypass newarc() in this code path, we'd better include a
12271203
* cancel check.
12281204
*/
1229-
if (CANCEL_REQUESTED(nfa->v->re))
1230-
{
1231-
NERR(REG_CANCEL);
1232-
return;
1233-
}
1205+
INTERRUPT(nfa->v->re);
12341206

12351207
sortouts(nfa, oldState);
12361208
sortouts(nfa, newState);
@@ -3282,11 +3254,7 @@ checkmatchall_recurse(struct nfa *nfa, struct state *s, bool **haspaths)
32823254
return false;
32833255

32843256
/* In case the search takes a long time, check for cancel */
3285-
if (CANCEL_REQUESTED(nfa->v->re))
3286-
{
3287-
NERR(REG_CANCEL);
3288-
return false;
3289-
}
3257+
INTERRUPT(nfa->v->re);
32903258

32913259
/* Create a haspath array for this state */
32923260
haspath = (bool *) MALLOC((DUPINF + 2) * sizeof(bool));

src/backend/regex/regcomp.c

-18
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ static int newlacon(struct vars *v, struct state *begin, struct state *end,
8686
int latype);
8787
static void freelacons(struct subre *subs, int n);
8888
static void rfree(regex_t *re);
89-
static int rcancelrequested(void);
9089
static int rstacktoodeep(void);
9190

9291
#ifdef REG_DEBUG
@@ -356,7 +355,6 @@ struct vars
356355
/* static function list */
357356
static const struct fns functions = {
358357
rfree, /* regfree insides */
359-
rcancelrequested, /* check for cancel request */
360358
rstacktoodeep /* check for stack getting dangerously deep */
361359
};
362360

@@ -2468,22 +2466,6 @@ rfree(regex_t *re)
24682466
}
24692467
}
24702468

2471-
/*
2472-
* rcancelrequested - check for external request to cancel regex operation
2473-
*
2474-
* Return nonzero to fail the operation with error code REG_CANCEL,
2475-
* zero to keep going
2476-
*
2477-
* The current implementation is Postgres-specific. If we ever get around
2478-
* to splitting the regex code out as a standalone library, there will need
2479-
* to be some API to let applications define a callback function for this.
2480-
*/
2481-
static int
2482-
rcancelrequested(void)
2483-
{
2484-
return InterruptPending && (QueryCancelPending || ProcDiePending);
2485-
}
2486-
24872469
/*
24882470
* rstacktoodeep - check for stack getting dangerously deep
24892471
*

src/backend/regex/rege_dfa.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -805,11 +805,7 @@ miss(struct vars *v,
805805
* Checking for operation cancel in the inner text search loop seems
806806
* unduly expensive. As a compromise, check during cache misses.
807807
*/
808-
if (CANCEL_REQUESTED(v->re))
809-
{
810-
ERR(REG_CANCEL);
811-
return NULL;
812-
}
808+
INTERRUPT(v->re);
813809

814810
/*
815811
* What set of states would we end up in after consuming the co character?

src/backend/regex/regexec.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -764,8 +764,7 @@ cdissect(struct vars *v,
764764
MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
765765

766766
/* handy place to check for operation cancel */
767-
if (CANCEL_REQUESTED(v->re))
768-
return REG_CANCEL;
767+
INTERRUPT(v->re);
769768
/* ... and stack overrun */
770769
if (STACK_TOO_DEEP(v->re))
771770
return REG_ETOOBIG;

src/backend/utils/adt/jsonpath_gram.y

-2
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,6 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
553553
{
554554
char errMsg[100];
555555

556-
/* See regexp.c for explanation */
557-
CHECK_FOR_INTERRUPTS();
558556
pg_regerror(re_result, &re_tmp, errMsg, sizeof(errMsg));
559557
ereturn(escontext, false,
560558
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

src/backend/utils/adt/regexp.c

-11
Original file line numberDiff line numberDiff line change
@@ -218,15 +218,6 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
218218
if (regcomp_result != REG_OKAY)
219219
{
220220
/* re didn't compile (no need for pg_regfree, if so) */
221-
222-
/*
223-
* Here and in other places in this file, do CHECK_FOR_INTERRUPTS
224-
* before reporting a regex error. This is so that if the regex
225-
* library aborts and returns REG_CANCEL, we don't print an error
226-
* message that implies the regex was invalid.
227-
*/
228-
CHECK_FOR_INTERRUPTS();
229-
230221
pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
231222
ereport(ERROR,
232223
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -308,7 +299,6 @@ RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
308299
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
309300
{
310301
/* re failed??? */
311-
CHECK_FOR_INTERRUPTS();
312302
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
313303
ereport(ERROR,
314304
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -2001,7 +1991,6 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
20011991

20021992
default:
20031993
/* re failed??? */
2004-
CHECK_FOR_INTERRUPTS();
20051994
pg_regerror(re_result, re, errMsg, sizeof(errMsg));
20061995
ereport(ERROR,
20071996
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

src/backend/utils/adt/varlena.c

-1
Original file line numberDiff line numberDiff line change
@@ -4265,7 +4265,6 @@ replace_text_regexp(text *src_text, text *pattern_text,
42654265
{
42664266
char errMsg[100];
42674267

4268-
CHECK_FOR_INTERRUPTS();
42694268
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
42704269
ereport(ERROR,
42714270
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

src/include/regex/regcustom.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,15 @@
4444

4545
#include "mb/pg_wchar.h"
4646

47-
#include "miscadmin.h" /* needed by rcancelrequested/rstacktoodeep */
47+
#include "miscadmin.h" /* needed by stacktoodeep */
4848

4949

5050
/* overrides for regguts.h definitions, if any */
5151
#define FUNCPTR(name, args) (*name) args
5252
#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM)
5353
#define FREE(p) pfree(VS(p))
5454
#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM)
55+
#define INTERRUPT(re) CHECK_FOR_INTERRUPTS()
5556
#define assert(x) Assert(x)
5657

5758
/* internal character type and related */

src/include/regex/regerrs.h

-4
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,3 @@
8181
{
8282
REG_ECOLORS, "REG_ECOLORS", "too many colors"
8383
},
84-
85-
{
86-
REG_CANCEL, "REG_CANCEL", "operation cancelled"
87-
},

src/include/regex/regex.h

-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ typedef struct
156156
#define REG_BADOPT 18 /* invalid embedded option */
157157
#define REG_ETOOBIG 19 /* regular expression is too complex */
158158
#define REG_ECOLORS 20 /* too many colors */
159-
#define REG_CANCEL 21 /* operation cancelled */
160159
/* two specials for debugging and testing */
161160
#define REG_ATOI 101 /* convert error-code name to number */
162161
#define REG_ITOA 102 /* convert error-code number to name */

src/include/regex/regguts.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@
7777
#define FREE(p) free(VS(p))
7878
#endif
7979

80+
/* interruption */
81+
#ifndef INTERRUPT
82+
#define INTERRUPT(re)
83+
#endif
84+
8085
/* want size of a char in bits, and max value in bounded quantifiers */
8186
#ifndef _POSIX2_RE_DUP_MAX
8287
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
@@ -510,13 +515,9 @@ struct subre
510515
struct fns
511516
{
512517
void FUNCPTR(free, (regex_t *));
513-
int FUNCPTR(cancel_requested, (void));
514518
int FUNCPTR(stack_too_deep, (void));
515519
};
516520

517-
#define CANCEL_REQUESTED(re) \
518-
((*((struct fns *) (re)->re_fns)->cancel_requested) ())
519-
520521
#define STACK_TOO_DEEP(re) \
521522
((*((struct fns *) (re)->re_fns)->stack_too_deep) ())
522523

src/test/modules/test_regex/test_regex.c

-10
Original file line numberDiff line numberDiff line change
@@ -185,15 +185,6 @@ test_re_compile(text *text_re, int cflags, Oid collation,
185185
if (regcomp_result != REG_OKAY)
186186
{
187187
/* re didn't compile (no need for pg_regfree, if so) */
188-
189-
/*
190-
* Here and in other places in this file, do CHECK_FOR_INTERRUPTS
191-
* before reporting a regex error. This is so that if the regex
192-
* library aborts and returns REG_CANCEL, we don't print an error
193-
* message that implies the regex was invalid.
194-
*/
195-
CHECK_FOR_INTERRUPTS();
196-
197188
pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
198189
ereport(ERROR,
199190
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -239,7 +230,6 @@ test_re_execute(regex_t *re, pg_wchar *data, int data_len,
239230
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
240231
{
241232
/* re failed??? */
242-
CHECK_FOR_INTERRUPTS();
243233
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
244234
ereport(ERROR,
245235
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

0 commit comments

Comments
 (0)