Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d5b90cd

Browse files
committed
Fix bogus handling of XQuery regex option flags.
The SQL spec defers to XQuery to define what the option flags are for LIKE_REGEX patterns. XQuery says that: * 's' allows the dot character to match newlines, which by default it will not; * 'm' allows ^ and $ to match at newlines, not only at the start/end of the whole string. Thus, these are *not* inverses as they are for the similarly-named POSIX options, and neither one corresponds to the POSIX 'n' option. Fortunately, Spencer's library does expose these two behaviors as separately twiddlable flags, so we just have to fix the mapping from JSP flag bits to REG flag bits. I also chose to rename the symbol for 's' to DOTALL, to make it clearer that it's not the inverse of MLINE. Also, XQuery says that if the 'q' flag "is used together with the m, s, or x flag, that flag has no effect". I read this as saying that 'q' overrides the other flags; whoever wrote our code seems to have read it backwards. Lastly, while XQuery's 'x' flag is related to what Spencer's code does for REG_EXPANDED, it's not the same or a subset. It seems best to treat XQuery's 'x' as unimplemented for now. Maybe later we can expand our regex code to offer 'x'-style parsing as a separate option. While at it, refactor the jsonpath code so that (a) there's only one copy of the flag transformation logic not two, and (b) the processing of flags is independent of the order in which the flags are written. We need some documentation updates to go with this, but I'll tackle that separately. Back-patch to v12 where this code originated. Discussion: https://postgr.es/m/CAPpHfdvDci4iqNF9fhRkTqhe-5_8HmzeLt56drH%2B_Rv2rNRqfg@mail.gmail.com Reference: https://www.w3.org/TR/2017/REC-xpath-functions-31-20170321/#flags
1 parent a25221f commit d5b90cd

File tree

7 files changed

+76
-63
lines changed

7 files changed

+76
-63
lines changed

src/backend/utils/adt/jsonpath.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
557557

558558
if (v->content.like_regex.flags & JSP_REGEX_ICASE)
559559
appendStringInfoChar(buf, 'i');
560-
if (v->content.like_regex.flags & JSP_REGEX_SLINE)
560+
if (v->content.like_regex.flags & JSP_REGEX_DOTALL)
561561
appendStringInfoChar(buf, 's');
562562
if (v->content.like_regex.flags & JSP_REGEX_MLINE)
563563
appendStringInfoChar(buf, 'm');

src/backend/utils/adt/jsonpath_exec.c

+1-25
Original file line numberDiff line numberDiff line change
@@ -1646,34 +1646,10 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
16461646
/* Cache regex text and converted flags. */
16471647
if (!cxt->regex)
16481648
{
1649-
uint32 flags = jsp->content.like_regex.flags;
1650-
16511649
cxt->regex =
16521650
cstring_to_text_with_len(jsp->content.like_regex.pattern,
16531651
jsp->content.like_regex.patternlen);
1654-
1655-
/* Convert regex flags. */
1656-
cxt->cflags = REG_ADVANCED;
1657-
1658-
if (flags & JSP_REGEX_ICASE)
1659-
cxt->cflags |= REG_ICASE;
1660-
if (flags & JSP_REGEX_MLINE)
1661-
cxt->cflags |= REG_NEWLINE;
1662-
if (flags & JSP_REGEX_SLINE)
1663-
cxt->cflags &= ~REG_NEWLINE;
1664-
if (flags & JSP_REGEX_WSPACE)
1665-
cxt->cflags |= REG_EXPANDED;
1666-
1667-
/*
1668-
* 'q' flag can work together only with 'i'. When other is specified,
1669-
* then 'q' has no effect.
1670-
*/
1671-
if ((flags & JSP_REGEX_QUOTE) &&
1672-
!(flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE)))
1673-
{
1674-
cxt->cflags &= ~REG_ADVANCED;
1675-
cxt->cflags |= REG_QUOTE;
1676-
}
1652+
cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags);
16771653
}
16781654

16791655
if (RE_compile_and_execute(cxt->regex, str->val.string.val,

src/backend/utils/adt/jsonpath_gram.y

+50-14
Original file line numberDiff line numberDiff line change
@@ -481,42 +481,32 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
481481
{
482482
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
483483
int i;
484-
int cflags = REG_ADVANCED;
484+
int cflags;
485485

486486
v->value.like_regex.expr = expr;
487487
v->value.like_regex.pattern = pattern->val;
488488
v->value.like_regex.patternlen = pattern->len;
489-
v->value.like_regex.flags = 0;
490489

490+
/* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
491+
v->value.like_regex.flags = 0;
491492
for (i = 0; flags && i < flags->len; i++)
492493
{
493494
switch (flags->val[i])
494495
{
495496
case 'i':
496497
v->value.like_regex.flags |= JSP_REGEX_ICASE;
497-
cflags |= REG_ICASE;
498498
break;
499499
case 's':
500-
v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
501-
v->value.like_regex.flags |= JSP_REGEX_SLINE;
502-
cflags |= REG_NEWLINE;
500+
v->value.like_regex.flags |= JSP_REGEX_DOTALL;
503501
break;
504502
case 'm':
505-
v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
506503
v->value.like_regex.flags |= JSP_REGEX_MLINE;
507-
cflags &= ~REG_NEWLINE;
508504
break;
509505
case 'x':
510506
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
511-
cflags |= REG_EXPANDED;
512507
break;
513508
case 'q':
514509
v->value.like_regex.flags |= JSP_REGEX_QUOTE;
515-
if (!(v->value.like_regex.flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE)))
516-
{
517-
cflags &= ~REG_ADVANCED;
518-
cflags |= REG_QUOTE;
519-
}
520510
break;
521511
default:
522512
ereport(ERROR,
@@ -528,6 +518,9 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
528518
}
529519
}
530520

521+
/* Convert flags to what RE_compile_and_cache needs */
522+
cflags = jspConvertRegexFlags(v->value.like_regex.flags);
523+
531524
/* check regex validity */
532525
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
533526
pattern->len),
@@ -536,6 +529,49 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
536529
return v;
537530
}
538531

532+
/*
533+
* Convert from XQuery regex flags to those recognized by our regex library.
534+
*/
535+
int
536+
jspConvertRegexFlags(uint32 xflags)
537+
{
538+
/* By default, XQuery is very nearly the same as Spencer's AREs */
539+
int cflags = REG_ADVANCED;
540+
541+
/* Ignore-case means the same thing, too, modulo locale issues */
542+
if (xflags & JSP_REGEX_ICASE)
543+
cflags |= REG_ICASE;
544+
545+
/* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
546+
if (xflags & JSP_REGEX_QUOTE)
547+
{
548+
cflags &= ~REG_ADVANCED;
549+
cflags |= REG_QUOTE;
550+
}
551+
else
552+
{
553+
/* Note that dotall mode is the default in POSIX */
554+
if (!(xflags & JSP_REGEX_DOTALL))
555+
cflags |= REG_NLSTOP;
556+
if (xflags & JSP_REGEX_MLINE)
557+
cflags |= REG_NLANCH;
558+
559+
/*
560+
* XQuery's 'x' mode is related to Spencer's expanded mode, but it's
561+
* not really enough alike to justify treating JSP_REGEX_WSPACE as
562+
* REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
563+
* future we'll modify the regex library to have an option for
564+
* XQuery-style ignore-whitespace mode.
565+
*/
566+
if (xflags & JSP_REGEX_WSPACE)
567+
ereport(ERROR,
568+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
569+
errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
570+
}
571+
572+
return cflags;
573+
}
574+
539575
/*
540576
* jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
541577
* unavoidable because jsonpath_gram does not create a .h file to export its

src/include/utils/jsonpath.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ typedef enum JsonPathItemType
8888

8989
/* XQuery regex mode flags for LIKE_REGEX predicate */
9090
#define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */
91-
#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */
92-
#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */
93-
#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */
91+
#define JSP_REGEX_DOTALL 0x02 /* s flag, dot matches newline */
92+
#define JSP_REGEX_MLINE 0x04 /* m flag, ^/$ match at newlines */
93+
#define JSP_REGEX_WSPACE 0x08 /* x flag, ignore whitespace in pattern */
9494
#define JSP_REGEX_QUOTE 0x10 /* q flag, no special characters */
9595

9696
/*
@@ -245,4 +245,6 @@ typedef struct JsonPathParseResult
245245

246246
extern JsonPathParseResult *parsejsonpath(const char *str, int len);
247247

248+
extern int jspConvertRegexFlags(uint32 xflags);
249+
248250
#endif

src/test/regress/expected/jsonb_jsonpath.out

+6-5
Original file line numberDiff line numberDiff line change
@@ -1592,35 +1592,36 @@ select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts wit
15921592
1
15931593
(2 rows)
15941594

1595-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
1595+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
15961596
jsonb_path_query
15971597
------------------
15981598
"abc"
15991599
"abdacb"
16001600
(2 rows)
16011601

1602-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")');
1602+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
16031603
jsonb_path_query
16041604
------------------
16051605
"abc"
16061606
"aBdC"
16071607
"abdacb"
16081608
(3 rows)
16091609

1610-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
1610+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
16111611
jsonb_path_query
16121612
------------------
16131613
"abc"
16141614
"abdacb"
16151615
"adc\nabc"
16161616
(3 rows)
16171617

1618-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
1618+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
16191619
jsonb_path_query
16201620
------------------
16211621
"abc"
16221622
"abdacb"
1623-
(2 rows)
1623+
"ab\nadc"
1624+
(3 rows)
16241625

16251626
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")');
16261627
jsonb_path_query

src/test/regress/expected/jsonpath.out

+9-11
Original file line numberDiff line numberDiff line change
@@ -442,17 +442,15 @@ select '$ ? (@ like_regex "pattern" flag "is")'::jsonpath;
442442
(1 row)
443443

444444
select '$ ? (@ like_regex "pattern" flag "isim")'::jsonpath;
445-
jsonpath
446-
--------------------------------------
447-
$?(@ like_regex "pattern" flag "im")
445+
jsonpath
446+
---------------------------------------
447+
$?(@ like_regex "pattern" flag "ism")
448448
(1 row)
449449

450450
select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath;
451-
jsonpath
452-
--------------------------------------
453-
$?(@ like_regex "pattern" flag "sx")
454-
(1 row)
455-
451+
ERROR: XQuery "x" flag (expanded regular expressions) is not implemented
452+
LINE 1: select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath;
453+
^
456454
select '$ ? (@ like_regex "pattern" flag "q")'::jsonpath;
457455
jsonpath
458456
-------------------------------------
@@ -466,9 +464,9 @@ select '$ ? (@ like_regex "pattern" flag "iq")'::jsonpath;
466464
(1 row)
467465

468466
select '$ ? (@ like_regex "pattern" flag "smixq")'::jsonpath;
469-
jsonpath
470-
----------------------------------------
471-
$?(@ like_regex "pattern" flag "imxq")
467+
jsonpath
468+
-----------------------------------------
469+
$?(@ like_regex "pattern" flag "ismxq")
472470
(1 row)
473471

474472
select '$ ? (@ like_regex "pattern" flag "a")'::jsonpath;

src/test/regress/sql/jsonb_jsonpath.sql

+4-4
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,10 @@ select jsonb_path_query('[[null, 1, "abc", "abcabc"]]', 'lax $ ? (@[*] starts wi
335335
select jsonb_path_query('[[null, 1, "abd", "abdabc"]]', 'lax $ ? ((@[*] starts with "abc") is unknown)');
336336
select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts with "abc") is unknown)');
337337

338-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
339-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")');
340-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
341-
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
338+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
339+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
340+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
341+
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
342342
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")');
343343
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "")');
344344
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "^a\\b$" flag "q")');

0 commit comments

Comments
 (0)