Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 92087ec

Browse files
author
Nikita Glukhov
committed
Add jsonpath LIKE_REGEX predicate
1 parent b86833e commit 92087ec

File tree

11 files changed

+269
-5
lines changed

11 files changed

+269
-5
lines changed

src/backend/utils/adt/jsonpath.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,29 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
9999
*(int32*)(buf->data + right) = chld;
100100
}
101101
break;
102+
case jpiLikeRegex:
103+
{
104+
int32 offs;
105+
106+
appendBinaryStringInfo(buf,
107+
(char *) &item->value.like_regex.flags,
108+
sizeof(item->value.like_regex.flags));
109+
offs = buf->len;
110+
appendBinaryStringInfo(buf, (char *) &offs /* fake value */, sizeof(offs));
111+
112+
appendBinaryStringInfo(buf,
113+
(char *) &item->value.like_regex.patternlen,
114+
sizeof(item->value.like_regex.patternlen));
115+
appendBinaryStringInfo(buf, item->value.like_regex.pattern,
116+
item->value.like_regex.patternlen);
117+
appendStringInfoChar(buf, '\0');
118+
119+
chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr,
120+
forbiddenRoot,
121+
insideArraySubscript);
122+
*(int32 *)(buf->data + offs) = chld;
123+
}
124+
break;
102125
case jpiFilter:
103126
case jpiIsUnknown:
104127
case jpiNot:
@@ -369,6 +392,38 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, bool printBracket
369392
if (printBracketes)
370393
appendStringInfoChar(buf, ')');
371394
break;
395+
case jpiLikeRegex:
396+
if (printBracketes)
397+
appendStringInfoChar(buf, '(');
398+
399+
jspInitByBuffer(&elem, v->base, v->content.like_regex.expr);
400+
printJsonPathItem(buf, &elem, false,
401+
operationPriority(elem.type) <=
402+
operationPriority(v->type));
403+
404+
appendBinaryStringInfo(buf, " like_regex ", 12);
405+
406+
escape_json(buf, v->content.like_regex.pattern);
407+
408+
if (v->content.like_regex.flags)
409+
{
410+
appendBinaryStringInfo(buf, " flag \"", 7);
411+
412+
if (v->content.like_regex.flags & JSP_REGEX_ICASE)
413+
appendStringInfoChar(buf, 'i');
414+
if (v->content.like_regex.flags & JSP_REGEX_SLINE)
415+
appendStringInfoChar(buf, 's');
416+
if (v->content.like_regex.flags & JSP_REGEX_MLINE)
417+
appendStringInfoChar(buf, 'm');
418+
if (v->content.like_regex.flags & JSP_REGEX_WSPACE)
419+
appendStringInfoChar(buf, 'x');
420+
421+
appendStringInfoChar(buf, '"');
422+
}
423+
424+
if (printBracketes)
425+
appendStringInfoChar(buf, ')');
426+
break;
372427
case jpiPlus:
373428
case jpiMinus:
374429
if (printBracketes)
@@ -607,6 +662,12 @@ jspInitByBuffer(JsonPathItem *v, char *base, int32 pos)
607662
read_int32(v->content.args.left, base, pos);
608663
read_int32(v->content.args.right, base, pos);
609664
break;
665+
case jpiLikeRegex:
666+
read_int32(v->content.like_regex.flags, base, pos);
667+
read_int32(v->content.like_regex.expr, base, pos);
668+
read_int32(v->content.like_regex.patternlen, base, pos);
669+
v->content.like_regex.pattern = base + pos;
670+
break;
610671
case jpiNot:
611672
case jpiExists:
612673
case jpiIsUnknown:

src/backend/utils/adt/jsonpath_exec.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "catalog/pg_collation.h"
1717
#include "catalog/pg_type.h"
1818
#include "lib/stringinfo.h"
19+
#include "regex/regex.h"
1920
#include "utils/builtins.h"
2021
#include "utils/json.h"
2122
#include "utils/jsonpath.h"
@@ -819,6 +820,72 @@ executeStartsWithPredicate(JsonPathExecContext *cxt, JsonPathItem *jsp,
819820
return jperNotFound;
820821
}
821822

823+
/*
 * Evaluate a jpiLikeRegex item: match every value produced by the operand
 * expression against the stored regex pattern.
 *
 * cxt - execution context; cxt->lax selects lax vs. strict handling below
 * jsp - the LIKE_REGEX item (operand offset, pattern, patternlen, flags)
 * jb  - jsonb value the operand expression is evaluated against
 *
 * Returns jperError if evaluating the operand fails.  Otherwise:
 *   lax mode:    jperOk on the first matching string; if nothing matched,
 *                jperError when a non-string item was seen, else
 *                jperNotFound.
 *   strict mode: jperError on the first non-string item; otherwise jperOk
 *                if any string matched, else jperNotFound.
 */
static JsonPathExecResult
824+
executeLikeRegexPredicate(JsonPathExecContext *cxt, JsonPathItem *jsp,
825+
JsonbValue *jb)
826+
{
827+
JsonPathExecResult res;
828+
JsonPathItem elem;
829+
List *lseq = NIL;
830+
ListCell *lc;
831+
text *regex;
832+
uint32 flags = jsp->content.like_regex.flags;
833+
int cflags = REG_ADVANCED;
834+
bool error = false;
835+
bool found = false;
836+
837+
if (flags & JSP_REGEX_ICASE) /* translate JSP_REGEX_* bits into REG_* compile flags */
838+
cflags |= REG_ICASE;
839+
if (flags & JSP_REGEX_MLINE)
840+
cflags |= REG_NEWLINE;
841+
if (flags & JSP_REGEX_SLINE)
842+
cflags &= ~REG_NEWLINE; /* tested after MLINE, so SLINE wins if both bits are set */
843+
if (flags & JSP_REGEX_WSPACE)
844+
cflags |= REG_EXPANDED;
845+
846+
regex = cstring_to_text_with_len(jsp->content.like_regex.pattern, /* pattern is passed with an explicit length */
847+
jsp->content.like_regex.patternlen);
848+
849+
jspInitByBuffer(&elem, jsp->base, jsp->content.like_regex.expr); /* elem = operand expression */
850+
res = recursiveExecuteAndUnwrap(cxt, &elem, jb, &lseq); /* operand results are collected in lseq */
851+
if (jperIsError(res))
852+
return jperError;
853+
854+
foreach(lc, lseq)
855+
{
856+
JsonbValue *str = lfirst(lc);
857+
JsonbValue strbuf;
858+
859+
if (JsonbType(str) == jbvScalar) /* scalar wrapped in binary jsonb: extract it into strbuf */
860+
str = JsonbExtractScalar(str->val.binary.data, &strbuf);
861+
862+
if (str->type != jbvString)
863+
{
864+
if (!cxt->lax)
865+
return jperError;
866+
867+
error = true; /* lax: remember the failure, keep looking for a match */
868+
}
869+
else if (RE_compile_and_execute(regex, str->val.string.val,
870+
str->val.string.len, cflags,
871+
DEFAULT_COLLATION_OID, 0, NULL)) /* nmatch = 0, pmatch = NULL: only the boolean result is used */
872+
{
873+
if (cxt->lax)
874+
return jperOk;
875+
876+
found = true; /* strict: keep scanning, a later non-string item still yields jperError */
877+
}
878+
}
879+
880+
if (found) /* possible only in strict mode */
881+
return jperOk;
882+
883+
if (error) /* possible only in lax mode */
884+
return jperError;
885+
886+
return jperNotFound;
887+
}
888+
822889
/*
823890
* Main executor function: walks on jsonpath structure and tries to find
824891
* corresponding parts of jsonb. Note, jsonb and jsonpath values should be
@@ -1519,6 +1586,9 @@ recursiveExecuteNoUnwrap(JsonPathExecContext *cxt, JsonPathItem *jsp,
15191586
case jpiStartsWith:
15201587
res = executeStartsWithPredicate(cxt, jsp, jb);
15211588
break;
1589+
case jpiLikeRegex:
1590+
res = executeLikeRegexPredicate(cxt, jsp, jb);
1591+
break;
15221592
default:
15231593
elog(ERROR,"2Wrong state: %d", jsp->type);
15241594
}

src/backend/utils/adt/jsonpath_gram.y

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,44 @@ makeAny(int first, int last)
198198
return v;
199199
}
200200

201+
/*
 * Build a jpiLikeRegex parse item.
 *
 * expr    - operand expression whose values will be matched
 * pattern - regex pattern string; its val pointer and len are stored as-is
 *           (the buffer is not copied here)
 * flags   - optional flag string, may be NULL (no flags)
 *
 * Recognized flag characters: 'i' (JSP_REGEX_ICASE), 's' (JSP_REGEX_SLINE),
 * 'm' (JSP_REGEX_MLINE), 'x' (JSP_REGEX_WSPACE).  's' and 'm' clear each
 * other, so the one appearing last in the string wins.  Repeated flags are
 * harmless.  Any other character is reported through yyerror().
 */
static JsonPathParseItem *
202+
makeItemLikeRegex(JsonPathParseItem *expr, string *pattern, string *flags)
203+
{
204+
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
205+
int i;
206+
207+
v->value.like_regex.expr = expr;
208+
v->value.like_regex.pattern = pattern->val;
209+
v->value.like_regex.patternlen = pattern->len;
210+
v->value.like_regex.flags = 0;
211+
212+
for (i = 0; flags && i < flags->len; i++) /* loop body is skipped entirely when flags is NULL */
213+
{
214+
switch (flags->val[i])
215+
{
216+
case 'i':
217+
v->value.like_regex.flags |= JSP_REGEX_ICASE;
218+
break;
219+
case 's':
220+
v->value.like_regex.flags &= ~JSP_REGEX_MLINE; /* 's' overrides an earlier 'm' */
221+
v->value.like_regex.flags |= JSP_REGEX_SLINE;
222+
break;
223+
case 'm':
224+
v->value.like_regex.flags &= ~JSP_REGEX_SLINE; /* 'm' overrides an earlier 's' */
225+
v->value.like_regex.flags |= JSP_REGEX_MLINE;
226+
break;
227+
case 'x':
228+
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
229+
break;
230+
default:
231+
yyerror(NULL, "unrecognized flag of LIKE_REGEX predicate"); /* NOTE(review): presumably does not return (raises a parse error) -- confirm */
232+
break;
233+
}
234+
}
235+
236+
return v;
237+
}
238+
201239
%}
202240

203241
/* BISON Declarations */
@@ -221,7 +259,7 @@ makeAny(int first, int last)
221259
%token <str> STRING_P NUMERIC_P INT_P VARIABLE_P
222260
%token <str> OR_P AND_P NOT_P
223261
%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
224-
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P
262+
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
225263
%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P DATETIME_P
226264
%token <str> KEYVALUE_P
227265

@@ -301,9 +339,9 @@ predicate:
301339
| '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); }
302340
| pexpr STARTS_P WITH_P starts_with_initial
303341
{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
304-
/* Left for the future (needs XQuery support)
305-
| pexpr LIKE_REGEX pattern [FLAG_P flags] { $$ = ...; };
306-
*/
342+
| pexpr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); };
343+
| pexpr LIKE_REGEX_P STRING_P FLAG_P STRING_P
344+
{ $$ = makeItemLikeRegex($1, &$3, &$5); };
307345
;
308346

309347
starts_with_initial:
@@ -402,6 +440,8 @@ key_name:
402440
| LAST_P
403441
| STARTS_P
404442
| WITH_P
443+
| LIKE_REGEX_P
444+
| FLAG_P
405445
;
406446

407447
method:

src/backend/utils/adt/jsonpath_scan.l

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ static keyword keywords[] = {
273273
{ 2, false, TO_P, "to"},
274274
{ 3, false, ABS_P, "abs"},
275275
{ 3, false, LAX_P, "lax"},
276+
{ 4, false, FLAG_P, "flag"},
276277
{ 4, false, LAST_P, "last"},
277278
{ 4, true, NULL_P, "null"},
278279
{ 4, false, SIZE_P, "size"},
@@ -289,6 +290,7 @@ static keyword keywords[] = {
289290
{ 7, false, UNKNOWN_P, "unknown"},
290291
{ 8, false, DATETIME_P, "datetime"},
291292
{ 8, false, KEYVALUE_P, "keyvalue"},
293+
{ 10,false, LIKE_REGEX_P, "like_regex"},
292294
};
293295

294296
static int

src/backend/utils/adt/regexp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
335335
* Both pattern and data are given in the database encoding. We internally
336336
* convert to array of pg_wchar which is what Spencer's regex package wants.
337337
*/
338-
static bool
338+
bool
339339
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
340340
int cflags, Oid collation,
341341
int nmatch, regmatch_t *pmatch)

src/include/regex/regex.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,8 @@ extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);
173173
extern void pg_regfree(regex_t *);
174174
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
175175

176+
extern bool RE_compile_and_execute(text *text_re, char *dat, int dat_len,
177+
int cflags, Oid collation,
178+
int nmatch, regmatch_t *pmatch);
179+
176180
#endif /* _REGEX_H_ */

src/include/utils/jsonpath.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,14 @@ typedef enum JsonPathItemType {
8181
jpiSubscript,
8282
jpiLast,
8383
jpiStartsWith,
84+
jpiLikeRegex,
8485
} JsonPathItemType;
8586

87+
/* XQuery regex mode flags for LIKE_REGEX predicate */
88+
#define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */
89+
#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */
90+
#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */
91+
#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */
8692

8793
/*
8894
* Support functions to parse/construct binary value.
@@ -133,6 +139,13 @@ typedef struct JsonPathItem {
133139
char *data; /* for bool, numeric and string/key */
134140
int32 datalen; /* filled only for string/key */
135141
} value;
142+
143+
struct {
144+
int32 expr;
145+
char *pattern;
146+
int32 patternlen;
147+
uint32 flags;
148+
} like_regex;
136149
} content;
137150
} JsonPathItem;
138151

@@ -185,6 +198,13 @@ struct JsonPathParseItem {
185198
uint32 last;
186199
} anybounds;
187200

201+
struct {
202+
JsonPathParseItem *expr;
203+
char *pattern; /* may not be null-terminated */
204+
uint32 patternlen;
205+
uint32 flags;
206+
} like_regex;
207+
188208
/* scalars */
189209
Numeric numeric;
190210
bool boolean;

src/test/regress/expected/jsonb_jsonpath.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,3 +1006,18 @@ select _jsonpath_query(jsonb '[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ start
10061006
1
10071007
(2 rows)
10081008

1009+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
1010+
_jsonpath_query
1011+
-----------------
1012+
"abc"
1013+
"abdacb"
1014+
(2 rows)
1015+
1016+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
1017+
_jsonpath_query
1018+
-----------------
1019+
"abc"
1020+
"aBdC"
1021+
"abdacb"
1022+
(3 rows)
1023+

src/test/regress/expected/jsonpath.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,47 @@ select '$ ? (@ starts with $var)'::jsonpath;
389389
$?(@ starts with $"var")
390390
(1 row)
391391

392+
select '$ ? (@ like_regex "pattern")'::jsonpath;
393+
jsonpath
394+
----------------------------
395+
$?(@ like_regex "pattern")
396+
(1 row)
397+
398+
select '$ ? (@ like_regex "pattern" flag "")'::jsonpath;
399+
jsonpath
400+
----------------------------
401+
$?(@ like_regex "pattern")
402+
(1 row)
403+
404+
select '$ ? (@ like_regex "pattern" flag "i")'::jsonpath;
405+
jsonpath
406+
-------------------------------------
407+
$?(@ like_regex "pattern" flag "i")
408+
(1 row)
409+
410+
select '$ ? (@ like_regex "pattern" flag "is")'::jsonpath;
411+
jsonpath
412+
--------------------------------------
413+
$?(@ like_regex "pattern" flag "is")
414+
(1 row)
415+
416+
select '$ ? (@ like_regex "pattern" flag "isim")'::jsonpath;
417+
jsonpath
418+
--------------------------------------
419+
$?(@ like_regex "pattern" flag "im")
420+
(1 row)
421+
422+
select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath;
423+
jsonpath
424+
--------------------------------------
425+
$?(@ like_regex "pattern" flag "sx")
426+
(1 row)
427+
428+
select '$ ? (@ like_regex "pattern" flag "a")'::jsonpath;
429+
ERROR: bad jsonpath representation
430+
LINE 1: select '$ ? (@ like_regex "pattern" flag "a")'::jsonpath;
431+
^
432+
DETAIL: unrecognized flag of LIKE_REGEX predicate at or near """
392433
select '$ ? (a < 1)'::jsonpath;
393434
jsonpath
394435
-------------

src/test/regress/sql/jsonb_jsonpath.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,6 @@ select _jsonpath_query(jsonb '["abc", "abcabc", null, 1]', 'strict $ ? ((@[*] st
202202
select _jsonpath_query(jsonb '[[null, 1, "abc", "abcabc"]]', 'lax $ ? (@[*] starts with "abc")');
203203
select _jsonpath_query(jsonb '[[null, 1, "abd", "abdabc"]]', 'lax $ ? ((@[*] starts with "abc") is unknown)');
204204
select _jsonpath_query(jsonb '[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts with "abc") is unknown)');
205+
206+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
207+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');

0 commit comments

Comments
 (0)