Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1393b87

Browse files
author
Nikita Glukhov
committed
Add jsonpath LIKE_REGEX predicate
1 parent 34c7b12 commit 1393b87

File tree

11 files changed

+287
-6
lines changed

11 files changed

+287
-6
lines changed

src/backend/utils/adt/jsonpath.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,29 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
101101
*(int32*)(buf->data + right) = chld;
102102
}
103103
break;
104+
case jpiLikeRegex:
105+
{
106+
int32 offs;
107+
108+
appendBinaryStringInfo(buf,
109+
(char *) &item->value.like_regex.flags,
110+
sizeof(item->value.like_regex.flags));
111+
offs = buf->len;
112+
appendBinaryStringInfo(buf, (char *) &offs /* fake value */, sizeof(offs));
113+
114+
appendBinaryStringInfo(buf,
115+
(char *) &item->value.like_regex.patternlen,
116+
sizeof(item->value.like_regex.patternlen));
117+
appendBinaryStringInfo(buf, item->value.like_regex.pattern,
118+
item->value.like_regex.patternlen);
119+
appendStringInfoChar(buf, '\0');
120+
121+
chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr,
122+
forbiddenRoot,
123+
insideArraySubscript);
124+
*(int32 *)(buf->data + offs) = chld;
125+
}
126+
break;
104127
case jpiFilter:
105128
case jpiIsUnknown:
106129
case jpiNot:
@@ -373,6 +396,38 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, bool printBracket
373396
if (printBracketes)
374397
appendStringInfoChar(buf, ')');
375398
break;
399+
case jpiLikeRegex:
400+
if (printBracketes)
401+
appendStringInfoChar(buf, '(');
402+
403+
jspInitByBuffer(&elem, v->base, v->content.like_regex.expr);
404+
printJsonPathItem(buf, &elem, false,
405+
operationPriority(elem.type) <=
406+
operationPriority(v->type));
407+
408+
appendBinaryStringInfo(buf, " like_regex ", 12);
409+
410+
escape_json(buf, v->content.like_regex.pattern);
411+
412+
if (v->content.like_regex.flags)
413+
{
414+
appendBinaryStringInfo(buf, " flag \"", 7);
415+
416+
if (v->content.like_regex.flags & JSP_REGEX_ICASE)
417+
appendStringInfoChar(buf, 'i');
418+
if (v->content.like_regex.flags & JSP_REGEX_SLINE)
419+
appendStringInfoChar(buf, 's');
420+
if (v->content.like_regex.flags & JSP_REGEX_MLINE)
421+
appendStringInfoChar(buf, 'm');
422+
if (v->content.like_regex.flags & JSP_REGEX_WSPACE)
423+
appendStringInfoChar(buf, 'x');
424+
425+
appendStringInfoChar(buf, '"');
426+
}
427+
428+
if (printBracketes)
429+
appendStringInfoChar(buf, ')');
430+
break;
376431
case jpiPlus:
377432
case jpiMinus:
378433
if (printBracketes)
@@ -611,6 +666,12 @@ jspInitByBuffer(JsonPathItem *v, char *base, int32 pos)
611666
read_int32(v->content.args.left, base, pos);
612667
read_int32(v->content.args.right, base, pos);
613668
break;
669+
case jpiLikeRegex:
670+
read_int32(v->content.like_regex.flags, base, pos);
671+
read_int32(v->content.like_regex.expr, base, pos);
672+
read_int32(v->content.like_regex.patternlen, base, pos);
673+
v->content.like_regex.pattern = base + pos;
674+
break;
614675
case jpiNot:
615676
case jpiExists:
616677
case jpiIsUnknown:

src/backend/utils/adt/jsonpath_exec.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "catalog/pg_collation.h"
1717
#include "catalog/pg_type.h"
1818
#include "lib/stringinfo.h"
19+
#include "regex/regex.h"
1920
#include "utils/builtins.h"
2021
#include "utils/datum.h"
2122
#include "utils/json.h"
@@ -840,6 +841,72 @@ executeStartsWithPredicate(JsonPathExecContext *cxt, JsonPathItem *jsp,
840841
return jperNotFound;
841842
}
842843

844+
static JsonPathExecResult
845+
executeLikeRegexPredicate(JsonPathExecContext *cxt, JsonPathItem *jsp,
846+
JsonbValue *jb)
847+
{
848+
JsonPathExecResult res;
849+
JsonPathItem elem;
850+
List *lseq = NIL;
851+
ListCell *lc;
852+
text *regex;
853+
uint32 flags = jsp->content.like_regex.flags;
854+
int cflags = REG_ADVANCED;
855+
bool error = false;
856+
bool found = false;
857+
858+
if (flags & JSP_REGEX_ICASE)
859+
cflags |= REG_ICASE;
860+
if (flags & JSP_REGEX_MLINE)
861+
cflags |= REG_NEWLINE;
862+
if (flags & JSP_REGEX_SLINE)
863+
cflags &= ~REG_NEWLINE;
864+
if (flags & JSP_REGEX_WSPACE)
865+
cflags |= REG_EXPANDED;
866+
867+
regex = cstring_to_text_with_len(jsp->content.like_regex.pattern,
868+
jsp->content.like_regex.patternlen);
869+
870+
jspInitByBuffer(&elem, jsp->base, jsp->content.like_regex.expr);
871+
res = recursiveExecuteAndUnwrap(cxt, &elem, jb, &lseq);
872+
if (jperIsError(res))
873+
return jperError;
874+
875+
foreach(lc, lseq)
876+
{
877+
JsonbValue *str = lfirst(lc);
878+
JsonbValue strbuf;
879+
880+
if (JsonbType(str) == jbvScalar)
881+
str = JsonbExtractScalar(str->val.binary.data, &strbuf);
882+
883+
if (str->type != jbvString)
884+
{
885+
if (!cxt->lax)
886+
return jperError;
887+
888+
error = true;
889+
}
890+
else if (RE_compile_and_execute(regex, str->val.string.val,
891+
str->val.string.len, cflags,
892+
DEFAULT_COLLATION_OID, 0, NULL))
893+
{
894+
if (cxt->lax)
895+
return jperOk;
896+
897+
found = true;
898+
}
899+
}
900+
901+
if (found) /* possible only in strict mode */
902+
return jperOk;
903+
904+
if (error) /* possible only in lax mode */
905+
return jperError;
906+
907+
return jperNotFound;
908+
}
909+
843910
/*
844911
* Main executor function: walks on jsonpath structure and tries to find
845912
* corresponding parts of jsonb. Note, jsonb and jsonpath values should be
@@ -1540,6 +1607,9 @@ recursiveExecuteNoUnwrap(JsonPathExecContext *cxt, JsonPathItem *jsp,
15401607
case jpiStartsWith:
15411608
res = executeStartsWithPredicate(cxt, jsp, jb);
15421609
break;
1610+
case jpiLikeRegex:
1611+
res = executeLikeRegexPredicate(cxt, jsp, jb);
1612+
break;
15431613
default:
15441614
elog(ERROR, "unrecognized jsonpath item type: %d", jsp->type);
15451615
}

src/backend/utils/adt/jsonpath_gram.y

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
#include "postgres.h"
1616

1717
#include "fmgr.h"
18+
#include "catalog/pg_collation.h"
1819
#include "miscadmin.h"
1920
#include "nodes/pg_list.h"
21+
#include "regex/regex.h"
2022
#include "utils/builtins.h"
2123
#include "utils/jsonpath.h"
2224

@@ -201,6 +203,53 @@ makeAny(int first, int last)
201203
return v;
202204
}
203205

206+
static JsonPathParseItem *
207+
makeItemLikeRegex(JsonPathParseItem *expr, string *pattern, string *flags)
208+
{
209+
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
210+
int i;
211+
int cflags = REG_ADVANCED;
212+
213+
v->value.like_regex.expr = expr;
214+
v->value.like_regex.pattern = pattern->val;
215+
v->value.like_regex.patternlen = pattern->len;
216+
v->value.like_regex.flags = 0;
217+
218+
for (i = 0; flags && i < flags->len; i++)
219+
{
220+
switch (flags->val[i])
221+
{
222+
case 'i':
223+
v->value.like_regex.flags |= JSP_REGEX_ICASE;
224+
cflags |= REG_ICASE;
225+
break;
226+
case 's':
227+
v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
228+
v->value.like_regex.flags |= JSP_REGEX_SLINE;
229+
cflags |= REG_NEWLINE;
230+
break;
231+
case 'm':
232+
v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
233+
v->value.like_regex.flags |= JSP_REGEX_MLINE;
234+
cflags &= ~REG_NEWLINE;
235+
break;
236+
case 'x':
237+
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
238+
cflags |= REG_EXPANDED;
239+
break;
240+
default:
241+
yyerror(NULL, "unrecognized flag of LIKE_REGEX predicate");
242+
break;
243+
}
244+
}
245+
246+
/* check regex validity */
247+
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val, pattern->len),
248+
cflags, DEFAULT_COLLATION_OID);
249+
250+
return v;
251+
}
252+
204253
%}
205254

206255
/* BISON Declarations */
@@ -224,7 +273,7 @@ makeAny(int first, int last)
224273
%token <str> STRING_P NUMERIC_P INT_P VARIABLE_P
225274
%token <str> OR_P AND_P NOT_P
226275
%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
227-
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P
276+
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
228277
%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P DATETIME_P
229278
%token <str> KEYVALUE_P
230279

@@ -304,9 +353,9 @@ predicate:
304353
| '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); }
305354
| pexpr STARTS_P WITH_P starts_with_initial
306355
{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
307-
/* Left for the future (needs XQuery support)
308-
| pexpr LIKE_REGEX pattern [FLAG_P flags] { $$ = ...; };
309-
*/
356+
| pexpr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
357+
| pexpr LIKE_REGEX_P STRING_P FLAG_P STRING_P
358+
{ $$ = makeItemLikeRegex($1, &$3, &$5); }
310359
;
311360

312361
starts_with_initial:
@@ -404,6 +453,8 @@ key_name:
404453
| LAST_P
405454
| STARTS_P
406455
| WITH_P
456+
| LIKE_REGEX_P
457+
| FLAG_P
407458
;
408459

409460
method:

src/backend/utils/adt/jsonpath_scan.l

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ static keyword keywords[] = {
273273
{ 2, false, TO_P, "to"},
274274
{ 3, false, ABS_P, "abs"},
275275
{ 3, false, LAX_P, "lax"},
276+
{ 4, false, FLAG_P, "flag"},
276277
{ 4, false, LAST_P, "last"},
277278
{ 4, true, NULL_P, "null"},
278279
{ 4, false, SIZE_P, "size"},
@@ -289,6 +290,7 @@ static keyword keywords[] = {
289290
{ 7, false, UNKNOWN_P, "unknown"},
290291
{ 8, false, DATETIME_P, "datetime"},
291292
{ 8, false, KEYVALUE_P, "keyvalue"},
293+
{ 10,false, LIKE_REGEX_P, "like_regex"},
292294
};
293295

294296
static int

src/backend/utils/adt/regexp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
133133
* Pattern is given in the database encoding. We internally convert to
134134
* an array of pg_wchar, which is what Spencer's regex package wants.
135135
*/
136-
static regex_t *
136+
regex_t *
137137
RE_compile_and_cache(text *text_re, int cflags, Oid collation)
138138
{
139139
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
@@ -339,7 +339,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
339339
* Both pattern and data are given in the database encoding. We internally
340340
* convert to array of pg_wchar which is what Spencer's regex package wants.
341341
*/
342-
static bool
342+
bool
343343
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
344344
int cflags, Oid collation,
345345
int nmatch, regmatch_t *pmatch)

src/include/regex/regex.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,9 @@ extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);
173173
extern void pg_regfree(regex_t *);
174174
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
175175

176+
extern regex_t *RE_compile_and_cache(text *text_re, int cflags, Oid collation);
177+
extern bool RE_compile_and_execute(text *text_re, char *dat, int dat_len,
178+
int cflags, Oid collation,
179+
int nmatch, regmatch_t *pmatch);
180+
176181
#endif /* _REGEX_H_ */

src/include/utils/jsonpath.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,14 @@ typedef enum JsonPathItemType {
8181
jpiSubscript,
8282
jpiLast,
8383
jpiStartsWith,
84+
jpiLikeRegex,
8485
} JsonPathItemType;
8586

87+
/* XQuery regex mode flags for LIKE_REGEX predicate */
88+
#define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */
89+
#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */
90+
#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */
91+
#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */
8692

8793
/*
8894
* Support functions to parse/construct binary value.
@@ -133,6 +139,13 @@ typedef struct JsonPathItem {
133139
char *data; /* for bool, numeric and string/key */
134140
int32 datalen; /* filled only for string/key */
135141
} value;
142+
143+
struct {
144+
int32 expr;
145+
char *pattern;
146+
int32 patternlen;
147+
uint32 flags;
148+
} like_regex;
136149
} content;
137150
} JsonPathItem;
138151

@@ -185,6 +198,13 @@ struct JsonPathParseItem {
185198
uint32 last;
186199
} anybounds;
187200

201+
struct {
202+
JsonPathParseItem *expr;
203+
char *pattern; /* may not be null-terminated; use patternlen */
204+
uint32 patternlen;
205+
uint32 flags;
206+
} like_regex;
207+
188208
/* scalars */
189209
Numeric numeric;
190210
bool boolean;

src/test/regress/expected/jsonb_jsonpath.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,3 +1016,18 @@ select _jsonpath_query(jsonb '[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ start
10161016
1
10171017
(2 rows)
10181018

1019+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
1020+
_jsonpath_query
1021+
-----------------
1022+
"abc"
1023+
"abdacb"
1024+
(2 rows)
1025+
1026+
select _jsonpath_query(jsonb '[null, 1, "abc", "abd", "aBdC", "abdacb", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
1027+
_jsonpath_query
1028+
-----------------
1029+
"abc"
1030+
"aBdC"
1031+
"abdacb"
1032+
(3 rows)
1033+

0 commit comments

Comments
 (0)