Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1d88a75

Browse files
committed
Get rid of backtracking in jsonpath_scan.l
Non-backtracking flex parsers work faster than backtracking ones. So, this commit gets rid of backtracking in jsonpath_scan.l. That required explicit handling of some cases as well as manual backtracking for others. More regression tests for numerics are added. Discussion: https://mail.google.com/mail/u/0?ik=a20b091faa&view=om&permmsgid=msg-f%3A1628425344167939063 Author: John Naylor, Nikita Glukhov, Alexander Korotkov
1 parent 8b17298 commit 1d88a75

11 files changed

+795
-24
lines changed

src/backend/utils/adt/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
3434
windowfuncs.o xid.o xml.o
3535

3636
jsonpath_scan.c: FLEXFLAGS = -CF -p -p
37+
jsonpath_scan.c: FLEX_NO_BACKUP=yes
3738

3839
# Force these dependencies to be known even without dependency info built:
3940
jsonpath_gram.o: jsonpath_scan.c

src/backend/utils/adt/jsonpath_scan.l

+35-21
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static void addstring(bool init, char *s, int l);
3131
static void addchar(bool init, char s);
3232
static enum yytokentype checkKeyword(void);
3333
static void parseUnicode(char *s, int l);
34-
static void parseHexChars(char *s, int l);
34+
static void parseHexChar(char *s);
3535

3636
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
3737
#undef fprintf
@@ -78,9 +78,20 @@ fprintf_to_ereport(const char *fmt, const char *msg)
7878
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
7979
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
8080
blank [ \t\n\r\f]
81+
82+
digit [0-9]
83+
integer {digit}+
84+
decimal {digit}*\.{digit}+
85+
decimalfail {digit}+\.
86+
real ({integer}|{decimal})[Ee][-+]?{digit}+
87+
realfail1 ({integer}|{decimal})[Ee]
88+
realfail2 ({integer}|{decimal})[Ee][-+]
89+
8190
hex_dig [0-9A-Fa-f]
8291
unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
92+
unicodefail \\u({hex_dig}{0,3}|\{{hex_dig}{0,6})
8393
hex_char \\x{hex_dig}{2}
94+
hex_fail \\x{hex_dig}{0,1}
8495

8596
%%
8697

@@ -129,11 +140,17 @@ hex_char \\x{hex_dig}{2}
129140

130141
<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); }
131142

132-
<xnq,xq,xvq,xsq>{hex_char}+ { parseHexChars(yytext, yyleng); }
143+
<xnq,xq,xvq,xsq>{hex_char} { parseHexChar(yytext); }
144+
145+
<xnq,xq,xvq,xsq>{unicode}*{unicodefail} { yyerror(NULL, "Unicode sequence is invalid"); }
133146

134-
<xnq,xq,xvq,xsq>\\x { yyerror(NULL, "Hex character sequence is invalid"); }
147+
<xnq,xq,xvq,xsq>{hex_fail} { yyerror(NULL, "Hex character sequence is invalid"); }
135148

136-
<xnq,xq,xvq,xsq>\\u { yyerror(NULL, "Unicode sequence is invalid"); }
149+
<xnq,xq,xvq,xsq>{unicode}+\\ {
150+
/* throw back the \\, and treat as unicode */
151+
yyless(yyleng - 1);
152+
parseUnicode(yytext, yyleng);
153+
}
137154

138155
<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "Escape sequence is invalid"); }
139156

@@ -214,34 +231,38 @@ hex_char \\x{hex_dig}{2}
214231
BEGIN xc;
215232
}
216233

217-
[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */
234+
{real} {
218235
addstring(true, yytext, yyleng);
219236
addchar(false, '\0');
220237
yylval->str = scanstring;
221238
return NUMERIC_P;
222239
}
223240

224-
\.[0-9]+[eE][+-]?[0-9]+ { /* float */
241+
{decimal} {
225242
addstring(true, yytext, yyleng);
226243
addchar(false, '\0');
227244
yylval->str = scanstring;
228245
return NUMERIC_P;
229246
}
230247

231-
([0-9]+)?\.[0-9]+ {
248+
{integer} {
232249
addstring(true, yytext, yyleng);
233250
addchar(false, '\0');
234251
yylval->str = scanstring;
235-
return NUMERIC_P;
252+
return INT_P;
236253
}
237254

238-
[0-9]+ {
255+
{decimalfail} {
256+
/* throw back the ., and treat as integer */
257+
yyless(yyleng - 1);
239258
addstring(true, yytext, yyleng);
240259
addchar(false, '\0');
241260
yylval->str = scanstring;
242261
return INT_P;
243262
}
244263

264+
({realfail1}|{realfail2}) { yyerror(NULL, "Floating point number is invalid"); }
265+
245266
{any}+ {
246267
addstring(true, yytext, yyleng);
247268
BEGIN xnq;
@@ -571,7 +592,7 @@ addUnicode(int ch, int *hi_surrogate)
571592
static void
572593
parseUnicode(char *s, int l)
573594
{
574-
int i;
595+
int i = 2;
575596
int hi_surrogate = -1;
576597

577598
for (i = 2; i < l; i += 2) /* skip '\u' */
@@ -606,19 +627,12 @@ parseUnicode(char *s, int l)
606627

607628
/* Parse sequence of hex-encoded characters */
608629
static void
609-
parseHexChars(char *s, int l)
630+
parseHexChar(char *s)
610631
{
611-
int i;
612-
613-
Assert(l % 4 /* \xXX */ == 0);
614-
615-
for (i = 0; i < l / 4; i++)
616-
{
617-
int ch = (hexval(s[i * 4 + 2]) << 4) |
618-
hexval(s[i * 4 + 3]);
632+
int ch = (hexval(s[2]) << 4) |
633+
hexval(s[3]);
619634

620-
addUnicodeChar(ch);
621-
}
635+
addUnicodeChar(ch);
622636
}
623637

624638
/*

src/test/regress/expected/jsonb_jsonpath.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -1297,7 +1297,7 @@ select jsonb_path_query('null', 'true.type()');
12971297
"boolean"
12981298
(1 row)
12991299

1300-
select jsonb_path_query('null', '123.type()');
1300+
select jsonb_path_query('null', '(123).type()');
13011301
jsonb_path_query
13021302
------------------
13031303
"number"

src/test/regress/expected/jsonpath.out

+168
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,18 @@ select '1.type()'::jsonpath;
365365
1.type()
366366
(1 row)
367367

368+
select '(1).type()'::jsonpath;
369+
jsonpath
370+
----------
371+
1.type()
372+
(1 row)
373+
374+
select '1.2.type()'::jsonpath;
375+
jsonpath
376+
------------
377+
1.2.type()
378+
(1 row)
379+
368380
select '"aaa".type()'::jsonpath;
369381
jsonpath
370382
--------------
@@ -804,3 +816,159 @@ select '$ ? (@.a < +10.1e+1)'::jsonpath;
804816
$?(@."a" < 101)
805817
(1 row)
806818

819+
select '0'::jsonpath;
820+
jsonpath
821+
----------
822+
0
823+
(1 row)
824+
825+
select '00'::jsonpath;
826+
jsonpath
827+
----------
828+
0
829+
(1 row)
830+
831+
select '0.0'::jsonpath;
832+
jsonpath
833+
----------
834+
0.0
835+
(1 row)
836+
837+
select '0.000'::jsonpath;
838+
jsonpath
839+
----------
840+
0.000
841+
(1 row)
842+
843+
select '0.000e1'::jsonpath;
844+
jsonpath
845+
----------
846+
0.00
847+
(1 row)
848+
849+
select '0.000e2'::jsonpath;
850+
jsonpath
851+
----------
852+
0.0
853+
(1 row)
854+
855+
select '0.000e3'::jsonpath;
856+
jsonpath
857+
----------
858+
0
859+
(1 row)
860+
861+
select '0.0010'::jsonpath;
862+
jsonpath
863+
----------
864+
0.0010
865+
(1 row)
866+
867+
select '0.0010e-1'::jsonpath;
868+
jsonpath
869+
----------
870+
0.00010
871+
(1 row)
872+
873+
select '0.0010e+1'::jsonpath;
874+
jsonpath
875+
----------
876+
0.010
877+
(1 row)
878+
879+
select '0.0010e+2'::jsonpath;
880+
jsonpath
881+
----------
882+
0.10
883+
(1 row)
884+
885+
select '1e'::jsonpath;
886+
ERROR: bad jsonpath representation
887+
LINE 1: select '1e'::jsonpath;
888+
^
889+
DETAIL: Floating point number is invalid at or near "1e"
890+
select '1.e'::jsonpath;
891+
jsonpath
892+
----------
893+
1."e"
894+
(1 row)
895+
896+
select '1.2e'::jsonpath;
897+
ERROR: bad jsonpath representation
898+
LINE 1: select '1.2e'::jsonpath;
899+
^
900+
DETAIL: Floating point number is invalid at or near "1.2e"
901+
select '1.2.e'::jsonpath;
902+
jsonpath
903+
----------
904+
1.2."e"
905+
(1 row)
906+
907+
select '(1.2).e'::jsonpath;
908+
jsonpath
909+
----------
910+
1.2."e"
911+
(1 row)
912+
913+
select '1e3'::jsonpath;
914+
jsonpath
915+
----------
916+
1000
917+
(1 row)
918+
919+
select '1.e3'::jsonpath;
920+
jsonpath
921+
----------
922+
1."e3"
923+
(1 row)
924+
925+
select '1.e3.e'::jsonpath;
926+
jsonpath
927+
------------
928+
1."e3"."e"
929+
(1 row)
930+
931+
select '1.e3.e4'::jsonpath;
932+
jsonpath
933+
-------------
934+
1."e3"."e4"
935+
(1 row)
936+
937+
select '1.2e3'::jsonpath;
938+
jsonpath
939+
----------
940+
1200
941+
(1 row)
942+
943+
select '1.2.e3'::jsonpath;
944+
jsonpath
945+
----------
946+
1.2."e3"
947+
(1 row)
948+
949+
select '(1.2).e3'::jsonpath;
950+
jsonpath
951+
----------
952+
1.2."e3"
953+
(1 row)
954+
955+
select '1..e'::jsonpath;
956+
ERROR: bad jsonpath representation
957+
LINE 1: select '1..e'::jsonpath;
958+
^
959+
DETAIL: syntax error, unexpected '.' at or near "."
960+
select '1..e3'::jsonpath;
961+
ERROR: bad jsonpath representation
962+
LINE 1: select '1..e3'::jsonpath;
963+
^
964+
DETAIL: syntax error, unexpected '.' at or near "."
965+
select '(1.).e'::jsonpath;
966+
ERROR: bad jsonpath representation
967+
LINE 1: select '(1.).e'::jsonpath;
968+
^
969+
DETAIL: syntax error, unexpected ')' at or near ")"
970+
select '(1.).e3'::jsonpath;
971+
ERROR: bad jsonpath representation
972+
LINE 1: select '(1.).e3'::jsonpath;
973+
^
974+
DETAIL: syntax error, unexpected ')' at or near ")"

0 commit comments

Comments
 (0)