Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e56cad8

Browse files
committed
Fix some minor spec-compliance issues in jsonpath lexer.
Although the SQL/JSON tech report makes reference to ECMAScript which allows both single- and double-quoted strings, all the rest of the report speaks only of double-quoted string literals in jsonpaths. That's more compatible with JSON itself; moreover single-quoted strings are hard to use inside a jsonpath that is itself a single-quoted SQL literal. So guess that the intent is to allow only double-quoted literals, and remove lexer support for single-quoted literals. It'll be less painful to add this again later if we're wrong, than to remove a shipped feature.

Also, adjust the lexer so that unrecognized backslash sequences are treated as just meaning the escaped character, not as errors. This change has much better support in the standards, as JSON, JavaScript and ECMAScript all make it plain that that's what's supposed to happen.

Back-patch to v12.

Discussion: https://postgr.es/m/CAPpHfdvDci4iqNF9fhRkTqhe-5_8HmzeLt56drH%2B_Rv2rNRqfg@mail.gmail.com
1 parent 96b6c82 commit e56cad8

File tree

6 files changed

+38
-235
lines changed

6 files changed

+38
-235
lines changed

src/backend/utils/adt/jsonpath_scan.l

+31-48
Original file line number | Diff line number | Diff line change
@@ -59,25 +59,24 @@ fprintf_to_ereport(const char *fmt, const char *msg)
5959
%option noyyfree
6060

6161
/*
62-
* We use exclusive states for quoted, signle-quoted and non-quoted strings,
63-
* quoted variable names and C-tyle comments.
62+
* We use exclusive states for quoted and non-quoted strings,
63+
* quoted variable names and C-style comments.
6464
* Exclusive states:
6565
* <xq> - quoted strings
6666
* <xnq> - non-quoted strings
6767
* <xvq> - quoted variable names
68-
* <xsq> - single-quoted strings
6968
* <xc> - C-style comment
7069
*/
7170

7271
%x xq
7372
%x xnq
7473
%x xvq
75-
%x xsq
7674
%x xc
7775

78-
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
79-
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
76+
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
8077
blank [ \t\n\r\f]
78+
/* "other" means anything that's not special, blank, or '\' or '"' */
79+
other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
8180

8281
digit [0-9]
8382
integer (0|[1-9]{digit}*)
@@ -95,7 +94,7 @@ hex_fail \\x{hex_dig}{0,1}
9594

9695
%%
9796

98-
<xnq>{any}+ {
97+
<xnq>{other}+ {
9998
addstring(false, yytext, yyleng);
10099
}
101100

@@ -105,13 +104,12 @@ hex_fail \\x{hex_dig}{0,1}
105104
return checkKeyword();
106105
}
107106

108-
109107
<xnq>\/\* {
110108
yylval->str = scanstring;
111109
BEGIN xc;
112110
}
113111

114-
<xnq>({special}|\"|\') {
112+
<xnq>({special}|\") {
115113
yylval->str = scanstring;
116114
yyless(0);
117115
BEGIN INITIAL;
@@ -124,39 +122,37 @@ hex_fail \\x{hex_dig}{0,1}
124122
return checkKeyword();
125123
}
126124

127-
<xnq,xq,xvq,xsq>\\[\"\'\\] { addchar(false, yytext[1]); }
128-
129-
<xnq,xq,xvq,xsq>\\b { addchar(false, '\b'); }
125+
<xnq,xq,xvq>\\b { addchar(false, '\b'); }
130126

131-
<xnq,xq,xvq,xsq>\\f { addchar(false, '\f'); }
127+
<xnq,xq,xvq>\\f { addchar(false, '\f'); }
132128

133-
<xnq,xq,xvq,xsq>\\n { addchar(false, '\n'); }
129+
<xnq,xq,xvq>\\n { addchar(false, '\n'); }
134130

135-
<xnq,xq,xvq,xsq>\\r { addchar(false, '\r'); }
131+
<xnq,xq,xvq>\\r { addchar(false, '\r'); }
136132

137-
<xnq,xq,xvq,xsq>\\t { addchar(false, '\t'); }
133+
<xnq,xq,xvq>\\t { addchar(false, '\t'); }
138134

139-
<xnq,xq,xvq,xsq>\\v { addchar(false, '\v'); }
135+
<xnq,xq,xvq>\\v { addchar(false, '\v'); }
140136

141-
<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); }
137+
<xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); }
142138

143-
<xnq,xq,xvq,xsq>{hex_char} { parseHexChar(yytext); }
139+
<xnq,xq,xvq>{hex_char} { parseHexChar(yytext); }
144140

145-
<xnq,xq,xvq,xsq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); }
141+
<xnq,xq,xvq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); }
146142

147-
<xnq,xq,xvq,xsq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); }
143+
<xnq,xq,xvq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); }
148144

149-
<xnq,xq,xvq,xsq>{unicode}+\\ {
150-
/* throw back the \\, and treat as unicode */
151-
yyless(yyleng - 1);
152-
parseUnicode(yytext, yyleng);
153-
}
145+
<xnq,xq,xvq>{unicode}+\\ {
146+
/* throw back the \\, and treat as unicode */
147+
yyless(yyleng - 1);
148+
parseUnicode(yytext, yyleng);
149+
}
154150

155-
<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "escape sequence is invalid"); }
151+
<xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
156152

157-
<xnq,xq,xvq,xsq>\\ { yyerror(NULL, "unexpected end after backslash"); }
153+
<xnq,xq,xvq>\\ { yyerror(NULL, "unexpected end after backslash"); }
158154

159-
<xq,xvq,xsq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); }
155+
<xq,xvq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); }
160156

161157
<xq>\" {
162158
yylval->str = scanstring;
@@ -170,16 +166,8 @@ hex_fail \\x{hex_dig}{0,1}
170166
return VARIABLE_P;
171167
}
172168

173-
<xsq>\' {
174-
yylval->str = scanstring;
175-
BEGIN INITIAL;
176-
return STRING_P;
177-
}
178-
179169
<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
180170

181-
<xsq>[^\\\']+ { addstring(false, yytext, yyleng); }
182-
183171
<xc>\*\/ { BEGIN INITIAL; }
184172

185173
<xc>[^\*]+ { }
@@ -210,7 +198,7 @@ hex_fail \\x{hex_dig}{0,1}
210198

211199
\> { return GREATER_P; }
212200

213-
\${any}+ {
201+
\${other}+ {
214202
addstring(true, yytext + 1, yyleng - 1);
215203
addchar(false, '\0');
216204
yylval->str = scanstring;
@@ -263,27 +251,22 @@ hex_fail \\x{hex_dig}{0,1}
263251

264252
({realfail1}|{realfail2}) { yyerror(NULL, "invalid floating point number"); }
265253

266-
{any}+ {
267-
addstring(true, yytext, yyleng);
268-
BEGIN xnq;
269-
}
270-
271254
\" {
272255
addchar(true, '\0');
273256
BEGIN xq;
274257
}
275258

276-
\' {
277-
addchar(true, '\0');
278-
BEGIN xsq;
279-
}
280-
281259
\\ {
282260
yyless(0);
283261
addchar(true, '\0');
284262
BEGIN xnq;
285263
}
286264

265+
{other}+ {
266+
addstring(true, yytext, yyleng);
267+
BEGIN xnq;
268+
}
269+
287270
<<EOF>> { yyterminate(); }
288271

289272
%%

src/test/regress/expected/jsonpath.out

+6-12
Original file line number | Diff line number | Diff line change
@@ -171,30 +171,24 @@ select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
171171
"\b\f\r\n\t\u000b\"'\\"
172172
(1 row)
173173

174-
select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
175-
jsonpath
176-
-------------------------
177-
"\b\f\r\n\t\u000b\"'\\"
178-
(1 row)
179-
180174
select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
181175
jsonpath
182176
----------
183177
"PgSQL"
184178
(1 row)
185179

186-
select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
187-
jsonpath
188-
----------
189-
"PgSQL"
190-
(1 row)
191-
192180
select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
193181
jsonpath
194182
---------------------
195183
$."fooPgSQL\t\"bar"
196184
(1 row)
197185

186+
select '"\z"'::jsonpath; -- unrecognized escape is just the literal char
187+
jsonpath
188+
----------
189+
"z"
190+
(1 row)
191+
198192
select '$.g ? ($.a == 1)'::jsonpath;
199193
jsonpath
200194
--------------------

src/test/regress/expected/jsonpath_encoding.out

-78
Original file line number | Diff line number | Diff line change
@@ -81,84 +81,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
8181
"null \\u0000 escape"
8282
(1 row)
8383

84-
-- checks for single-quoted values
85-
-- basic unicode input
86-
SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape
87-
ERROR: invalid Unicode escape
88-
LINE 1: SELECT E'\'\u\''::jsonpath;
89-
^
90-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
91-
SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape
92-
ERROR: invalid Unicode escape
93-
LINE 1: SELECT E'\'\u00\''::jsonpath;
94-
^
95-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
96-
SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit
97-
ERROR: invalid Unicode escape
98-
LINE 1: SELECT E'\'\u000g\''::jsonpath;
99-
^
100-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
101-
SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape
102-
ERROR: invalid Unicode escape value at or near "E'\'\u0000"
103-
LINE 1: SELECT E'\'\u0000\''::jsonpath;
104-
^
105-
SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK
106-
jsonpath
107-
----------
108-
"ꯍ"
109-
(1 row)
110-
111-
-- handling of unicode surrogate pairs
112-
select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
113-
correct_in_utf8
114-
-----------------
115-
"😄🐶"
116-
(1 row)
117-
118-
select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
119-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
120-
LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
121-
^
122-
select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
123-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
124-
LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
125-
^
126-
select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
127-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX"
128-
LINE 1: select E'\'\ud83dX\''::jsonpath;
129-
^
130-
select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
131-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
132-
LINE 1: select E'\'\ude04X\''::jsonpath;
133-
^
134-
--handling of simple unicode escapes
135-
select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
136-
correct_in_utf8
137-
------------------------
138-
"the Copyright © sign"
139-
(1 row)
140-
141-
select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
142-
correct_everywhere
143-
----------------------
144-
"dollar $ character"
145-
(1 row)
146-
147-
select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
148-
not_an_escape
149-
----------------------
150-
"dollar $ character"
151-
(1 row)
152-
153-
select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
154-
ERROR: invalid Unicode escape value at or near "E'\'null \u0000"
155-
LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
156-
^
157-
select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
158-
ERROR: unsupported Unicode escape sequence
159-
LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
160-
^
161-
DETAIL: \u0000 cannot be converted to text.
16284
-- checks for quoted key names
16385
-- basic unicode input
16486
SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape

src/test/regress/expected/jsonpath_encoding_1.out

-72
Original file line number | Diff line number | Diff line change
@@ -78,78 +78,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
7878
"null \\u0000 escape"
7979
(1 row)
8080

81-
-- checks for single-quoted values
82-
-- basic unicode input
83-
SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape
84-
ERROR: invalid Unicode escape
85-
LINE 1: SELECT E'\'\u\''::jsonpath;
86-
^
87-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
88-
SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape
89-
ERROR: invalid Unicode escape
90-
LINE 1: SELECT E'\'\u00\''::jsonpath;
91-
^
92-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
93-
SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit
94-
ERROR: invalid Unicode escape
95-
LINE 1: SELECT E'\'\u000g\''::jsonpath;
96-
^
97-
HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
98-
SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape
99-
ERROR: invalid Unicode escape value at or near "E'\'\u0000"
100-
LINE 1: SELECT E'\'\u0000\''::jsonpath;
101-
^
102-
SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK
103-
ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD"
104-
LINE 1: SELECT E'\'\uaBcD\''::jsonpath;
105-
^
106-
-- handling of unicode surrogate pairs
107-
select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
108-
ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04"
109-
LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_...
110-
^
111-
select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
112-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
113-
LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
114-
^
115-
select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
116-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
117-
LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
118-
^
119-
select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
120-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX"
121-
LINE 1: select E'\'\ud83dX\''::jsonpath;
122-
^
123-
select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
124-
ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
125-
LINE 1: select E'\'\ude04X\''::jsonpath;
126-
^
127-
--handling of simple unicode escapes
128-
select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
129-
ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9"
130-
LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct...
131-
^
132-
select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
133-
correct_everywhere
134-
----------------------
135-
"dollar $ character"
136-
(1 row)
137-
138-
select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
139-
not_an_escape
140-
----------------------
141-
"dollar $ character"
142-
(1 row)
143-
144-
select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
145-
ERROR: invalid Unicode escape value at or near "E'\'null \u0000"
146-
LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
147-
^
148-
select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
149-
ERROR: unsupported Unicode escape sequence
150-
LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
151-
^
152-
DETAIL: \u0000 cannot be converted to text.
15381
-- checks for quoted key names
15482
-- basic unicode input
15583
SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape

src/test/regress/sql/jsonpath.sql

+1-2
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,9 @@ select '$.a/+-1'::jsonpath;
3030
select '1 * 2 + 4 % -3 != false'::jsonpath;
3131

3232
select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
33-
select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
3433
select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
35-
select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
3634
select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
35+
select '"\z"'::jsonpath; -- unrecognized escape is just the literal char
3736
3837
select '$.g ? ($.a == 1)'::jsonpath;
3938
select '$.g ? (@ == 1)'::jsonpath;

0 commit comments

Comments
 (0)