Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a5ff502

Browse files
committed
Change the way UESCAPE is lexed, to reduce the size of the flex tables.
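The error rule used to avoid backtracking with the U&'...' UESCAPE 'x' syntax bloated the flex tables, so refactor it. This patch shortens the error rule by introducing new exclusive flex states (xusend for strings, xuiend for identifiers) that are entered once the closing quote of a U&'...' string or U&"..." identifier has been scanned; the optional UESCAPE clause is then lexed in that state. This shrinks the postgres binary by about 220kB.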
1 parent 59d0bf9 commit a5ff502

File tree

1 file changed

+62
-19
lines changed

1 file changed

+62
-19
lines changed

src/backend/parser/scan.l

+62-19
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ static bool is_utf16_surrogate_first(pg_wchar c);
9797
static bool is_utf16_surrogate_second(pg_wchar c);
9898
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
9999
static void addunicode(pg_wchar c, yyscan_t yyscanner);
100+
static bool check_uescapechar(unsigned char escape);
100101

101102
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
102103

@@ -150,7 +151,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
150151
* <xe> extended quoted strings (support backslash escape sequences)
151152
* <xdolq> $foo$ quoted strings
152153
* <xui> quoted identifier with Unicode escapes
154+
* <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
153155
* <xus> quoted string with Unicode escapes
156+
* <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
154157
* <xeu> Unicode surrogate pair in extended quoted string
155158
*/
156159

@@ -162,7 +165,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
162165
%x xq
163166
%x xdolq
164167
%x xui
168+
%x xuiend
165169
%x xus
170+
%x xusend
166171
%x xeu
167172

168173
/*
@@ -279,17 +284,17 @@ xdinside [^"]+
279284
/* Unicode escapes */
280285
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
281286
/* error rule to avoid backup */
282-
uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
287+
uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
283288

284289
/* Quoted identifier with Unicode escapes */
285290
xuistart [uU]&{dquote}
286-
xuistop1 {dquote}{whitespace}*{uescapefail}?
287-
xuistop2 {dquote}{whitespace}*{uescape}
288291

289292
/* Quoted string with Unicode escapes */
290293
xusstart [uU]&{quote}
291-
xusstop1 {quote}{whitespace}*{uescapefail}?
292-
xusstop2 {quote}{whitespace}*{uescape}
294+
295+
/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
296+
xustop1 {uescapefail}?
297+
xustop2 {uescape}
293298

294299
/* error rule to avoid backup */
295300
xufailed [uU]&
@@ -536,15 +541,31 @@ other .
536541
yylval->str = litbufdup(yyscanner);
537542
return SCONST;
538543
}
539-
<xus>{xusstop1} {
544+
<xus>{quotestop} |
545+
<xus>{quotefail} {
540546
/* throw back all but the quote */
541547
yyless(1);
548+
/* handle possible UESCAPE in xusend mode */
549+
BEGIN(xusend);
550+
}
551+
<xusend>{whitespace}
552+
<xusend>{other} |
553+
<xusend>{xustop1} {
554+
/* no UESCAPE after the quote, throw back everything */
555+
yyless(0);
542556
BEGIN(INITIAL);
543557
yylval->str = litbuf_udeescape('\\', yyscanner);
544558
return SCONST;
545559
}
546-
<xus>{xusstop2} {
560+
<xusend>{xustop2} {
561+
/* found UESCAPE after the end quote */
547562
BEGIN(INITIAL);
563+
if (!check_uescapechar(yytext[yyleng-2]))
564+
{
565+
SET_YYLLOC();
566+
ADVANCE_YYLLOC(yyleng-2);
567+
yyerror("invalid Unicode escape character");
568+
}
548569
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
549570
return SCONST;
550571
}
@@ -702,26 +723,41 @@ other .
702723
yylval->str = ident;
703724
return IDENT;
704725
}
705-
<xui>{xuistop1} {
726+
<xui>{dquote} {
727+
yyless(1);
728+
/* handle possible UESCAPE in xuiend mode */
729+
BEGIN(xuiend);
730+
}
731+
<xuiend>{whitespace} { }
732+
<xuiend>{other} |
733+
<xuiend>{xustop1} {
734+
/* no UESCAPE after the quote, throw back everything */
706735
char *ident;
707736

737+
yyless(0);
738+
708739
BEGIN(INITIAL);
709740
if (yyextra->literallen == 0)
710741
yyerror("zero-length delimited identifier");
711742
ident = litbuf_udeescape('\\', yyscanner);
712743
if (yyextra->literallen >= NAMEDATALEN)
713744
truncate_identifier(ident, yyextra->literallen, true);
714745
yylval->str = ident;
715-
/* throw back all but the quote */
716-
yyless(1);
717746
return IDENT;
718747
}
719-
<xui>{xuistop2} {
748+
<xuiend>{xustop2} {
749+
/* found UESCAPE after the end quote */
720750
char *ident;
721751

722752
BEGIN(INITIAL);
723753
if (yyextra->literallen == 0)
724754
yyerror("zero-length delimited identifier");
755+
if (!check_uescapechar(yytext[yyleng-2]))
756+
{
757+
SET_YYLLOC();
758+
ADVANCE_YYLLOC(yyleng-2);
759+
yyerror("invalid Unicode escape character");
760+
}
725761
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
726762
if (yyextra->literallen >= NAMEDATALEN)
727763
truncate_identifier(ident, yyextra->literallen, true);
@@ -1203,22 +1239,29 @@ addunicode(pg_wchar c, core_yyscan_t yyscanner)
12031239
addlit(buf, pg_mblen(buf), yyscanner);
12041240
}
12051241

1206-
static char *
1207-
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
1242+
/* Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)? */
1243+
static bool
1244+
check_uescapechar(unsigned char escape)
12081245
{
1209-
char *new;
1210-
char *litbuf, *in, *out;
1211-
pg_wchar pair_first = 0;
1212-
12131246
if (isxdigit(escape)
12141247
|| escape == '+'
12151248
|| escape == '\''
12161249
|| escape == '"'
12171250
|| scanner_isspace(escape))
12181251
{
1219-
ADVANCE_YYLLOC(yyextra->literallen + yyleng + 1);
1220-
yyerror("invalid Unicode escape character");
1252+
return false;
12211253
}
1254+
else
1255+
return true;
1256+
}
1257+
1258+
/* Like litbufdup, but handles Unicode escapes. */
1259+
static char *
1260+
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
1261+
{
1262+
char *new;
1263+
char *litbuf, *in, *out;
1264+
pg_wchar pair_first = 0;
12221265

12231266
/* Make literalbuf null-terminated to simplify the scanning loop */
12241267
litbuf = yyextra->literalbuf;

0 commit comments

Comments
 (0)