@@ -97,6 +97,7 @@ static bool is_utf16_surrogate_first(pg_wchar c);
97
97
static bool is_utf16_surrogate_second (pg_wchar c);
98
98
static pg_wchar surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second);
99
99
static void addunicode (pg_wchar c, yyscan_t yyscanner);
100
+ static bool check_uescapechar (unsigned char escape);
100
101
101
102
#define yyerror (msg ) scanner_yyerror(msg, yyscanner)
102
103
@@ -150,7 +151,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
150
151
* <xe> extended quoted strings (support backslash escape sequences)
151
152
* <xdolq> $foo$ quoted strings
152
153
* <xui> quoted identifier with Unicode escapes
154
+ * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
153
155
* <xus> quoted string with Unicode escapes
156
+ * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
154
157
* <xeu> Unicode surrogate pair in extended quoted string
155
158
*/
156
159
@@ -162,7 +165,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
162
165
%x xq
163
166
%x xdolq
164
167
%x xui
168
+ %x xuiend
165
169
%x xus
170
+ %x xusend
166
171
%x xeu
167
172
168
173
/*
@@ -279,17 +284,17 @@ xdinside [^"]+
279
284
/* Unicode escapes */
280
285
uescape [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]{quote }
281
286
/* error rule to avoid backup */
282
- uescapefail ( " - " | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ])
287
+ uescapefail [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ]
283
288
284
289
/* Quoted identifier with Unicode escapes */
285
290
xuistart [uU ]&{dquote }
286
- xuistop1 {dquote }{whitespace }* {uescapefail }?
287
- xuistop2 {dquote }{whitespace }* {uescape }
288
291
289
292
/* Quoted string with Unicode escapes */
290
293
xusstart [uU ]&{quote }
291
- xusstop1 {quote }{whitespace }* {uescapefail }?
292
- xusstop2 {quote }{whitespace }* {uescape }
294
+
295
+ /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
296
+ xustop1 {uescapefail }?
297
+ xustop2 {uescape }
293
298
294
299
/* error rule to avoid backup */
295
300
xufailed [uU ]&
@@ -536,15 +541,31 @@ other .
536
541
yylval->str = litbufdup (yyscanner);
537
542
return SCONST;
538
543
}
539
- <xus >{xusstop1 } {
544
+ <xus >{quotestop } |
545
+ <xus >{quotefail } {
540
546
/* throw back all but the quote */
541
547
yyless (1 );
548
+ /* handle possible UESCAPE in xusend mode */
549
+ BEGIN (xusend);
550
+ }
551
+ <xusend>{whitespace} {
+ 	/* stay in xusend state over whitespace; UESCAPE may still follow */
+ }
552
+ 	/* end of input also means "no UESCAPE": the pending string literal must still be returned */
+ <xusend><<EOF>> |
+ <xusend>{other} |
553
+ <xusend >{xustop1 } {
554
+ /* no UESCAPE after the quote, throw back everything */
555
+ yyless (0 );
542
556
BEGIN (INITIAL);
543
557
yylval->str = litbuf_udeescape (' \\ ' , yyscanner);
544
558
return SCONST;
545
559
}
546
- <xus >{xusstop2 } {
560
+ <xusend >{xustop2 } {
561
+ /* found UESCAPE after the end quote */
547
562
BEGIN (INITIAL);
563
+ if (!check_uescapechar (yytext[yyleng-2 ]))
564
+ {
565
+ SET_YYLLOC ();
566
+ ADVANCE_YYLLOC (yyleng-2 );
567
+ yyerror (" invalid Unicode escape character" );
568
+ }
548
569
yylval->str = litbuf_udeescape (yytext[yyleng-2 ], yyscanner);
549
570
return SCONST;
550
571
}
@@ -702,26 +723,41 @@ other .
702
723
yylval->str = ident;
703
724
return IDENT;
704
725
}
705
- <xui >{xuistop1 } {
726
+ <xui >{dquote } {
727
+ yyless (1 );
728
+ /* handle possible UESCAPE in xuiend mode */
729
+ BEGIN (xuiend);
730
+ }
731
+ <xuiend >{whitespace } { }
732
+ 	/* end of input also means "no UESCAPE": the pending identifier must still be returned */
+ <xuiend><<EOF>> |
+ <xuiend>{other} |
733
+ <xuiend >{xustop1 } {
734
+ /* no UESCAPE after the quote, throw back everything */
706
735
char *ident;
707
736
737
+ yyless (0 );
738
+
708
739
BEGIN (INITIAL);
709
740
if (yyextra->literallen == 0 )
710
741
yyerror (" zero-length delimited identifier" );
711
742
ident = litbuf_udeescape (' \\ ' , yyscanner);
712
743
if (yyextra->literallen >= NAMEDATALEN)
713
744
truncate_identifier (ident, yyextra->literallen , true );
714
745
yylval->str = ident;
715
- /* throw back all but the quote */
716
- yyless (1 );
717
746
return IDENT;
718
747
}
719
- <xui >{xuistop2 } {
748
+ <xuiend >{xustop2 } {
749
+ /* found UESCAPE after the end quote */
720
750
char *ident;
721
751
722
752
BEGIN (INITIAL);
723
753
if (yyextra->literallen == 0 )
724
754
yyerror (" zero-length delimited identifier" );
755
+ if (!check_uescapechar (yytext[yyleng-2 ]))
756
+ {
757
+ SET_YYLLOC ();
758
+ ADVANCE_YYLLOC (yyleng-2 );
759
+ yyerror (" invalid Unicode escape character" );
760
+ }
725
761
ident = litbuf_udeescape (yytext[yyleng - 2 ], yyscanner);
726
762
if (yyextra->literallen >= NAMEDATALEN)
727
763
truncate_identifier (ident, yyextra->literallen , true );
@@ -1203,22 +1239,29 @@ addunicode(pg_wchar c, core_yyscan_t yyscanner)
1203
1239
addlit (buf, pg_mblen (buf), yyscanner);
1204
1240
}
1205
1241
1206
- static char *
1207
- litbuf_udeescape (unsigned char escape, core_yyscan_t yyscanner)
1242
+ /* Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)? */
1243
+ static bool
1244
+ check_uescapechar (unsigned char escape)
1208
1245
{
1209
- char *new ;
1210
- char *litbuf, *in, *out;
1211
- pg_wchar pair_first = 0 ;
1212
-
1213
1246
if (isxdigit (escape)
1214
1247
|| escape == ' +'
1215
1248
|| escape == ' \' '
1216
1249
|| escape == ' "'
1217
1250
|| scanner_isspace (escape))
1218
1251
{
1219
- ADVANCE_YYLLOC (yyextra->literallen + yyleng + 1 );
1220
- yyerror (" invalid Unicode escape character" );
1252
+ return false ;
1221
1253
}
1254
+ else
1255
+ return true ;
1256
+ }
1257
+
1258
+ /* Like litbufdup, but handle Unicode escapes. */
1259
+ static char *
1260
+ litbuf_udeescape (unsigned char escape, core_yyscan_t yyscanner)
1261
+ {
1262
+ char *new ;
1263
+ char *litbuf, *in, *out;
1264
+ pg_wchar pair_first = 0 ;
1222
1265
1223
1266
/* Make literalbuf null-terminated to simplify the scanning loop */
1224
1267
litbuf = yyextra->literalbuf ;
0 commit comments