Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 799ac99

Browse files
committed
Sync psql's scanner with recent changes in backend scanner's flex rules.
Marko Kreen, Tom Lane
1 parent 3686bcb commit 799ac99

File tree

2 files changed

+47
-7
lines changed

2 files changed

+47
-7
lines changed

src/backend/parser/scan.l

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.161 2009/09/25 21:13:06 petere Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -571,18 +571,16 @@ other .
571571

572572
BEGIN(xe);
573573
}
574-
<xeu>. |
575-
<xeu>\n |
574+
<xeu>. { yyerror("invalid Unicode surrogate pair"); }
575+
<xeu>\n { yyerror("invalid Unicode surrogate pair"); }
576576
<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
577-
578577
<xe,xeu>{xeunicodefail} {
579578
ereport(ERROR,
580579
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
581580
errmsg("invalid Unicode escape"),
582581
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
583582
lexer_errposition()));
584-
}
585-
583+
}
586584
<xe>{xeescape} {
587585
if (yytext[1] == '\'')
588586
{

src/bin/psql/psqlscan.l

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
* Portions Copyright (c) 1994, Regents of the University of California
3434
*
3535
* IDENTIFICATION
36-
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.28 2009/01/01 17:23:55 momjian Exp $
36+
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
3737
*
3838
*-------------------------------------------------------------------------
3939
*/
@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
117117
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
118118
char **txtcopy);
119119
static void emit(const char *txt, int len);
120+
static bool is_utf16_surrogate_first(uint32 c);
120121

121122
#define ECHO emit(yytext, yyleng)
122123

@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
158159
* <xdolq> $foo$ quoted strings
159160
* <xui> quoted identifier with Unicode escapes
160161
* <xus> quoted string with Unicode escapes
162+
* <xeu> Unicode surrogate pair in extended quoted string
161163
*/
162164

163165
%x xb
@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
169171
%x xdolq
170172
%x xui
171173
%x xus
174+
%x xeu
172175
/* Additional exclusive states for psql only: lex backslash commands */
173176
%x xslashcmd
174177
%x xslasharg
@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
192195
* did not end with a newline.
193196
*
194197
* XXX perhaps \f (formfeed) should be treated as a newline as well?
198+
*
199+
* XXX if you change the set of whitespace characters, fix scanner_isspace()
200+
* to agree, and see also the plpgsql lexer.
195201
*/
196202

197203
space [ \t\n\r\f]
@@ -253,6 +259,8 @@ xeinside [^\\']+
253259
xeescape [\\][^0-7]
254260
xeoctesc [\\][0-7]{1,3}
255261
xehexesc [\\]x[0-9A-Fa-f]{1,2}
262+
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
263+
xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
256264
257265
/* Extended quote
258266
* xqdouble implements embedded quote, ''''
@@ -334,6 +342,10 @@ identifier {ident_start}{ident_cont}*
334342

335343
typecast "::"
336344

345+
/* these two token types are used by PL/pgsql, though not in core SQL */
346+
dot_dot \.\.
347+
colon_equals ":="
348+
337349
/*
338350
* "self" is the set of chars that should be returned as single-character
339351
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
@@ -511,6 +523,22 @@ other .
511523
<xe>{xeinside} {
512524
ECHO;
513525
}
526+
<xe>{xeunicode} {
527+
uint32 c = strtoul(yytext+2, NULL, 16);
528+
529+
if (is_utf16_surrogate_first(c))
530+
BEGIN(xeu);
531+
ECHO;
532+
}
533+
<xeu>{xeunicode} {
534+
BEGIN(xe);
535+
ECHO;
536+
}
537+
<xeu>. { ECHO; }
538+
<xeu>\n { ECHO; }
539+
<xe,xeu>{xeunicodefail} {
540+
ECHO;
541+
}
514542
<xe>{xeescape} {
515543
ECHO;
516544
}
@@ -605,6 +633,14 @@ other .
605633
ECHO;
606634
}
607635

636+
{dot_dot} {
637+
ECHO;
638+
}
639+
640+
{colon_equals} {
641+
ECHO;
642+
}
643+
608644
/*
609645
* These rules are specific to psql --- they implement parenthesis
610646
* counting and detection of command-ending semicolon. These must
@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
16901726
}
16911727
}
16921728
}
1729+
1730+
static bool
1731+
is_utf16_surrogate_first(uint32 c)
1732+
{
1733+
return (c >= 0xD800 && c <= 0xDBFF);
1734+
}

0 commit comments

Comments
 (0)