Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 58e7053

Browse files
committed
Implement dollar-quoting in the backend lexer and psql.
Documentation is still lacking, as is support in plpgsql and other places, but this is the basic feature. Patch by Andrew Dunstan, some tweaking by Tom Lane. Also, enable %option nodefault in these two lexers, and patch some gaps revealed thereby.
1 parent 1f87d79 commit 58e7053

File tree

4 files changed

+131
-12
lines changed

4 files changed

+131
-12
lines changed

src/backend/parser/scan.l

+57-3
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1994, Regents of the University of California
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.114 2004/02/21 00:34:52 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.115 2004/02/24 21:45:18 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -37,6 +37,7 @@
3737
extern YYSTYPE yylval;
3838

3939
static int xcdepth = 0; /* depth of nesting in slash-star comments */
40+
static char *dolqstart; /* current $foo$ quote start string */
4041

4142
/*
4243
* literalbuf is used to accumulate literal values when multiple rules
@@ -74,6 +75,7 @@ unsigned char unescape_single_char(unsigned char c);
7475

7576
%option 8bit
7677
%option never-interactive
78+
%option nodefault
7779
%option nounput
7880
%option noyywrap
7981
%option prefix="base_yy"
@@ -94,13 +96,15 @@ unsigned char unescape_single_char(unsigned char c);
9496
* <xd> delimited identifiers (double-quoted identifiers)
9597
* <xh> hexadecimal numeric string
9698
* <xq> quoted strings
99+
* <xdolq> $foo$ quoted strings
97100
*/
98101

99102
%x xb
100103
%x xc
101104
%x xd
102105
%x xh
103106
%x xq
107+
%x xdolq
104108

105109
/*
106110
* In order to make the world safe for Windows and Mac clients as well as
@@ -175,6 +179,17 @@ xqescape [\\][^0-7]
175179
xqoctesc [\\][0-7]{1,3}
176180
xqcat {quote}{whitespace_with_newline}{quote}
177181

182+
/* $foo$ style quotes ("dollar quoting")
183+
* The quoted string starts with $foo$ where "foo" is an optional string
184+
* in the form of an identifier, except that it may not contain "$",
185+
* and extends to the first occurrence of an identical string.
186+
* There is *no* processing of the quoted text.
187+
*/
188+
dolq_start [A-Za-z\200-\377_]
189+
dolq_cont [A-Za-z\200-\377_0-9]
190+
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
191+
dolqinside [^$]+
192+
178193
/* Double quote
179194
* Allows embedded spaces and other special characters into identifiers.
180195
*/
@@ -242,7 +257,8 @@ param \${integer}
242257
other .
243258

244259
/*
245-
* Quoted strings must allow some special characters such as single-quote
260+
* Dollar quoted strings are totally opaque, and no escaping is done on them.
261+
* Other quoted strings must allow some special characters such as single-quote
246262
* and newline.
247263
* Embedded single-quotes are implemented both in the SQL standard
248264
* style of two adjacent single quotes "''" and in the Postgres/Java style
@@ -388,8 +404,46 @@ other .
388404
<xq>{xqcat} {
389405
/* ignore */
390406
}
407+
<xq>. {
408+
/* This is only needed for \ just before EOF */
409+
addlitchar(yytext[0]);
410+
}
391411
<xq><<EOF>> { yyerror("unterminated quoted string"); }
392412

413+
{dolqdelim} {
414+
token_start = yytext;
415+
dolqstart = pstrdup(yytext);
416+
BEGIN(xdolq);
417+
startlit();
418+
}
419+
<xdolq>{dolqdelim} {
420+
if (strcmp(yytext, dolqstart) == 0)
421+
{
422+
pfree(dolqstart);
423+
BEGIN(INITIAL);
424+
yylval.str = litbufdup();
425+
return SCONST;
426+
}
427+
else
428+
{
429+
/*
430+
* When we fail to match $...$ to dolqstart, transfer
431+
* the $... part to the output, but put back the final
432+
* $ for rescanning. Consider $delim$...$junk$delim$
433+
*/
434+
addlit(yytext, yyleng-1);
435+
yyless(yyleng-1);
436+
}
437+
}
438+
<xdolq>{dolqinside} {
439+
addlit(yytext, yyleng);
440+
}
441+
<xdolq>. {
442+
/* This is only needed for $ inside the quoted text */
443+
addlitchar(yytext[0]);
444+
}
445+
<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
446+
393447
{xdstart} {
394448
token_start = yytext;
395449
BEGIN(xd);
@@ -407,7 +461,7 @@ other .
407461
yylval.str = ident;
408462
return IDENT;
409463
}
410-
<xd>{xddouble} {
464+
<xd>{xddouble} {
411465
addlitchar('"');
412466
}
413467
<xd>{xdinside} {

src/bin/psql/prompt.c

+5-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
*
44
* Copyright (c) 2000-2003, PostgreSQL Global Development Group
55
*
6-
* $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.34 2004/01/25 03:07:22 neilc Exp $
6+
* $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.35 2004/02/24 21:45:18 tgl Exp $
77
*/
88
#include "postgres_fe.h"
99
#include "prompt.h"
@@ -85,6 +85,7 @@ get_prompt(promptStatus_t status)
8585
case PROMPT_CONTINUE:
8686
case PROMPT_SINGLEQUOTE:
8787
case PROMPT_DOUBLEQUOTE:
88+
case PROMPT_DOLLARQUOTE:
8889
case PROMPT_COMMENT:
8990
case PROMPT_PAREN:
9091
prompt_name = "PROMPT2";
@@ -199,6 +200,9 @@ get_prompt(promptStatus_t status)
199200
case PROMPT_DOUBLEQUOTE:
200201
buf[0] = '"';
201202
break;
203+
case PROMPT_DOLLARQUOTE:
204+
buf[0] = '$';
205+
break;
202206
case PROMPT_COMMENT:
203207
buf[0] = '*';
204208
break;

src/bin/psql/prompt.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
*
44
* Copyright (c) 2000-2003, PostgreSQL Global Development Group
55
*
6-
* $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.13 2003/11/29 19:52:07 pgsql Exp $
6+
* $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.14 2004/02/24 21:45:18 tgl Exp $
77
*/
88
#ifndef PROMPT_H
99
#define PROMPT_H
@@ -15,6 +15,7 @@ typedef enum _promptStatus
1515
PROMPT_COMMENT,
1616
PROMPT_SINGLEQUOTE,
1717
PROMPT_DOUBLEQUOTE,
18+
PROMPT_DOLLARQUOTE,
1819
PROMPT_PAREN,
1920
PROMPT_COPY
2021
} promptStatus_t;

src/bin/psql/psqlscan.l

+67-7
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
* Portions Copyright (c) 1994, Regents of the University of California
3232
*
3333
* IDENTIFICATION
34-
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.1 2004/02/19 19:40:09 tgl Exp $
34+
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.2 2004/02/24 21:45:18 tgl Exp $
3535
*
3636
*-------------------------------------------------------------------------
3737
*/
@@ -92,6 +92,7 @@ typedef struct PsqlScanStateData
9292
int start_state; /* saved YY_START */
9393
int paren_depth; /* depth of nesting in parentheses */
9494
int xcdepth; /* depth of nesting in slash-star comments */
95+
char *dolqstart; /* current $foo$ quote start string */
9596
} PsqlScanStateData;
9697

9798
static PsqlScanState cur_state; /* current state while active */
@@ -123,6 +124,7 @@ static void emit(const char *txt, int len);
123124

124125
%option 8bit
125126
%option never-interactive
127+
%option nodefault
126128
%option nounput
127129
%option noyywrap
128130

@@ -151,13 +153,15 @@ static void emit(const char *txt, int len);
151153
* <xd> delimited identifiers (double-quoted identifiers)
152154
* <xh> hexadecimal numeric string
153155
* <xq> quoted strings
156+
* <xdolq> $foo$ quoted strings
154157
*/
155158

156159
%x xb
157160
%x xc
158161
%x xd
159162
%x xh
160163
%x xq
164+
%x xdolq
161165
/* Additional exclusive states for psql only: lex backslash commands */
162166
%x xslashcmd
163167
%x xslasharg
@@ -241,6 +245,17 @@ xqescape [\\][^0-7]
241245
xqoctesc [\\][0-7]{1,3}
242246
xqcat {quote}{whitespace_with_newline}{quote}
243247

248+
/* $foo$ style quotes ("dollar quoting")
249+
* The quoted string starts with $foo$ where "foo" is an optional string
250+
* in the form of an identifier, except that it may not contain "$",
251+
* and extends to the first occurrence of an identical string.
252+
* There is *no* processing of the quoted text.
253+
*/
254+
dolq_start [A-Za-z\200-\377_]
255+
dolq_cont [A-Za-z\200-\377_0-9]
256+
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
257+
dolqinside [^$]+
258+
244259
/* Double quote
245260
* Allows embedded spaces and other special characters into identifiers.
246261
*/
@@ -308,7 +323,8 @@ param \${integer}
308323
other .
309324

310325
/*
311-
* Quoted strings must allow some special characters such as single-quote
326+
* Dollar quoted strings are totally opaque, and no escaping is done on them.
327+
* Other quoted strings must allow some special characters such as single-quote
312328
* and newline.
313329
* Embedded single-quotes are implemented both in the SQL standard
314330
* style of two adjacent single quotes "''" and in the Postgres/Java style
@@ -427,6 +443,41 @@ other .
427443
<xq>{xqcat} {
428444
ECHO;
429445
}
446+
<xq>. {
447+
/* This is only needed for \ just before EOF */
448+
ECHO;
449+
}
450+
451+
{dolqdelim} {
452+
cur_state->dolqstart = pg_strdup(yytext);
453+
BEGIN(xdolq);
454+
ECHO;
455+
}
456+
<xdolq>{dolqdelim} {
457+
if (strcmp(yytext, cur_state->dolqstart) == 0)
458+
{
459+
free(cur_state->dolqstart);
460+
cur_state->dolqstart = NULL;
461+
BEGIN(INITIAL);
462+
}
463+
else
464+
{
465+
/*
466+
* When we fail to match $...$ to dolqstart, transfer
467+
* the $... part to the output, but put back the final
468+
* $ for rescanning. Consider $delim$...$junk$delim$
469+
*/
470+
yyless(yyleng-1);
471+
}
472+
ECHO;
473+
}
474+
<xdolq>{dolqinside} {
475+
ECHO;
476+
}
477+
<xdolq>. {
478+
/* This is only needed for $ inside the quoted text */
479+
ECHO;
480+
}
430481

431482
{xdstart} {
432483
BEGIN(xd);
@@ -436,7 +487,7 @@ other .
436487
BEGIN(INITIAL);
437488
ECHO;
438489
}
439-
<xd>{xddouble} {
490+
<xd>{xddouble} {
440491
ECHO;
441492
}
442493
<xd>{xdinside} {
@@ -754,7 +805,7 @@ other .
754805

755806
"\\". { emit(yytext + 1, 1); }
756807

757-
{other} { ECHO; }
808+
{other}|\n { ECHO; }
758809

759810
}
760811

@@ -766,7 +817,7 @@ other .
766817

767818
"`" { return LEXRES_OK; }
768819

769-
{other} { ECHO; }
820+
{other}|\n { ECHO; }
770821

771822
}
772823

@@ -811,7 +862,7 @@ other .
811862
BEGIN(xslashdefaultarg);
812863
}
813864

814-
{other} { ECHO; }
865+
{other}|\n { ECHO; }
815866

816867
}
817868

@@ -833,7 +884,7 @@ other .
833884

834885
"\\\\" { return LEXRES_OK; }
835886

836-
{other} {
887+
{other}|\n {
837888
yyless(0);
838889
return LEXRES_OK;
839890
}
@@ -865,6 +916,8 @@ psql_scan_destroy(PsqlScanState state)
865916
{
866917
psql_scan_finish(state);
867918

919+
psql_scan_reset(state);
920+
868921
free(state);
869922
}
870923

@@ -1008,6 +1061,10 @@ psql_scan(PsqlScanState state,
10081061
result = PSCAN_INCOMPLETE;
10091062
*prompt = PROMPT_SINGLEQUOTE;
10101063
break;
1064+
case xdolq:
1065+
result = PSCAN_INCOMPLETE;
1066+
*prompt = PROMPT_DOLLARQUOTE;
1067+
break;
10111068
default:
10121069
/* can't get here */
10131070
fprintf(stderr, "invalid YY_START\n");
@@ -1082,6 +1139,9 @@ psql_scan_reset(PsqlScanState state)
10821139
state->start_state = INITIAL;
10831140
state->paren_depth = 0;
10841141
state->xcdepth = 0; /* not really necessary */
1142+
if (state->dolqstart)
1143+
free(state->dolqstart);
1144+
state->dolqstart = NULL;
10851145
}
10861146

10871147
/*

0 commit comments

Comments
 (0)