Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6aa5186

Browse files
committed
Fix limitations on what SQL commands can be issued to a walsender.
In logical replication mode, a WalSender is supposed to be able to execute any regular SQL command, as well as the special replication commands. Poor design of the replication-command parser caused it to fail in various cases, notably:

* semicolons embedded in a command, or multiple SQL commands sent in a single message;
* dollar-quoted literals containing odd numbers of single or double quote marks;
* commands starting with a comment.

The basic problem here is that we're trying to run repl_scanner.l across the entire input string even when it's not a replication command. Since repl_scanner.l does not understand all of the token types known to the core lexer, this is doomed to have failure modes.

We certainly don't want to make repl_scanner.l as big as scan.l, so instead rejigger stuff so that we only lex the first token of a non-replication command. That will usually look like an IDENT to repl_scanner.l, though a comment would end up getting reported as a '-' or '/' single-character token. If the token is a replication command keyword, we push it back and proceed normally with repl_gram.y parsing. Otherwise, we can drop out of exec_replication_command() without examining the rest of the string. (It's still theoretically possible for repl_scanner.l to fail on the first token; but that could only happen if it's an unterminated single- or double-quoted string, in which case you'd have gotten largely the same error from the core lexer too.)

In this way, repl_gram.y isn't involved at all in handling general SQL commands, so we can get rid of the SQLCmd node type. (In the back branches, we can't remove it because renumbering enum NodeTag would be an ABI break; so just leave it sit there unused.)

I failed to resist the temptation to clean up some other sloppy coding in repl_scanner.l while at it. The only externally-visible behavior change from that is it now accepts \r and \f as whitespace, same as the core lexer.

Per bug #17379 from Greg Rychlewski.
Back-patch to all supported branches. Discussion: https://postgr.es/m/17379-6a5c6cfb3f1f5e77@postgresql.org
1 parent 0ad8032 commit 6aa5186

File tree

6 files changed

+98
-69
lines changed

6 files changed

+98
-69
lines changed

src/backend/replication/repl_gram.y

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
/* Result of the parsing is returned here */
2626
Node *replication_parse_result;
2727

28-
static SQLCmd *make_sqlcmd(void);
29-
3028

3129
/*
3230
* Bison doesn't allocate anything that needs to live across parser calls,
@@ -59,7 +57,6 @@ static SQLCmd *make_sqlcmd(void);
5957
%token <str> SCONST IDENT
6058
%token <uintval> UCONST
6159
%token <recptr> RECPTR
62-
%token T_WORD
6360

6461
/* Keyword tokens. */
6562
%token K_BASE_BACKUP
@@ -95,7 +92,7 @@ static SQLCmd *make_sqlcmd(void);
9592
%type <node> command
9693
%type <node> base_backup start_replication start_logical_replication
9794
create_replication_slot drop_replication_slot identify_system
98-
read_replication_slot timeline_history show sql_cmd
95+
read_replication_slot timeline_history show
9996
%type <list> base_backup_legacy_opt_list generic_option_list
10097
%type <defelt> base_backup_legacy_opt generic_option
10198
%type <uintval> opt_timeline
@@ -129,7 +126,6 @@ command:
129126
| read_replication_slot
130127
| timeline_history
131128
| show
132-
| sql_cmd
133129
;
134130

135131
/*
@@ -450,10 +446,6 @@ plugin_opt_arg:
450446
| /* EMPTY */ { $$ = NULL; }
451447
;
452448

453-
sql_cmd:
454-
IDENT { $$ = (Node *) make_sqlcmd(); }
455-
;
456-
457449
generic_option_list:
458450
generic_option_list ',' generic_option
459451
{ $$ = lappend($1, $3); }
@@ -514,20 +506,4 @@ ident_or_keyword:
514506

515507
%%
516508

517-
static SQLCmd *
518-
make_sqlcmd(void)
519-
{
520-
SQLCmd *cmd = makeNode(SQLCmd);
521-
int tok;
522-
523-
/* Just move lexer to the end of command. */
524-
for (;;)
525-
{
526-
tok = yylex();
527-
if (tok == ';' || tok == 0)
528-
break;
529-
}
530-
return cmd;
531-
}
532-
533509
#include "repl_scanner.c"

src/backend/replication/repl_scanner.l

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ fprintf_to_ereport(const char *fmt, const char *msg)
3131
/* Handle to the buffer that the lexer uses internally */
3232
static YY_BUFFER_STATE scanbufhandle;
3333

34+
/* Pushed-back token (we only handle one) */
35+
static int repl_pushed_back_token;
36+
37+
/* Work area for collecting literals */
3438
static StringInfoData litbuf;
3539

3640
static void startlit(void);
@@ -51,7 +55,18 @@ static void addlitchar(unsigned char ychar);
5155
%option warn
5256
%option prefix="replication_yy"
5357

54-
%x xq xd
58+
/*
59+
* Exclusive states:
60+
* <xd> delimited identifiers (double-quoted identifiers)
61+
* <xq> standard single-quoted strings
62+
*/
63+
%x xd
64+
%x xq
65+
66+
space [ \t\n\r\f]
67+
68+
quote '
69+
quotestop {quote}
5570

5671
/* Extended quote
5772
* xqdouble implements embedded quote, ''''
@@ -69,11 +84,8 @@ xdstop {dquote}
6984
xddouble {dquote}{dquote}
7085
xdinside [^"]+
7186

72-
digit [0-9]+
73-
hexdigit [0-9A-Za-z]+
74-
75-
quote '
76-
quotestop {quote}
87+
digit [0-9]
88+
hexdigit [0-9A-Fa-f]
7789

7890
ident_start [A-Za-z\200-\377_]
7991
ident_cont [A-Za-z\200-\377_0-9\$]
@@ -82,6 +94,19 @@ identifier {ident_start}{ident_cont}*
8294

8395
%%
8496

97+
%{
98+
/* This code is inserted at the start of replication_yylex() */
99+
100+
/* If we have a pushed-back token, return that. */
101+
if (repl_pushed_back_token)
102+
{
103+
int result = repl_pushed_back_token;
104+
105+
repl_pushed_back_token = 0;
106+
return result;
107+
}
108+
%}
109+
85110
BASE_BACKUP { return K_BASE_BACKUP; }
86111
FAST { return K_FAST; }
87112
IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
@@ -112,14 +137,7 @@ WAIT { return K_WAIT; }
112137
MANIFEST { return K_MANIFEST; }
113138
MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
114139

115-
"," { return ','; }
116-
";" { return ';'; }
117-
"(" { return '('; }
118-
")" { return ')'; }
119-
120-
[\n] ;
121-
[\t] ;
122-
" " ;
140+
{space}+ { /* do nothing */ }
123141

124142
{digit}+ {
125143
yylval.uintval = strtoul(yytext, NULL, 10);
@@ -181,16 +199,18 @@ MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
181199
return IDENT;
182200
}
183201

202+
. {
203+
/* Any char not recognized above is returned as itself */
204+
return yytext[0];
205+
}
206+
184207
<xq,xd><<EOF>> { yyerror("unterminated quoted string"); }
185208

186209

187210
<<EOF>> {
188211
yyterminate();
189212
}
190213

191-
. {
192-
return T_WORD;
193-
}
194214
%%
195215

196216
/* LCOV_EXCL_STOP */
@@ -250,6 +270,7 @@ replication_scanner_init(const char *str)
250270

251271
/* Make sure we start in proper state */
252272
BEGIN(INITIAL);
273+
repl_pushed_back_token = 0;
253274
}
254275

255276
void
@@ -258,3 +279,35 @@ replication_scanner_finish(void)
258279
yy_delete_buffer(scanbufhandle);
259280
scanbufhandle = NULL;
260281
}
282+
283+
/*
284+
* Check to see if the first token of a command is a WalSender keyword.
285+
*
286+
* To keep repl_scanner.l minimal, we don't ask it to know every construct
287+
* that the core lexer knows. Therefore, we daren't lex more than the
288+
* first token of a general SQL command. That will usually look like an
289+
* IDENT token here, although some other cases are possible.
290+
*/
291+
bool
292+
replication_scanner_is_replication_command(void)
293+
{
294+
int first_token = replication_yylex();
295+
296+
switch (first_token)
297+
{
298+
case K_IDENTIFY_SYSTEM:
299+
case K_BASE_BACKUP:
300+
case K_START_REPLICATION:
301+
case K_CREATE_REPLICATION_SLOT:
302+
case K_DROP_REPLICATION_SLOT:
303+
case K_READ_REPLICATION_SLOT:
304+
case K_TIMELINE_HISTORY:
305+
case K_SHOW:
306+
/* Yes; push back the first token so we can parse later. */
307+
repl_pushed_back_token = first_token;
308+
return true;
309+
default:
310+
/* Nope; we don't bother to push back the token. */
311+
return false;
312+
}
313+
}

src/backend/replication/walsender.c

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,7 +1630,8 @@ exec_replication_command(const char *cmd_string)
16301630
*/
16311631
if (MyWalSnd->state == WALSNDSTATE_STOPPING)
16321632
ereport(ERROR,
1633-
(errmsg("cannot execute new commands while WAL sender is in stopping mode")));
1633+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1634+
errmsg("cannot execute new commands while WAL sender is in stopping mode")));
16341635

16351636
/*
16361637
* CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next
@@ -1641,41 +1642,49 @@ exec_replication_command(const char *cmd_string)
16411642
CHECK_FOR_INTERRUPTS();
16421643

16431644
/*
1644-
* Parse the command.
1645+
* Prepare to parse and execute the command.
16451646
*/
16461647
cmd_context = AllocSetContextCreate(CurrentMemoryContext,
16471648
"Replication command context",
16481649
ALLOCSET_DEFAULT_SIZES);
16491650
old_context = MemoryContextSwitchTo(cmd_context);
16501651

16511652
replication_scanner_init(cmd_string);
1652-
parse_rc = replication_yyparse();
1653-
if (parse_rc != 0)
1654-
ereport(ERROR,
1655-
(errcode(ERRCODE_SYNTAX_ERROR),
1656-
errmsg_internal("replication command parser returned %d",
1657-
parse_rc)));
1658-
replication_scanner_finish();
1659-
1660-
cmd_node = replication_parse_result;
16611653

16621654
/*
1663-
* If it's a SQL command, just clean up our mess and return false; the
1664-
* caller will take care of executing it.
1655+
* Is it a WalSender command?
16651656
*/
1666-
if (IsA(cmd_node, SQLCmd))
1657+
if (!replication_scanner_is_replication_command())
16671658
{
1668-
if (MyDatabaseId == InvalidOid)
1669-
ereport(ERROR,
1670-
(errmsg("cannot execute SQL commands in WAL sender for physical replication")));
1659+
/* Nope; clean up and get out. */
1660+
replication_scanner_finish();
16711661

16721662
MemoryContextSwitchTo(old_context);
16731663
MemoryContextDelete(cmd_context);
16741664

1665+
/* XXX this is a pretty random place to make this check */
1666+
if (MyDatabaseId == InvalidOid)
1667+
ereport(ERROR,
1668+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1669+
errmsg("cannot execute SQL commands in WAL sender for physical replication")));
1670+
16751671
/* Tell the caller that this wasn't a WalSender command. */
16761672
return false;
16771673
}
16781674

1675+
/*
1676+
* Looks like a WalSender command, so parse it.
1677+
*/
1678+
parse_rc = replication_yyparse();
1679+
if (parse_rc != 0)
1680+
ereport(ERROR,
1681+
(errcode(ERRCODE_SYNTAX_ERROR),
1682+
errmsg_internal("replication command parser returned %d",
1683+
parse_rc)));
1684+
replication_scanner_finish();
1685+
1686+
cmd_node = replication_parse_result;
1687+
16791688
/*
16801689
* Report query to various monitoring facilities. For this purpose, we
16811690
* report replication commands just like SQL commands.

src/include/nodes/nodes.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,6 @@ typedef enum NodeTag
501501
T_ReadReplicationSlotCmd,
502502
T_StartReplicationCmd,
503503
T_TimeLineHistoryCmd,
504-
T_SQLCmd,
505504

506505
/*
507506
* TAGS FOR RANDOM OTHER STUFF

src/include/nodes/replnodes.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,4 @@ typedef struct TimeLineHistoryCmd
108108
TimeLineID timeline;
109109
} TimeLineHistoryCmd;
110110

111-
/* ----------------------
112-
* SQL commands
113-
* ----------------------
114-
*/
115-
typedef struct SQLCmd
116-
{
117-
NodeTag type;
118-
} SQLCmd;
119-
120111
#endif /* REPLNODES_H */

src/include/replication/walsender_private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ extern int replication_yylex(void);
121121
extern void replication_yyerror(const char *str) pg_attribute_noreturn();
122122
extern void replication_scanner_init(const char *query_string);
123123
extern void replication_scanner_finish(void);
124+
extern bool replication_scanner_is_replication_command(void);
124125

125126
extern Node *replication_parse_result;
126127

0 commit comments

Comments
 (0)