Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5392dd3

Browse files
committed
Fix plpgsql's handling of -- comments following expressions.
Up to now, read_sql_construct() has collected all the source text from the statement or expression's initial token up to the character just before the "until" token. It normally tries to strip trailing whitespace from that, largely for neatness. If there was a "-- text" comment after the expression, this resulted in removing the newline that terminates the comment, which creates a hazard if we try to paste the collected text into a larger SQL construct without inserting a newline after it. In particular this caused our handling of CASE constructs to fail if there's a comment after a WHEN expression. Commit 4adead1 noticed a similar problem with cursor arguments, and worked around it through the rather crude hack of suppressing the whitespace-trimming behavior for those. Rather than do that and leave the hazard open for future hackers to trip over, let's fix it properly. pl_scanner.c already has enough infrastructure to report the end location of the expression's last token, so we can copy up to that location and never collect any trailing whitespace or comment to begin with. Erik Wienhold and Tom Lane, per report from Michal Bartak. Back-patch to all supported branches. Discussion: https://postgr.es/m/CAAVzF_FjRoi8fOVuLCZhQJx6HATQ7MKm=aFOHWZODFnLmjX-xA@mail.gmail.com
1 parent 52b49b7 commit 5392dd3

File tree

7 files changed

+76
-36
lines changed

7 files changed

+76
-36
lines changed

src/pl/plpgsql/src/expected/plpgsql_control.out

+17
Original file line numberDiff line numberDiff line change
@@ -681,3 +681,20 @@ select case_test(13);
681681
other
682682
(1 row)
683683

684+
-- test line comment between WHEN and THEN
685+
create or replace function case_comment(int) returns text as $$
686+
begin
687+
case $1
688+
when 1 -- comment before THEN
689+
then return 'one';
690+
else
691+
return 'other';
692+
end case;
693+
end;
694+
$$ language plpgsql immutable;
695+
select case_comment(1);
696+
case_comment
697+
--------------
698+
one
699+
(1 row)
700+

src/pl/plpgsql/src/pl_gram.y

+21-26
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ static PLpgSQL_expr *read_sql_construct(int until,
6666
RawParseMode parsemode,
6767
bool isexpression,
6868
bool valid_sql,
69-
bool trim,
7069
int *startloc,
7170
int *endtoken);
7271
static PLpgSQL_expr *read_sql_expression(int until,
@@ -895,7 +894,7 @@ stmt_perform : K_PERFORM
895894
*/
896895
new->expr = read_sql_construct(';', 0, 0, ";",
897896
RAW_PARSE_DEFAULT,
898-
false, false, true,
897+
false, false,
899898
&startloc, NULL);
900899
/* overwrite "perform" ... */
901900
memcpy(new->expr->query, " SELECT", 7);
@@ -981,7 +980,7 @@ stmt_assign : T_DATUM
981980
plpgsql_push_back_token(T_DATUM);
982981
new->expr = read_sql_construct(';', 0, 0, ";",
983982
pmode,
984-
false, true, true,
983+
false, true,
985984
NULL, NULL);
986985

987986
$$ = (PLpgSQL_stmt *) new;
@@ -1474,7 +1473,6 @@ for_control : for_variable K_IN
14741473
RAW_PARSE_DEFAULT,
14751474
true,
14761475
false,
1477-
true,
14781476
&expr1loc,
14791477
&tok);
14801478

@@ -1879,7 +1877,7 @@ stmt_raise : K_RAISE
18791877
expr = read_sql_construct(',', ';', K_USING,
18801878
", or ; or USING",
18811879
RAW_PARSE_PLPGSQL_EXPR,
1882-
true, true, true,
1880+
true, true,
18831881
NULL, &tok);
18841882
new->params = lappend(new->params, expr);
18851883
}
@@ -2016,7 +2014,7 @@ stmt_dynexecute : K_EXECUTE
20162014
expr = read_sql_construct(K_INTO, K_USING, ';',
20172015
"INTO or USING or ;",
20182016
RAW_PARSE_PLPGSQL_EXPR,
2019-
true, true, true,
2017+
true, true,
20202018
NULL, &endtoken);
20212019

20222020
new = palloc(sizeof(PLpgSQL_stmt_dynexecute));
@@ -2055,7 +2053,7 @@ stmt_dynexecute : K_EXECUTE
20552053
expr = read_sql_construct(',', ';', K_INTO,
20562054
", or ; or INTO",
20572055
RAW_PARSE_PLPGSQL_EXPR,
2058-
true, true, true,
2056+
true, true,
20592057
NULL, &endtoken);
20602058
new->params = lappend(new->params, expr);
20612059
} while (endtoken == ',');
@@ -2640,7 +2638,7 @@ read_sql_expression(int until, const char *expected)
26402638
{
26412639
return read_sql_construct(until, 0, 0, expected,
26422640
RAW_PARSE_PLPGSQL_EXPR,
2643-
true, true, true, NULL, NULL);
2641+
true, true, NULL, NULL);
26442642
}
26452643

26462644
/* Convenience routine to read an expression with two possible terminators */
@@ -2650,7 +2648,7 @@ read_sql_expression2(int until, int until2, const char *expected,
26502648
{
26512649
return read_sql_construct(until, until2, 0, expected,
26522650
RAW_PARSE_PLPGSQL_EXPR,
2653-
true, true, true, NULL, endtoken);
2651+
true, true, NULL, endtoken);
26542652
}
26552653

26562654
/* Convenience routine to read a SQL statement that must end with ';' */
@@ -2659,7 +2657,7 @@ read_sql_stmt(void)
26592657
{
26602658
return read_sql_construct(';', 0, 0, ";",
26612659
RAW_PARSE_DEFAULT,
2662-
false, true, true, NULL, NULL);
2660+
false, true, NULL, NULL);
26632661
}
26642662

26652663
/*
@@ -2672,7 +2670,6 @@ read_sql_stmt(void)
26722670
* parsemode: raw_parser() mode to use
26732671
* isexpression: whether to say we're reading an "expression" or a "statement"
26742672
* valid_sql: whether to check the syntax of the expr
2675-
* trim: trim trailing whitespace
26762673
* startloc: if not NULL, location of first token is stored at *startloc
26772674
* endtoken: if not NULL, ending token is stored at *endtoken
26782675
* (this is only interesting if until2 or until3 isn't zero)
@@ -2685,14 +2682,14 @@ read_sql_construct(int until,
26852682
RawParseMode parsemode,
26862683
bool isexpression,
26872684
bool valid_sql,
2688-
bool trim,
26892685
int *startloc,
26902686
int *endtoken)
26912687
{
26922688
int tok;
26932689
StringInfoData ds;
26942690
IdentifierLookup save_IdentifierLookup;
26952691
int startlocation = -1;
2692+
int endlocation = -1;
26962693
int parenlevel = 0;
26972694
PLpgSQL_expr *expr;
26982695

@@ -2743,6 +2740,8 @@ read_sql_construct(int until,
27432740
expected),
27442741
parser_errposition(yylloc)));
27452742
}
2743+
/* Remember end+1 location of last accepted token */
2744+
endlocation = yylloc + plpgsql_token_length();
27462745
}
27472746

27482747
plpgsql_IdentifierLookup = save_IdentifierLookup;
@@ -2753,22 +2752,22 @@ read_sql_construct(int until,
27532752
*endtoken = tok;
27542753

27552754
/* give helpful complaint about empty input */
2756-
if (startlocation >= yylloc)
2755+
if (startlocation >= endlocation)
27572756
{
27582757
if (isexpression)
27592758
yyerror("missing expression");
27602759
else
27612760
yyerror("missing SQL statement");
27622761
}
27632762

2764-
plpgsql_append_source_text(&ds, startlocation, yylloc);
2765-
2766-
/* trim any trailing whitespace, for neatness */
2767-
if (trim)
2768-
{
2769-
while (ds.len > 0 && scanner_isspace(ds.data[ds.len - 1]))
2770-
ds.data[--ds.len] = '\0';
2771-
}
2763+
/*
2764+
* We save only the text from startlocation to endlocation-1. This
2765+
* suppresses the "until" token as well as any whitespace or comments
2766+
* following the last accepted token. (We used to strip such trailing
2767+
* whitespace by hand, but that causes problems if there's a "-- comment"
2768+
* in front of said whitespace.)
2769+
*/
2770+
plpgsql_append_source_text(&ds, startlocation, endlocation);
27722771

27732772
expr = palloc0(sizeof(PLpgSQL_expr));
27742773
expr->query = pstrdup(ds.data);
@@ -3933,16 +3932,12 @@ read_cursor_args(PLpgSQL_var *cursor, int until)
39333932
* Read the value expression. To provide the user with meaningful
39343933
* parse error positions, we check the syntax immediately, instead of
39353934
* checking the final expression that may have the arguments
3936-
* reordered. Trailing whitespace must not be trimmed, because
3937-
* otherwise input of the form (param -- comment\n, param) would be
3938-
* translated into a form where the second parameter is commented
3939-
* out.
3935+
* reordered.
39403936
*/
39413937
item = read_sql_construct(',', ')', 0,
39423938
",\" or \")",
39433939
RAW_PARSE_PLPGSQL_EXPR,
39443940
true, true,
3945-
false, /* do not trim */
39463941
NULL, &endtoken);
39473942

39483943
argv[argpos] = item->query;

src/pl/plpgsql/src/pl_scanner.c

+17
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ plpgsql_yylex(void)
184184
tok1 = T_DATUM;
185185
else
186186
tok1 = T_CWORD;
187+
/* Adjust token length to include A.B.C */
188+
aux1.leng = aux5.lloc - aux1.lloc + aux5.leng;
187189
}
188190
else
189191
{
@@ -197,6 +199,8 @@ plpgsql_yylex(void)
197199
tok1 = T_DATUM;
198200
else
199201
tok1 = T_CWORD;
202+
/* Adjust token length to include A.B */
203+
aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
200204
}
201205
}
202206
else
@@ -210,6 +214,8 @@ plpgsql_yylex(void)
210214
tok1 = T_DATUM;
211215
else
212216
tok1 = T_CWORD;
217+
/* Adjust token length to include A.B */
218+
aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
213219
}
214220
}
215221
else
@@ -298,6 +304,17 @@ plpgsql_yylex(void)
298304
return tok1;
299305
}
300306

307+
/*
308+
* Return the length of the token last returned by plpgsql_yylex().
309+
*
310+
* In the case of compound tokens, the length includes all the parts.
311+
*/
312+
int
313+
plpgsql_token_length(void)
314+
{
315+
return plpgsql_yyleng;
316+
}
317+
301318
/*
302319
* Internal yylex function. This wraps the core lexer and adds one feature:
303320
* a token pushback stack. We also make a couple of trivial single-token

src/pl/plpgsql/src/plpgsql.h

+1
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
13171317
*/
13181318
extern int plpgsql_base_yylex(void);
13191319
extern int plpgsql_yylex(void);
1320+
extern int plpgsql_token_length(void);
13201321
extern void plpgsql_push_back_token(int token);
13211322
extern bool plpgsql_token_is_unreserved_keyword(int token);
13221323
extern void plpgsql_append_source_text(StringInfo buf,

src/pl/plpgsql/src/sql/plpgsql_control.sql

+14
Original file line numberDiff line numberDiff line change
@@ -486,3 +486,17 @@ select case_test(1);
486486
select case_test(2);
487487
select case_test(12);
488488
select case_test(13);
489+
490+
-- test line comment between WHEN and THEN
491+
create or replace function case_comment(int) returns text as $$
492+
begin
493+
case $1
494+
when 1 -- comment before THEN
495+
then return 'one';
496+
else
497+
return 'other';
498+
end case;
499+
end;
500+
$$ language plpgsql immutable;
501+
502+
select case_comment(1);

src/test/regress/expected/plpgsql.out

+3-5
Original file line numberDiff line numberDiff line change
@@ -2390,11 +2390,9 @@ select namedparmcursor_test7();
23902390
ERROR: division by zero
23912391
CONTEXT: SQL expression "42/0 AS p1, 77 AS p2"
23922392
PL/pgSQL function namedparmcursor_test7() line 6 at OPEN
2393-
-- check that line comments work correctly within the argument list (there
2394-
-- is some special handling of this case in the code: the newline after the
2395-
-- comment must be preserved when the argument-evaluating query is
2396-
-- constructed, otherwise the comment effectively comments out the next
2397-
-- argument, too)
2393+
-- check that line comments work correctly within the argument list
2394+
-- (this used to require a special hack in the code; it no longer does,
2395+
-- but let's keep the test anyway)
23982396
create function namedparmcursor_test8() returns int4 as $$
23992397
declare
24002398
c1 cursor (p1 int, p2 int) for

src/test/regress/sql/plpgsql.sql

+3-5
Original file line numberDiff line numberDiff line change
@@ -2047,11 +2047,9 @@ begin
20472047
end $$ language plpgsql;
20482048
select namedparmcursor_test7();
20492049

2050-
-- check that line comments work correctly within the argument list (there
2051-
-- is some special handling of this case in the code: the newline after the
2052-
-- comment must be preserved when the argument-evaluating query is
2053-
-- constructed, otherwise the comment effectively comments out the next
2054-
-- argument, too)
2050+
-- check that line comments work correctly within the argument list
2051+
-- (this used to require a special hack in the code; it no longer does,
2052+
-- but let's keep the test anyway)
20552053
create function namedparmcursor_test8() returns int4 as $$
20562054
declare
20572055
c1 cursor (p1 int, p2 int) for

0 commit comments

Comments
 (0)