Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5ec70a9

Browse files
committed
Fix lexing of standard multi-character operators in edge cases.
Commits c6b3c93 (which fixed the precedence of >=, <=, <> operators) and 865f14a (which added support for the standard => notation for named arguments) created a class of lexer tokens which look like multi-character operators but which have their own token IDs distinct from Op. However, longest-match rules meant that following any of these tokens with another operator character, as in (1<>-1), would cause them to be incorrectly returned as Op.

The error here isn't immediately obvious, because the parser would usually still find the correct operator via the Op token, but there were more subtle problems:

1. If immediately followed by a comment or by a + or - character, >= <= <> would be given the old precedence of Op rather than the correct new precedence;
2. If followed by a comment, != would be returned as Op rather than as NOT_EQUALS, causing it not to be found at all;
3. If followed by a comment or by a + or - character, the => token for named arguments would be lexed as Op, causing the argument to be mis-parsed as a simple expression, usually causing an error.

Fix by explicitly checking for the operators in the {operator} code block in addition to all the existing special cases there.

Backpatch to 9.5 where the problem was introduced.

Analysis and patch by me; review by Tom Lane.

Discussion: https://postgr.es/m/87va851ppl.fsf@news-spur.riddles.org.uk
1 parent 4854ead commit 5ec70a9

File tree

7 files changed

+221
-0
lines changed

7 files changed

+221
-0
lines changed

src/backend/parser/scan.l

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,15 @@ identifier {ident_start}{ident_cont}*
335335
typecast "::"
336336
dot_dot \.\.
337337
colon_equals ":="
338+
339+
/*
340+
* These operator-like tokens (unlike the above ones) also match the {operator}
341+
* rule, which means that they might be overridden by a longer match if they
342+
* are followed by a comment start or a + or - character. Accordingly, if you
343+
* add to this list, you must also add corresponding code to the {operator}
344+
* block to return the correct token in such cases. (This is not needed in
345+
* psqlscan.l since the token value is ignored there.)
346+
*/
338347
equals_greater "=>"
339348
less_equals "<="
340349
greater_equals ">="
@@ -925,6 +934,25 @@ other .
925934
if (nchars == 1 &&
926935
strchr(",()[].;:+-*/%^<>=", yytext[0]))
927936
return yytext[0];
937+
/*
938+
* Likewise, if what we have left is two chars, and
939+
* those match the tokens ">=", "<=", "=>", "<>" or
940+
* "!=", then we must return the appropriate token
941+
* rather than the generic Op.
942+
*/
943+
if (nchars == 2)
944+
{
945+
if (yytext[0] == '=' && yytext[1] == '>')
946+
return EQUALS_GREATER;
947+
if (yytext[0] == '>' && yytext[1] == '=')
948+
return GREATER_EQUALS;
949+
if (yytext[0] == '<' && yytext[1] == '=')
950+
return LESS_EQUALS;
951+
if (yytext[0] == '<' && yytext[1] == '>')
952+
return NOT_EQUALS;
953+
if (yytext[0] == '!' && yytext[1] == '=')
954+
return NOT_EQUALS;
955+
}
928956
}
929957

930958
/*

src/fe_utils/psqlscan.l

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,15 @@ identifier {ident_start}{ident_cont}*
296296
typecast "::"
297297
dot_dot \.\.
298298
colon_equals ":="
299+
300+
/*
301+
* These operator-like tokens (unlike the above ones) also match the {operator}
302+
* rule, which means that they might be overridden by a longer match if they
303+
* are followed by a comment start or a + or - character. Accordingly, if you
304+
* add to this list, you must also add corresponding code to the {operator}
305+
* block to return the correct token in such cases. (This is not needed in
306+
* psqlscan.l since the token value is ignored there.)
307+
*/
299308
equals_greater "=>"
300309
less_equals "<="
301310
greater_equals ">="

src/interfaces/ecpg/preproc/pgc.l

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,15 @@ array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
242242
typecast "::"
243243
dot_dot \.\.
244244
colon_equals ":="
245+
246+
/*
247+
* These operator-like tokens (unlike the above ones) also match the {operator}
248+
* rule, which means that they might be overridden by a longer match if they
249+
* are followed by a comment start or a + or - character. Accordingly, if you
250+
* add to this list, you must also add corresponding code to the {operator}
251+
* block to return the correct token in such cases. (This is not needed in
252+
* psqlscan.l since the token value is ignored there.)
253+
*/
245254
equals_greater "=>"
246255
less_equals "<="
247256
greater_equals ">="
@@ -729,6 +738,25 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
729738
if (nchars == 1 &&
730739
strchr(",()[].;:+-*/%^<>=", yytext[0]))
731740
return yytext[0];
741+
/*
742+
* Likewise, if what we have left is two chars, and
743+
* those match the tokens ">=", "<=", "=>", "<>" or
744+
* "!=", then we must return the appropriate token
745+
* rather than the generic Op.
746+
*/
747+
if (nchars == 2)
748+
{
749+
if (yytext[0] == '=' && yytext[1] == '>')
750+
return EQUALS_GREATER;
751+
if (yytext[0] == '>' && yytext[1] == '=')
752+
return GREATER_EQUALS;
753+
if (yytext[0] == '<' && yytext[1] == '=')
754+
return LESS_EQUALS;
755+
if (yytext[0] == '<' && yytext[1] == '>')
756+
return NOT_EQUALS;
757+
if (yytext[0] == '!' && yytext[1] == '=')
758+
return NOT_EQUALS;
759+
}
732760
}
733761

734762
base_yylval.str = mm_strdup(yytext);

src/test/regress/expected/create_operator.out

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,80 @@ CREATE OPERATOR => (
3737
ERROR: syntax error at or near "=>"
3838
LINE 1: CREATE OPERATOR => (
3939
^
40+
-- lexing of <=, >=, <>, != has a number of edge cases
41+
-- (=> is tested elsewhere)
42+
-- this is legal because ! is not allowed in sql ops
43+
CREATE OPERATOR !=- (
44+
leftarg = int8, -- right unary
45+
procedure = numeric_fac
46+
);
47+
SELECT 2 !=-;
48+
?column?
49+
----------
50+
2
51+
(1 row)
52+
53+
-- make sure lexer returns != as <> even in edge cases
54+
SELECT 2 !=/**/ 1, 2 !=/**/ 2;
55+
?column? | ?column?
56+
----------+----------
57+
t | f
58+
(1 row)
59+
60+
SELECT 2 !=-- comment to be removed by psql
61+
1;
62+
?column?
63+
----------
64+
t
65+
(1 row)
66+
67+
DO $$ -- use DO to protect -- from psql
68+
declare r boolean;
69+
begin
70+
execute $e$ select 2 !=-- comment
71+
1 $e$ into r;
72+
raise info 'r = %', r;
73+
end;
74+
$$;
75+
INFO: r = t
76+
-- check that <= etc. followed by more operator characters are returned
77+
-- as the correct token with correct precedence
78+
SELECT true<>-1 BETWEEN 1 AND 1; -- BETWEEN has prec. above <> but below Op
79+
?column?
80+
----------
81+
t
82+
(1 row)
83+
84+
SELECT false<>/**/1 BETWEEN 1 AND 1;
85+
?column?
86+
----------
87+
t
88+
(1 row)
89+
90+
SELECT false<=-1 BETWEEN 1 AND 1;
91+
?column?
92+
----------
93+
t
94+
(1 row)
95+
96+
SELECT false>=-1 BETWEEN 1 AND 1;
97+
?column?
98+
----------
99+
t
100+
(1 row)
101+
102+
SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
103+
?column? | ?column? | ?column?
104+
----------+----------+----------
105+
t | t | t
106+
(1 row)
107+
108+
SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
109+
?column? | ?column? | ?column?
110+
----------+----------+----------
111+
f | f | f
112+
(1 row)
113+
40114
-- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
41115
BEGIN TRANSACTION;
42116
CREATE ROLE regress_rol_op1;

src/test/regress/expected/polymorphism.out

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,6 +1478,42 @@ select dfunc('a'::text, 'b', flag => true); -- mixed notation
14781478
a
14791479
(1 row)
14801480

1481+
-- this tests lexer edge cases around =>
1482+
select dfunc(a =>-1);
1483+
dfunc
1484+
-------
1485+
-1
1486+
(1 row)
1487+
1488+
select dfunc(a =>+1);
1489+
dfunc
1490+
-------
1491+
1
1492+
(1 row)
1493+
1494+
select dfunc(a =>/**/1);
1495+
dfunc
1496+
-------
1497+
1
1498+
(1 row)
1499+
1500+
select dfunc(a =>--comment to be removed by psql
1501+
1);
1502+
dfunc
1503+
-------
1504+
1
1505+
(1 row)
1506+
1507+
-- need DO to protect the -- from psql
1508+
do $$
1509+
declare r integer;
1510+
begin
1511+
select dfunc(a=>-- comment
1512+
1) into r;
1513+
raise info 'r = %', r;
1514+
end;
1515+
$$;
1516+
INFO: r = 1
14811517
-- check reverse-listing of named-arg calls
14821518
CREATE VIEW dfview AS
14831519
SELECT q1, q2,

src/test/regress/sql/create_operator.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,37 @@ CREATE OPERATOR => (
4141
procedure = numeric_fac
4242
);
4343

44+
-- lexing of <=, >=, <>, != has a number of edge cases
45+
-- (=> is tested elsewhere)
46+
47+
-- this is legal because ! is not allowed in sql ops
48+
CREATE OPERATOR !=- (
49+
leftarg = int8, -- right unary
50+
procedure = numeric_fac
51+
);
52+
SELECT 2 !=-;
53+
-- make sure lexer returns != as <> even in edge cases
54+
SELECT 2 !=/**/ 1, 2 !=/**/ 2;
55+
SELECT 2 !=-- comment to be removed by psql
56+
1;
57+
DO $$ -- use DO to protect -- from psql
58+
declare r boolean;
59+
begin
60+
execute $e$ select 2 !=-- comment
61+
1 $e$ into r;
62+
raise info 'r = %', r;
63+
end;
64+
$$;
65+
66+
-- check that <= etc. followed by more operator characters are returned
67+
-- as the correct token with correct precedence
68+
SELECT true<>-1 BETWEEN 1 AND 1; -- BETWEEN has prec. above <> but below Op
69+
SELECT false<>/**/1 BETWEEN 1 AND 1;
70+
SELECT false<=-1 BETWEEN 1 AND 1;
71+
SELECT false>=-1 BETWEEN 1 AND 1;
72+
SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
73+
SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
74+
4475
-- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
4576
BEGIN TRANSACTION;
4677
CREATE ROLE regress_rol_op1;

src/test/regress/sql/polymorphism.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,21 @@ select dfunc('a'::text, 'b', flag => false); -- mixed notation
785785
select dfunc('a'::text, 'b', true); -- full positional notation
786786
select dfunc('a'::text, 'b', flag => true); -- mixed notation
787787

788+
-- this tests lexer edge cases around =>
789+
select dfunc(a =>-1);
790+
select dfunc(a =>+1);
791+
select dfunc(a =>/**/1);
792+
select dfunc(a =>--comment to be removed by psql
793+
1);
794+
-- need DO to protect the -- from psql
795+
do $$
796+
declare r integer;
797+
begin
798+
select dfunc(a=>-- comment
799+
1) into r;
800+
raise info 'r = %', r;
801+
end;
802+
$$;
788803

789804
-- check reverse-listing of named-arg calls
790805
CREATE VIEW dfview AS

0 commit comments

Comments
 (0)