Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 78c8876

Browse files
committed
Add current substring regular expression syntax
SQL:1999 had the syntax SUBSTRING(text FROM pattern FOR escapechar), which SQL:2003 replaced with the clearer SUBSTRING(text SIMILAR pattern ESCAPE escapechar); the newer syntax was never implemented in PostgreSQL. This patch adds the new syntax as an alternative in the parser, and updates the documentation and tests to indicate that it is now the preferred form. Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com> Reviewed-by: Vik Fearing <vik@postgresfriends.org> Reviewed-by: Fabien COELHO <coelho@cri.ensmp.fr> Discussion: https://www.postgresql.org/message-id/flat/a15db31c-d0f8-8ce0-9039-578a31758adb%402ndquadrant.com
1 parent aafefb4 commit 78c8876

File tree

8 files changed

+77
-34
lines changed

8 files changed

+77
-34
lines changed

contrib/citext/expected/citext.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
16021602
t
16031603
(1 row)
16041604

1605-
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
1605+
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
16061606
t
16071607
---
16081608
t

contrib/citext/expected/citext_1.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
16021602
t
16031603
(1 row)
16041604

1605-
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
1605+
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
16061606
t
16071607
---
16081608
t

contrib/citext/sql/citext.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
564564
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
565565
SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
566566
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
567-
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
567+
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
568568

569569
SELECT trim(' trim '::citext) = 'trim' AS t;
570570
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;

doc/src/sgml/func.sgml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
26692669

26702670
<row>
26712671
<entry role="func_table_entry"><para role="func_signature">
2672+
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> )
2673+
<returnvalue>text</returnvalue>
2674+
</para>
2675+
<para role="func_signature">
26722676
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> )
26732677
<returnvalue>text</returnvalue>
26742678
</para>
26752679
<para>
26762680
Extracts substring matching <acronym>SQL</acronym> regular expression;
2677-
see <xref linkend="functions-similarto-regexp"/>.
2681+
see <xref linkend="functions-similarto-regexp"/>. The first form has
2682+
been specified since SQL:2003; the second form was only in SQL:1999
2683+
and should be considered obsolete.
26782684
</para>
26792685
<para>
2680-
<literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal>
2686+
<literal>substring('Thomas' similar '%#"o_a#"_' escape '#')</literal>
26812687
<returnvalue>oma</returnvalue>
26822688
</para></entry>
26832689
</row>
@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
51605166
The <function>substring</function> function with three parameters
51615167
provides extraction of a substring that matches an SQL
51625168
regular expression pattern. The function can be written according
5163-
to SQL99 syntax:
5169+
to standard SQL syntax:
5170+
<synopsis>
5171+
substring(<replaceable>string</replaceable> similar <replaceable>pattern</replaceable> escape <replaceable>escape-character</replaceable>)
5172+
</synopsis>
5173+
or using the now obsolete SQL:1999 syntax:
51645174
<synopsis>
51655175
substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>)
51665176
</synopsis>
@@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>,
52015211
<para>
52025212
Some examples, with <literal>#&quot;</literal> delimiting the return string:
52035213
<programlisting>
5204-
substring('foobar' from '%#"o_b#"%' for '#') <lineannotation>oob</lineannotation>
5205-
substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotation>
5214+
substring('foobar' similar '%#"o_b#"%' escape '#') <lineannotation>oob</lineannotation>
5215+
substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</lineannotation>
52065216
</programlisting>
52075217
</para>
52085218
</sect2>

src/backend/catalog/information_schema.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
182182
AS
183183
$$SELECT
184184
CASE WHEN $1 IN (1186) /* interval */
185-
THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#'))
185+
THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
186186
ELSE null
187187
END$$;
188188

src/backend/parser/gram.y

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14451,7 +14451,27 @@ position_list:
1445114451
| /*EMPTY*/ { $$ = NIL; }
1445214452
;
1445314453

14454-
/* SUBSTRING() arguments */
14454+
/*
14455+
* SUBSTRING() arguments
14456+
*
14457+
* Note that SQL:1999 has both
14458+
*
14459+
* text FROM int FOR int
14460+
*
14461+
* and
14462+
*
14463+
* text FROM pattern FOR escape
14464+
*
14465+
* In the parser we map them both to a call to the substring() function and
14466+
* rely on type resolution to pick the right one.
14467+
*
14468+
* In SQL:2003, the second variant was changed to
14469+
*
14470+
* text SIMILAR pattern ESCAPE escape
14471+
*
14472+
* We could in theory map that to a different function internally, but
14473+
* since we still support the SQL:1999 version, we don't.
14474+
*/
1445514475
substr_list:
1445614476
a_expr FROM a_expr FOR a_expr
1445714477
{
@@ -14483,6 +14503,10 @@ substr_list:
1448314503
makeTypeCast($3,
1448414504
SystemTypeName("int4"), -1));
1448514505
}
14506+
| a_expr SIMILAR a_expr ESCAPE a_expr
14507+
{
14508+
$$ = list_make3($1, $3, $5);
14509+
}
1448614510
/*
1448714511
* We also want to support generic substring functions that
1448814512
* accept the usual generic list of arguments.

src/test/regress/expected/strings.out

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -397,82 +397,89 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
397397
(1 row)
398398

399399
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
400+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
401+
bcd
402+
-----
403+
bcd
404+
(1 row)
405+
406+
-- obsolete SQL99 syntax
400407
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
401408
bcd
402409
-----
403410
bcd
404411
(1 row)
405412

406413
-- No match should return NULL
407-
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
414+
SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
408415
True
409416
------
410417
t
411418
(1 row)
412419

413420
-- Null inputs should return NULL
414-
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
421+
SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
415422
True
416423
------
417424
t
418425
(1 row)
419426

420-
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
427+
SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
421428
True
422429
------
423430
t
424431
(1 row)
425432

426-
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
433+
SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
427434
True
428435
------
429436
t
430437
(1 row)
431438

432439
-- The first and last parts should act non-greedy
433-
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
440+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
434441
bcdef
435442
-------
436443
bcdef
437444
(1 row)
438445

439-
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
446+
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
440447
abcdefg
441448
---------
442449
abcdefg
443450
(1 row)
444451

445452
-- Vertical bar in any part affects only that part
446-
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
453+
SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
447454
bcdef
448455
-------
449456
bcdef
450457
(1 row)
451458

452-
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
459+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
453460
bcdef
454461
-------
455462
bcdef
456463
(1 row)
457464

458-
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
465+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
459466
bcdef
460467
-------
461468
bcdef
462469
(1 row)
463470

464471
-- Can't have more than two part separators
465-
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
472+
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
466473
ERROR: SQL regular expression may not contain more than two escape-double-quote separators
467474
CONTEXT: SQL function "substring" statement 1
468475
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
469-
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
476+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
470477
bcdefg
471478
--------
472479
bcdefg
473480
(1 row)
474481

475-
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
482+
SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
476483
abcdefg
477484
---------
478485
abcdefg

src/test/regress/sql/strings.sql

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
132132
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
133133
134134
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
135+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
136+
-- obsolete SQL99 syntax
135137
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
136138

137139
-- No match should return NULL
138-
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
140+
SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
139141

140142
-- Null inputs should return NULL
141-
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
142-
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
143-
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
143+
SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
144+
SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
145+
SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
144146

145147
-- The first and last parts should act non-greedy
146-
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
147-
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
148+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
149+
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
148150

149151
-- Vertical bar in any part affects only that part
150-
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
151-
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
152-
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
152+
SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
153+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
154+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
153155

154156
-- Can't have more than two part separators
155-
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
157+
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
156158

157159
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
158-
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
159-
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
160+
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
161+
SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
160162

161163
-- substring() with just two arguments is not allowed by SQL spec;
162164
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL

0 commit comments

Comments
 (0)