From 9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Tue, 20 Mar 2007 05:45:00 +0000 Subject: Add three new regexp functions: regexp_matches, regexp_split_to_array, and regexp_split_to_table. These functions provide access to the capture groups resulting from a POSIX regular expression match, and provide the ability to split a string on a POSIX regular expression, respectively. Patch from Jeremy Drake; code review by Neil Conway, additional comments and suggestions from Tom and Peter E. This patch bumps the catversion, adds some regression tests, and updates the docs. --- doc/src/sgml/func.sgml | 184 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 8 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index b8be507f2df..084db0d40e5 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -1468,17 +1468,52 @@ - regexp_replace(string text, pattern text, replacement text [,flags text]) + regexp_matches(string text, pattern text [, flags text]) + setof text[] + + Return all capture groups resulting from matching POSIX regular + expression against the string. See + for more information. + + regexp_matches('foobarbequebaz', '(bar)(beque)') + {bar,beque} + + + + regexp_replace(string text, pattern text, replacement text [, flags text]) text Replace substring matching POSIX regular expression. See - for more information on pattern - matching. + for more information. regexp_replace('Thomas', '.[mN]a.', 'M') ThM + + regexp_split_to_array(string text, pattern text [, flags text ]) + text[] + + Split string using POSIX regular expression as + the delimiter. See for more + information. + + regexp_split_to_array('hello world', E'\\s+') + {hello,world} + + + + regexp_split_to_table(string text, pattern text [, flags text]) + setof text + + Split string using POSIX regular expression as + the delimiter. See for more + information. + + regexp_split_to_table('hello world', E'\\s+') + helloworld (2 rows) + + repeat(string text, number int) text @@ -2883,9 +2918,6 @@ cast(-44 as bit(12)) 111111010100 substring - - regexp_replace - string SIMILAR TO pattern ESCAPE escape-character @@ -3004,6 +3036,21 @@ substring('foobar' from '#"o_b#"%' for '#') NULLregular expression pattern matching + + substring + + + regexp_replace + + + regexp_matches + + + regexp_split_to_table + + + regexp_split_to_array + lists the available @@ -3134,7 +3181,10 @@ substring('foobar' from 'o(.)b') o string containing zero or more single-letter flags that change the function's behavior. Flag i specifies case-insensitive matching, while flag g specifies replacement of each matching - substring rather than only the first one. + substring rather than only the first one. Other supported flags are + m, n, p, w and + x, whose meanings correspond to those shown in + . @@ -3149,6 +3199,124 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g') + + The regexp_matches function returns all of the capture + groups resulting from matching a POSIX regular expression pattern. + It has the syntax + regexp_matches(string, pattern + , flags ). + If there is no match to the pattern, the function returns no rows. + If there is a match, the function returns the contents of all of the capture groups + in a text array, or if there were no capture groups in the pattern, it returns the + contents of the entire match as a single-element text array. + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. Flag i specifies case-insensitive + matching, while flag g causes the return of each matching + substring rather than only the first one. Other supported + flags are m, n, p, w and + x, whose meanings are described in + . + + + + Some examples: + +SELECT regexp_matches('foobarbequebaz', '(bar)(beque)'); + regexp_matches +---------------- + {bar,beque} +(1 row) + +SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g'); + regexp_matches +---------------- + {bar,beque} + {bazil,barf} +(2 rows) + +SELECT regexp_matches('foobarbequebaz', 'barbeque'); + regexp_matches +---------------- + {barbeque} +(1 row) + + + + + The regexp_split_to_table function splits a string using a POSIX + regular expression pattern as a delimiter. It has the syntax + regexp_split_to_table(string, pattern + , flags ). + If there is no match to the pattern, the function returns the + string. If there is at least one match, for each match it returns + the text from the end of the last match (or the beginning of the string) + to the beginning of the match. When there are no more matches, it + returns the text from the end of the last match to the end of the string. + The flags parameter is an optional text string containing + zero or more single-letter flags that change the function's behavior. + regexp_split_to_table supports the flags i, + m, n, p, w and + x, whose meanings are described in + . + + + + The regexp_split_to_array function behaves the same as + regexp_split_to_table, except that regexp_split_to_array + returns its results as a text[]. It has the syntax + regexp_split_to_array(string, pattern + , flags ). + The parameters are the same as for regexp_split_to_table. + + + + Some examples: + + +SELECT foo FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', E'\\\s+') AS foo; + foo +-------- + the + quick + brown + fox + jumped + over + the + lazy + dog +(9 rows) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', E'\\s+'); + regexp_split_to_array +------------------------------------------------ + {the,quick,brown,fox,jumped,over,the,lazy,dog} +(1 row) + +SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; + foo +----- + t + h + e + q + u + i + c + k + b + r + o + w + n + f + o + x +(16 rows) + + + PostgreSQL's regular expressions are implemented using a package written by Henry Spencer. Much of -- cgit v1.2.3