Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6424337

Browse files
committed
Add assorted new regexp_xxx SQL functions.
This patch adds new functions regexp_count(), regexp_instr(), regexp_like(), and regexp_substr(), and extends regexp_replace() with some new optional arguments. All these functions follow the definitions used in Oracle, although there are small differences in the regexp language due to using our own regexp engine -- most notably, that the default newline-matching behavior is different. Similar functions appear in DB2 and elsewhere, too. Aside from easing portability, these functions are easier to use for certain tasks than our existing regexp_match[es] functions.

Gilles Darold, heavily revised by me

Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
1 parent 9e51cc8 commit 6424337

File tree

8 files changed

+1340
-47
lines changed

8 files changed

+1340
-47
lines changed

doc/src/sgml/func.sgml

Lines changed: 331 additions & 21 deletions
Large diffs are not rendered by default.

src/backend/utils/adt/regexp.c

Lines changed: 455 additions & 18 deletions
Large diffs are not rendered by default.

src/backend/utils/adt/varlena.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
44964496
/*
44974497
* replace_text_regexp
44984498
*
4499-
* replace text that matches to regexp in src_text to replace_text.
4499+
* replace substring(s) in src_text that match regexp with replace_text.
4500+
*
4501+
* search_start: the character (not byte) offset in src_text at which to
4502+
* begin searching.
4503+
* n: if 0, replace all matches; if > 0, replace only the N'th match.
45004504
*
45014505
* Note: to avoid having to include regex.h in builtins.h, we declare
45024506
* the regexp argument as void *, but really it's regex_t *.
45034507
*/
45044508
text *
45054509
replace_text_regexp(text *src_text, void *regexp,
4506-
text *replace_text, bool glob)
4510+
text *replace_text,
4511+
int search_start, int n)
45074512
{
45084513
text *ret_text;
45094514
regex_t *re = (regex_t *) regexp;
45104515
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4516+
int nmatches = 0;
45114517
StringInfoData buf;
45124518
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
45134519
pg_wchar *data;
45144520
size_t data_len;
4515-
int search_start;
45164521
int data_pos;
45174522
char *start_ptr;
45184523
bool have_escape;
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
45304535
start_ptr = (char *) VARDATA_ANY(src_text);
45314536
data_pos = 0;
45324537

4533-
search_start = 0;
45344538
while (search_start <= data_len)
45354539
{
45364540
int regexec_result;
@@ -4560,6 +4564,23 @@ replace_text_regexp(text *src_text, void *regexp,
45604564
errmsg("regular expression failed: %s", errMsg)));
45614565
}
45624566

4567+
/*
4568+
* Count matches, and decide whether to replace this match.
4569+
*/
4570+
nmatches++;
4571+
if (n > 0 && nmatches != n)
4572+
{
4573+
/*
4574+
* No, so advance search_start, but not start_ptr/data_pos. (Thus,
4575+
* we treat the matched text as if it weren't matched, and copy it
4576+
* to the output later.)
4577+
*/
4578+
search_start = pmatch[0].rm_eo;
4579+
if (pmatch[0].rm_so == pmatch[0].rm_eo)
4580+
search_start++;
4581+
continue;
4582+
}
4583+
45634584
/*
45644585
* Copy the text to the left of the match position. Note we are given
45654586
* character not byte indexes.
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
45964617
data_pos = pmatch[0].rm_eo;
45974618

45984619
/*
4599-
* When global option is off, replace the first instance only.
4620+
* If we only want to replace one occurrence, we're done.
46004621
*/
4601-
if (!glob)
4622+
if (n > 0)
46024623
break;
46034624

46044625
/*

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202107261
56+
#define CATALOG_VERSION_NO 202108031
5757

5858
#endif

src/include/catalog/pg_proc.dat

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,6 +3565,18 @@
35653565
{ oid => '2285', descr => 'replace text using regexp',
35663566
proname => 'regexp_replace', prorettype => 'text',
35673567
proargtypes => 'text text text text', prosrc => 'textregexreplace' },
3568+
{ oid => '9611', descr => 'replace text using regexp',
3569+
proname => 'regexp_replace', prorettype => 'text',
3570+
proargtypes => 'text text text int4 int4 text',
3571+
prosrc => 'textregexreplace_extended' },
3572+
{ oid => '9612', descr => 'replace text using regexp',
3573+
proname => 'regexp_replace', prorettype => 'text',
3574+
proargtypes => 'text text text int4 int4',
3575+
prosrc => 'textregexreplace_extended_no_flags' },
3576+
{ oid => '9613', descr => 'replace text using regexp',
3577+
proname => 'regexp_replace', prorettype => 'text',
3578+
proargtypes => 'text text text int4',
3579+
prosrc => 'textregexreplace_extended_no_n' },
35683580
{ oid => '3396', descr => 'find first match for regexp',
35693581
proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
35703582
prosrc => 'regexp_match_no_flags' },
@@ -3579,6 +3591,58 @@
35793591
proname => 'regexp_matches', prorows => '10', proretset => 't',
35803592
prorettype => '_text', proargtypes => 'text text text',
35813593
prosrc => 'regexp_matches' },
3594+
{ oid => '9614', descr => 'count regexp matches',
3595+
proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
3596+
prosrc => 'regexp_count_no_start' },
3597+
{ oid => '9615', descr => 'count regexp matches',
3598+
proname => 'regexp_count', prorettype => 'int4',
3599+
proargtypes => 'text text int4', prosrc => 'regexp_count_no_flags' },
3600+
{ oid => '9616', descr => 'count regexp matches',
3601+
proname => 'regexp_count', prorettype => 'int4',
3602+
proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
3603+
{ oid => '9617', descr => 'position of regexp match',
3604+
proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
3605+
prosrc => 'regexp_instr_no_start' },
3606+
{ oid => '9618', descr => 'position of regexp match',
3607+
proname => 'regexp_instr', prorettype => 'int4',
3608+
proargtypes => 'text text int4', prosrc => 'regexp_instr_no_n' },
3609+
{ oid => '9619', descr => 'position of regexp match',
3610+
proname => 'regexp_instr', prorettype => 'int4',
3611+
proargtypes => 'text text int4 int4', prosrc => 'regexp_instr_no_endoption' },
3612+
{ oid => '9620', descr => 'position of regexp match',
3613+
proname => 'regexp_instr', prorettype => 'int4',
3614+
proargtypes => 'text text int4 int4 int4',
3615+
prosrc => 'regexp_instr_no_flags' },
3616+
{ oid => '9621', descr => 'position of regexp match',
3617+
proname => 'regexp_instr', prorettype => 'int4',
3618+
proargtypes => 'text text int4 int4 int4 text',
3619+
prosrc => 'regexp_instr_no_subexpr' },
3620+
{ oid => '9622', descr => 'position of regexp match',
3621+
proname => 'regexp_instr', prorettype => 'int4',
3622+
proargtypes => 'text text int4 int4 int4 text int4',
3623+
prosrc => 'regexp_instr' },
3624+
{ oid => '9623', descr => 'test for regexp match',
3625+
proname => 'regexp_like', prorettype => 'bool', proargtypes => 'text text',
3626+
prosrc => 'regexp_like_no_flags' },
3627+
{ oid => '9624', descr => 'test for regexp match',
3628+
proname => 'regexp_like', prorettype => 'bool',
3629+
proargtypes => 'text text text', prosrc => 'regexp_like' },
3630+
{ oid => '9625', descr => 'extract substring that matches regexp',
3631+
proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
3632+
prosrc => 'regexp_substr_no_start' },
3633+
{ oid => '9626', descr => 'extract substring that matches regexp',
3634+
proname => 'regexp_substr', prorettype => 'text',
3635+
proargtypes => 'text text int4', prosrc => 'regexp_substr_no_n' },
3636+
{ oid => '9627', descr => 'extract substring that matches regexp',
3637+
proname => 'regexp_substr', prorettype => 'text',
3638+
proargtypes => 'text text int4 int4', prosrc => 'regexp_substr_no_flags' },
3639+
{ oid => '9628', descr => 'extract substring that matches regexp',
3640+
proname => 'regexp_substr', prorettype => 'text',
3641+
proargtypes => 'text text int4 int4 text',
3642+
prosrc => 'regexp_substr_no_subexpr' },
3643+
{ oid => '9629', descr => 'extract substring that matches regexp',
3644+
proname => 'regexp_substr', prorettype => 'text',
3645+
proargtypes => 'text text int4 int4 text int4', prosrc => 'regexp_substr' },
35823646
{ oid => '2088', descr => 'split string by field_sep and return field_num',
35833647
proname => 'split_part', prorettype => 'text',
35843648
proargtypes => 'text text int4', prosrc => 'split_part' },

src/include/utils/varlena.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
3434
extern bool SplitGUCList(char *rawstring, char separator,
3535
List **namelist);
3636
extern text *replace_text_regexp(text *src_text, void *regexp,
37-
text *replace_text, bool glob);
37+
text *replace_text,
38+
int search_start, int n);
3839

3940
#endif

0 commit comments

Comments
 (0)