Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ec0294f

Browse files
committed
Support negative indexes in split_part().
This provides a handy way to get, say, the last field of the string. Use of a negative index in this way has precedent in the nearby left() and right() functions. The implementation scans the string twice when N < -1, but it seems likely that N = -1 will be the huge majority of actual use cases, so I'm not really excited about adding complexity to avoid that. Nikhil Benesch, reviewed by Jacob Champion; cosmetic tweakage by me Discussion: https://postgr.es/m/cbb7f861-6162-3a51-9823-97bc3aa0b638@gmail.com
1 parent 3bf4430 commit ec0294f

File tree

4 files changed

+172
-17
lines changed

4 files changed

+172
-17
lines changed

doc/src/sgml/func.sgml

+7-1
Original file line numberDiff line numberDiff line change
@@ -3356,11 +3356,17 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
33563356
<para>
33573357
Splits <parameter>string</parameter> at occurrences
33583358
of <parameter>delimiter</parameter> and returns
3359-
the <parameter>n</parameter>'th field (counting from one).
3359+
the <parameter>n</parameter>'th field (counting from one),
3360+
or when <parameter>n</parameter> is negative, returns
3361+
the |<parameter>n</parameter>|'th-from-last field.
33603362
</para>
33613363
<para>
33623364
<literal>split_part('abc~@~def~@~ghi', '~@~', 2)</literal>
33633365
<returnvalue>def</returnvalue>
3366+
</para>
3367+
<para>
3368+
<literal>split_part('abc,def,ghi,jkl', ',', -2)</literal>
3369+
<returnvalue>ghi</returnvalue>
33643370
</para></entry>
33653371
</row>
33663372

src/backend/utils/adt/varlena.c

+68-15
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ typedef struct varlena VarString;
5151
typedef struct
5252
{
5353
bool is_multibyte; /* T if multibyte encoding */
54-
bool is_multibyte_char_in_char;
54+
bool is_multibyte_char_in_char; /* need to check char boundaries? */
5555

5656
char *str1; /* haystack string */
5757
char *str2; /* needle string */
@@ -1439,8 +1439,7 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
14391439
/*
14401440
* Return a pointer to the current match.
14411441
*
1442-
* The returned pointer points into correct position in the original
1443-
* the haystack string.
1442+
* The returned pointer points into the original haystack string.
14441443
*/
14451444
static char *
14461445
text_position_get_match_ptr(TextPositionState *state)
@@ -1471,12 +1470,27 @@ text_position_get_match_pos(TextPositionState *state)
14711470
}
14721471
}
14731472

1473+
/*
1474+
* Reset search state to the initial state installed by text_position_setup.
1475+
*
1476+
* The next call to text_position_next will search from the beginning
1477+
* of the string.
1478+
*/
1479+
static void
1480+
text_position_reset(TextPositionState *state)
1481+
{
1482+
state->last_match = NULL;
1483+
state->refpoint = state->str1;
1484+
state->refpos = 0;
1485+
}
1486+
14741487
static void
14751488
text_position_cleanup(TextPositionState *state)
14761489
{
14771490
/* no cleanup needed */
14781491
}
14791492

1493+
14801494
static void
14811495
check_collation_set(Oid collid)
14821496
{
@@ -4581,9 +4595,8 @@ replace_text_regexp(text *src_text, void *regexp,
45814595

45824596
/*
45834597
* split_part
4584-
* parse input string
4585-
* return ord item (1 based)
4586-
* based on provided field separator
4598+
* parse input string based on provided field separator
4599+
* return N'th item (1 based, negative counts from end)
45874600
*/
45884601
Datum
45894602
split_part(PG_FUNCTION_ARGS)
@@ -4600,10 +4613,10 @@ split_part(PG_FUNCTION_ARGS)
46004613
bool found;
46014614

46024615
/* field number is 1 based */
4603-
if (fldnum < 1)
4616+
if (fldnum == 0)
46044617
ereport(ERROR,
46054618
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4606-
errmsg("field position must be greater than zero")));
4619+
errmsg("field position must not be zero")));
46074620

46084621
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
46094622
fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
@@ -4612,32 +4625,72 @@ split_part(PG_FUNCTION_ARGS)
46124625
if (inputstring_len < 1)
46134626
PG_RETURN_TEXT_P(cstring_to_text(""));
46144627

4615-
/* empty field separator */
4628+
/* handle empty field separator */
46164629
if (fldsep_len < 1)
46174630
{
4618-
/* if first field, return input string, else empty string */
4619-
if (fldnum == 1)
4631+
/* if first or last field, return input string, else empty string */
4632+
if (fldnum == 1 || fldnum == -1)
46204633
PG_RETURN_TEXT_P(inputstring);
46214634
else
46224635
PG_RETURN_TEXT_P(cstring_to_text(""));
46234636
}
46244637

4638+
/* find the first field separator */
46254639
text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
46264640

4627-
/* identify bounds of first field */
4628-
start_ptr = VARDATA_ANY(inputstring);
46294641
found = text_position_next(&state);
46304642

46314643
/* special case if fldsep not found at all */
46324644
if (!found)
46334645
{
46344646
text_position_cleanup(&state);
4635-
/* if field 1 requested, return input string, else empty string */
4636-
if (fldnum == 1)
4647+
/* if first or last field, return input string, else empty string */
4648+
if (fldnum == 1 || fldnum == -1)
46374649
PG_RETURN_TEXT_P(inputstring);
46384650
else
46394651
PG_RETURN_TEXT_P(cstring_to_text(""));
46404652
}
4653+
4654+
/*
4655+
* take care of a negative field number (i.e. count from the right) by
4656+
* converting to a positive field number; we need total number of fields
4657+
*/
4658+
if (fldnum < 0)
4659+
{
4660+
/* we found a fldsep, so there are at least two fields */
4661+
int numfields = 2;
4662+
4663+
while (text_position_next(&state))
4664+
numfields++;
4665+
4666+
/* special case of last field does not require an extra pass */
4667+
if (fldnum == -1)
4668+
{
4669+
start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
4670+
end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
4671+
text_position_cleanup(&state);
4672+
PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
4673+
end_ptr - start_ptr));
4674+
}
4675+
4676+
/* else, convert fldnum to positive notation */
4677+
fldnum += numfields + 1;
4678+
4679+
/* if nonexistent field, return empty string */
4680+
if (fldnum <= 0)
4681+
{
4682+
text_position_cleanup(&state);
4683+
PG_RETURN_TEXT_P(cstring_to_text(""));
4684+
}
4685+
4686+
/* reset to pointing at first match, but now with positive fldnum */
4687+
text_position_reset(&state);
4688+
found = text_position_next(&state);
4689+
Assert(found);
4690+
}
4691+
4692+
/* identify bounds of first field */
4693+
start_ptr = VARDATA_ANY(inputstring);
46414694
end_ptr = text_position_get_match_ptr(&state);
46424695

46434696
while (found && --fldnum > 0)

src/test/regress/expected/strings.out

+73-1
Original file line numberDiff line numberDiff line change
@@ -1552,8 +1552,56 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
15521552
--
15531553
-- test split_part
15541554
--
1555+
select split_part('','@',1) AS "empty string";
1556+
empty string
1557+
--------------
1558+
1559+
(1 row)
1560+
1561+
select split_part('','@',-1) AS "empty string";
1562+
empty string
1563+
--------------
1564+
1565+
(1 row)
1566+
1567+
select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
1568+
joeuser@mydatabase
1569+
--------------------
1570+
joeuser@mydatabase
1571+
(1 row)
1572+
1573+
select split_part('joeuser@mydatabase','',2) AS "empty string";
1574+
empty string
1575+
--------------
1576+
1577+
(1 row)
1578+
1579+
select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
1580+
joeuser@mydatabase
1581+
--------------------
1582+
joeuser@mydatabase
1583+
(1 row)
1584+
1585+
select split_part('joeuser@mydatabase','',-2) AS "empty string";
1586+
empty string
1587+
--------------
1588+
1589+
(1 row)
1590+
15551591
select split_part('joeuser@mydatabase','@',0) AS "an error";
1556-
ERROR: field position must be greater than zero
1592+
ERROR: field position must not be zero
1593+
select split_part('joeuser@mydatabase','@@',1) AS "joeuser@mydatabase";
1594+
joeuser@mydatabase
1595+
--------------------
1596+
joeuser@mydatabase
1597+
(1 row)
1598+
1599+
select split_part('joeuser@mydatabase','@@',2) AS "empty string";
1600+
empty string
1601+
--------------
1602+
1603+
(1 row)
1604+
15571605
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
15581606
joeuser
15591607
---------
@@ -1578,6 +1626,30 @@ select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
15781626
joeuser
15791627
(1 row)
15801628

1629+
select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
1630+
mydatabase
1631+
------------
1632+
mydatabase
1633+
(1 row)
1634+
1635+
select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
1636+
joeuser
1637+
---------
1638+
joeuser
1639+
(1 row)
1640+
1641+
select split_part('joeuser@mydatabase','@',-3) AS "empty string";
1642+
empty string
1643+
--------------
1644+
1645+
(1 row)
1646+
1647+
select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
1648+
mydatabase
1649+
------------
1650+
mydatabase
1651+
(1 row)
1652+
15811653
--
15821654
-- test to_hex
15831655
--

src/test/regress/sql/strings.sql

+24
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,24 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
533533
--
534534
-- test split_part
535535
--
536+
select split_part('','@',1) AS "empty string";
537+
538+
select split_part('','@',-1) AS "empty string";
539+
540+
select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
541+
542+
select split_part('joeuser@mydatabase','',2) AS "empty string";
543+
544+
select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
545+
546+
select split_part('joeuser@mydatabase','',-2) AS "empty string";
547+
536548
select split_part('joeuser@mydatabase','@',0) AS "an error";
537549

550+
select split_part('joeuser@mydatabase','@@',1) AS "joeuser@mydatabase";
551+
552+
select split_part('joeuser@mydatabase','@@',2) AS "empty string";
553+
538554
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
539555

540556
select split_part('joeuser@mydatabase','@',2) AS "mydatabase";
@@ -543,6 +559,14 @@ select split_part('joeuser@mydatabase','@',3) AS "empty string";
543559

544560
select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
545561

562+
select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
563+
564+
select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
565+
566+
select split_part('joeuser@mydatabase','@',-3) AS "empty string";
567+
568+
select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
569+
546570
--
547571
-- test to_hex
548572
--

0 commit comments

Comments
 (0)