Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b181a91

Browse files
committed
More fixes for abbreviated keys infrastructure.
First, when LC_COLLATE = C, bttext_abbrev_convert should use memcpy() rather than strxfrm() to construct the abbreviated key, because the authoritative comparator uses memcpy(). If we do anything else here, we might get inconsistent answers, and the buildfarm says this risk is not theoretical. It should be faster this way, too. Second, while I'm looking at bttext_abbrev_convert, convert a needless use of goto into the loop it's trying to implement into an actual loop. Both of the above problems date to the original commit of abbreviated keys, commit 4ea51cd. Third, fix a bogus assignment to tss->locale before tss is set up. That's a new goof in commit b529b65.
1 parent b529b65 commit b181a91

File tree

1 file changed

+52
-35
lines changed

1 file changed

+52
-35
lines changed

src/backend/utils/adt/varlena.c

+52-35
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ typedef struct
6363
char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
6464
int buflen1;
6565
int buflen2;
66+
bool collate_c;
6667
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
6768
hyperLogLogState full_card; /* Full key cardinality state */
6869
#ifdef HAVE_LOCALE_T
@@ -1744,7 +1745,7 @@ static void
17441745
btsortsupport_worker(SortSupport ssup, Oid collid)
17451746
{
17461747
bool abbreviate = ssup->abbreviate;
1747-
bool locale_aware = false;
1748+
bool collate_c = false;
17481749
TextSortSupport *tss;
17491750

17501751
#ifdef HAVE_LOCALE_T
@@ -1769,15 +1770,17 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
17691770
* bttextcmp() via the fmgr trampoline.
17701771
*/
17711772
if (lc_collate_is_c(collid))
1773+
{
17721774
ssup->comparator = bttextfastcmp_c;
1775+
collate_c = true;
1776+
}
17731777
#ifdef WIN32
17741778
else if (GetDatabaseEncoding() == PG_UTF8)
17751779
return;
17761780
#endif
17771781
else
17781782
{
17791783
ssup->comparator = bttextfastcmp_locale;
1780-
locale_aware = true;
17811784

17821785
/*
17831786
* We need a collation-sensitive comparison. To make things faster,
@@ -1798,7 +1801,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
17981801
errhint("Use the COLLATE clause to set the collation explicitly.")));
17991802
}
18001803
#ifdef HAVE_LOCALE_T
1801-
tss->locale = pg_newlocale_from_collation(collid);
1804+
locale = pg_newlocale_from_collation(collid);
18021805
#endif
18031806
}
18041807
}
@@ -1828,7 +1831,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18281831
* will make use of the temporary buffers we initialize here for scratch
18291832
* space, and the abbreviation case requires additional state.
18301833
*/
1831-
if (abbreviate || locale_aware)
1834+
if (abbreviate || !collate_c)
18321835
{
18331836
tss = palloc(sizeof(TextSortSupport));
18341837
tss->buf1 = palloc(TEXTBUFLEN);
@@ -1838,6 +1841,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18381841
#ifdef HAVE_LOCALE_T
18391842
tss->locale = locale;
18401843
#endif
1844+
tss->collate_c = collate_c;
18411845
ssup->ssup_extra = tss;
18421846

18431847
/*
@@ -2011,45 +2015,58 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20112015
memset(pres, 0, sizeof(Datum));
20122016
len = VARSIZE_ANY_EXHDR(authoritative);
20132017

2014-
/* By convention, we use buffer 1 to store and NUL-terminate text */
2015-
if (len >= tss->buflen1)
2018+
/*
2019+
* If we're using the C collation, use memcmp(), rather than strxfrm(),
2020+
* to abbreviated keys. The full comparator for the C locale is always
2021+
* memcmp(), and we can't risk having this give a different answer.
2022+
* Besides, this should be faster, too.
2023+
*/
2024+
if (tss->collate_c)
2025+
memcpy(pres, VARDATA_ANY(authoritative), Min(len, sizeof(Datum)));
2026+
else
20162027
{
2017-
pfree(tss->buf1);
2018-
tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
2019-
tss->buf1 = palloc(tss->buflen1);
2020-
}
2028+
/*
2029+
* We're not using the C collation, so fall back on strxfrm.
2030+
*/
20212031

2022-
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
2023-
memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
2024-
tss->buf1[len] = '\0';
2032+
/* By convention, we use buffer 1 to store and NUL-terminate text */
2033+
if (len >= tss->buflen1)
2034+
{
2035+
pfree(tss->buf1);
2036+
tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
2037+
tss->buf1 = palloc(tss->buflen1);
2038+
}
20252039

2026-
/* Don't leak memory here */
2027-
if (PointerGetDatum(authoritative) != original)
2028-
pfree(authoritative);
2040+
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
2041+
memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
2042+
tss->buf1[len] = '\0';
20292043

2030-
retry:
2044+
/* Don't leak memory here */
2045+
if (PointerGetDatum(authoritative) != original)
2046+
pfree(authoritative);
20312047

2032-
/*
2033-
* There is no special handling of the C locale here, unlike with
2034-
* varstr_cmp(). strxfrm() is used indifferently.
2035-
*/
2048+
for (;;)
2049+
{
20362050
#ifdef HAVE_LOCALE_T
2037-
if (tss->locale)
2038-
bsize = strxfrm_l(tss->buf2, tss->buf1, tss->buflen2, tss->locale);
2039-
else
2051+
if (tss->locale)
2052+
bsize = strxfrm_l(tss->buf2, tss->buf1,
2053+
tss->buflen2, tss->locale);
2054+
else
20402055
#endif
2041-
bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
2056+
bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
20422057

2043-
if (bsize >= tss->buflen2)
2044-
{
2045-
/*
2046-
* The C standard states that the contents of the buffer is now
2047-
* unspecified. Grow buffer, and retry.
2048-
*/
2049-
pfree(tss->buf2);
2050-
tss->buflen2 = Max(bsize + 1, Min(tss->buflen2 * 2, MaxAllocSize));
2051-
tss->buf2 = palloc(tss->buflen2);
2052-
goto retry;
2058+
if (bsize < tss->buflen2)
2059+
break;
2060+
2061+
/*
2062+
* The C standard states that the contents of the buffer is now
2063+
* unspecified. Grow buffer, and retry.
2064+
*/
2065+
pfree(tss->buf2);
2066+
tss->buflen2 = Max(bsize + 1,
2067+
Min(tss->buflen2 * 2, MaxAllocSize));
2068+
tss->buf2 = palloc(tss->buflen2);
2069+
}
20532070
}
20542071

20552072
/*

0 commit comments

Comments
 (0)