Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b34e37b

Browse files
committed
Add sortsupport routines for text.
This provides a small but worthwhile speedup when sorting text, at least in cases to which the sortsupport machinery applies. Robert Haas and Peter Geoghegan
1 parent a4287a6 commit b34e37b

File tree

5 files changed

+215
-11
lines changed

5 files changed

+215
-11
lines changed

src/backend/utils/adt/varlena.c

+210-10
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
#include "utils/builtins.h"
2929
#include "utils/bytea.h"
3030
#include "utils/lsyscache.h"
31+
#include "utils/memutils.h"
3132
#include "utils/pg_locale.h"
33+
#include "utils/sortsupport.h"
3234

3335

3436
/* GUC variable */
@@ -50,12 +52,32 @@ typedef struct
5052
int skiptable[256]; /* skip distance for given mismatched char */
5153
} TextPositionState;
5254

55+
/*
 * Working state for the locale-aware text sort comparator
 * (bttextfastcmp_locale).  One instance is allocated when sort support is
 * set up and is reached through ssup->ssup_extra for the rest of the sort.
 *
 * buf1/buf2 are scratch buffers into which the two strings being compared
 * are copied and NUL-terminated before being handed to strcoll().  They
 * start out at TEXTBUFLEN bytes and are replaced by larger ones whenever an
 * input does not fit; buflen1/buflen2 must always reflect the currently
 * allocated size of the matching buffer.
 */
typedef struct
56+
{
57+
char *buf1; /* scratch copy of the 1st string */
58+
char *buf2; /* scratch copy of the 2nd string */
59+
int buflen1; /* allocated size of buf1, in bytes */
60+
int buflen2; /* allocated size of buf2, in bytes */
61+
#ifdef HAVE_LOCALE_T
62+
pg_locale_t locale; /* 0 for the default collation, else the locale looked up from the collation OID */
63+
#endif
64+
} TextSortSupport;
65+
66+
/*
67+
* This should be large enough that most strings will fit, but small enough
68+
* that we feel comfortable putting it on the stack
69+
*/
70+
#define TEXTBUFLEN 1024
71+
5372
#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
5473
#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
5574
#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
5675
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
5776
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
5877

78+
static void btsortsupport_worker(SortSupport ssup, Oid collid);
79+
static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
80+
static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
5981
static int32 text_length(Datum str);
6082
static text *text_catenate(text *t1, text *t2);
6183
static text *text_substring(Datum str,
@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13561378
}
13571379
else
13581380
{
1359-
#define STACKBUFLEN 1024
1360-
1361-
char a1buf[STACKBUFLEN];
1362-
char a2buf[STACKBUFLEN];
1381+
char a1buf[TEXTBUFLEN];
1382+
char a2buf[TEXTBUFLEN];
13631383
char *a1p,
13641384
*a2p;
13651385

@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13931413
int a2len;
13941414
int r;
13951415

1396-
if (len1 >= STACKBUFLEN / 2)
1416+
if (len1 >= TEXTBUFLEN / 2)
13971417
{
13981418
a1len = len1 * 2 + 2;
13991419
a1p = palloc(a1len);
14001420
}
14011421
else
14021422
{
1403-
a1len = STACKBUFLEN;
1423+
a1len = TEXTBUFLEN;
14041424
a1p = a1buf;
14051425
}
1406-
if (len2 >= STACKBUFLEN / 2)
1426+
if (len2 >= TEXTBUFLEN / 2)
14071427
{
14081428
a2len = len2 * 2 + 2;
14091429
a2p = palloc(a2len);
14101430
}
14111431
else
14121432
{
1413-
a2len = STACKBUFLEN;
1433+
a2len = TEXTBUFLEN;
14141434
a2p = a2buf;
14151435
}
14161436

@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
14751495
}
14761496
#endif /* WIN32 */
14771497

1478-
if (len1 >= STACKBUFLEN)
1498+
if (len1 >= TEXTBUFLEN)
14791499
a1p = (char *) palloc(len1 + 1);
14801500
else
14811501
a1p = a1buf;
1482-
if (len2 >= STACKBUFLEN)
1502+
if (len2 >= TEXTBUFLEN)
14831503
a2p = (char *) palloc(len2 + 1);
14841504
else
14851505
a2p = a2buf;
@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
16831703
PG_RETURN_INT32(result);
16841704
}
16851705

1706+
Datum
1707+
bttextsortsupport(PG_FUNCTION_ARGS)
1708+
{
1709+
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1710+
Oid collid = ssup->ssup_collation;
1711+
MemoryContext oldcontext;
1712+
1713+
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1714+
1715+
btsortsupport_worker(ssup, collid);
1716+
1717+
MemoryContextSwitchTo(oldcontext);
1718+
1719+
PG_RETURN_VOID();
1720+
}
1721+
1722+
static void
1723+
btsortsupport_worker(SortSupport ssup, Oid collid)
1724+
{
1725+
TextSortSupport *tss;
1726+
1727+
/*
1728+
* If LC_COLLATE = C, we can make things quite a bit faster by using
1729+
* memcmp() rather than strcoll(). To minimize the per-comparison
1730+
* overhead, we make this decision just once for the whole sort.
1731+
*/
1732+
if (lc_collate_is_c(collid))
1733+
{
1734+
ssup->comparator = bttextfastcmp_c;
1735+
return;
1736+
}
1737+
1738+
/*
1739+
* WIN32 requires complex hacks when the database encoding is UTF-8 (except
1740+
* when using the "C" collation). For now, we don't optimize that case.
1741+
*/
1742+
#ifdef WIN32
1743+
if (GetDatabaseEncoding() == PG_UTF8)
1744+
return;
1745+
#endif
1746+
1747+
/*
1748+
* We may need a collation-sensitive comparison. To make things faster,
1749+
* we'll figure out the collation based on the locale id and cache the
1750+
* result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1751+
* prepare one or two palloc'd buffers to use as temporary workspace. In
1752+
* the ad-hoc comparison case we only use palloc'd buffers when we need
1753+
* more space than we're comfortable allocating on the stack, but here we
1754+
* can keep the buffers around for the whole sort, so it makes sense to
1755+
* allocate them once and use them unconditionally.
1756+
*/
1757+
tss = palloc(sizeof(TextSortSupport));
1758+
#ifdef HAVE_LOCALE_T
1759+
tss->locale = 0;
1760+
#endif
1761+
1762+
if (collid != DEFAULT_COLLATION_OID)
1763+
{
1764+
if (!OidIsValid(collid))
1765+
{
1766+
/*
1767+
* This typically means that the parser could not resolve a
1768+
* conflict of implicit collations, so report it that way.
1769+
*/
1770+
ereport(ERROR,
1771+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
1772+
errmsg("could not determine which collation to use for string comparison"),
1773+
errhint("Use the COLLATE clause to set the collation explicitly.")));
1774+
}
1775+
#ifdef HAVE_LOCALE_T
1776+
tss->locale = pg_newlocale_from_collation(collid);
1777+
#endif
1778+
}
1779+
1780+
tss->buf1 = palloc(TEXTBUFLEN);
1781+
tss->buflen1 = TEXTBUFLEN;
1782+
tss->buf2 = palloc(TEXTBUFLEN);
1783+
tss->buflen2 = TEXTBUFLEN;
1784+
1785+
ssup->ssup_extra = tss;
1786+
ssup->comparator = bttextfastcmp_locale;
1787+
}
1788+
1789+
/*
1790+
* sortsupport comparison func (for C locale case)
1791+
*/
1792+
static int
1793+
bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
1794+
{
1795+
text *arg1 = DatumGetTextPP(x);
1796+
text *arg2 = DatumGetTextPP(y);
1797+
char *a1p,
1798+
*a2p;
1799+
int len1,
1800+
len2,
1801+
result;
1802+
1803+
a1p = VARDATA_ANY(arg1);
1804+
a2p = VARDATA_ANY(arg2);
1805+
1806+
len1 = VARSIZE_ANY_EXHDR(arg1);
1807+
len2 = VARSIZE_ANY_EXHDR(arg2);
1808+
1809+
result = memcmp(a1p, a2p, Min(len1, len2));
1810+
if ((result == 0) && (len1 != len2))
1811+
result = (len1 < len2) ? -1 : 1;
1812+
1813+
/* We can't afford to leak memory here. */
1814+
if (PointerGetDatum(arg1) != x)
1815+
pfree(arg1);
1816+
if (PointerGetDatum(arg2) != y)
1817+
pfree(arg2);
1818+
1819+
return result;
1820+
}
1821+
1822+
/*
1823+
* sortsupport comparison func (for locale case)
1824+
*/
1825+
static int
1826+
bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
1827+
{
1828+
text *arg1 = DatumGetTextPP(x);
1829+
text *arg2 = DatumGetTextPP(y);
1830+
TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
1831+
1832+
/* working state */
1833+
char *a1p,
1834+
*a2p;
1835+
int len1,
1836+
len2,
1837+
result;
1838+
1839+
a1p = VARDATA_ANY(arg1);
1840+
a2p = VARDATA_ANY(arg2);
1841+
1842+
len1 = VARSIZE_ANY_EXHDR(arg1);
1843+
len2 = VARSIZE_ANY_EXHDR(arg2);
1844+
1845+
if (len1 >= tss->buflen1)
1846+
{
1847+
pfree(tss->buf1);
1848+
tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
1849+
tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
1850+
}
1851+
if (len2 >= tss->buflen2)
1852+
{
1853+
pfree(tss->buf2);
1854+
tss->buflen1 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
1855+
tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
1856+
}
1857+
1858+
memcpy(tss->buf1, a1p, len1);
1859+
tss->buf1[len1] = '\0';
1860+
memcpy(tss->buf2, a2p, len2);
1861+
tss->buf2[len2] = '\0';
1862+
1863+
#ifdef HAVE_LOCALE_T
1864+
if (tss->locale)
1865+
result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
1866+
else
1867+
#endif
1868+
result = strcoll(tss->buf1, tss->buf2);
1869+
1870+
/*
1871+
* In some locales strcoll() can claim that nonidentical strings are equal.
1872+
* Believing that would be bad news for a number of reasons, so we follow
1873+
* Perl's lead and sort "equal" strings according to strcmp().
1874+
*/
1875+
if (result == 0)
1876+
result = strcmp(tss->buf1, tss->buf2);
1877+
1878+
/* We can't afford to leak memory here. */
1879+
if (PointerGetDatum(arg1) != x)
1880+
pfree(arg1);
1881+
if (PointerGetDatum(arg2) != y)
1882+
pfree(arg2);
1883+
1884+
return result;
1885+
}
16861886

16871887
Datum
16881888
text_larger(PG_FUNCTION_ARGS)

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201407151
56+
#define CATALOG_VERSION_NO 201408141
5757

5858
#endif

src/include/catalog/pg_amproc.h

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ DATA(insert ( 1989 26 26 1 356 ));
122122
DATA(insert ( 1989 26 26 2 3134 ));
123123
DATA(insert ( 1991 30 30 1 404 ));
124124
DATA(insert ( 1994 25 25 1 360 ));
125+
DATA(insert ( 1994 25 25 2 3255 ));
125126
DATA(insert ( 1996 1083 1083 1 1107 ));
126127
DATA(insert ( 2000 1266 1266 1 1358 ));
127128
DATA(insert ( 2002 1562 1562 1 1672 ));

src/include/catalog/pg_proc.h

+2
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ DATA(insert OID = 3135 ( btnamesortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i
614614
DESCR("sort support");
615615
DATA(insert OID = 360 ( bttextcmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "25 25" _null_ _null_ _null_ _null_ bttextcmp _null_ _null_ _null_ ));
616616
DESCR("less-equal-greater");
617+
DATA(insert OID = 3255 ( bttextsortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ bttextsortsupport _null_ _null_ _null_ ));
618+
DESCR("sort support");
617619
DATA(insert OID = 377 ( cash_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "790 790" _null_ _null_ _null_ _null_ cash_cmp _null_ _null_ _null_ ));
618620
DESCR("less-equal-greater");
619621
DATA(insert OID = 380 ( btreltimecmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "703 703" _null_ _null_ _null_ _null_ btreltimecmp _null_ _null_ _null_ ));

src/include/utils/builtins.h

+1
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ extern Datum bttintervalcmp(PG_FUNCTION_ARGS);
316316
extern Datum btcharcmp(PG_FUNCTION_ARGS);
317317
extern Datum btnamecmp(PG_FUNCTION_ARGS);
318318
extern Datum bttextcmp(PG_FUNCTION_ARGS);
319+
extern Datum bttextsortsupport(PG_FUNCTION_ARGS);
319320

320321
/*
321322
* Per-opclass sort support functions for new btrees. Like the

0 commit comments

Comments
 (0)