28
28
#include "utils/builtins.h"
29
29
#include "utils/bytea.h"
30
30
#include "utils/lsyscache.h"
31
+ #include "utils/memutils.h"
31
32
#include "utils/pg_locale.h"
33
+ #include "utils/sortsupport.h"
32
34
33
35
34
36
/* GUC variable */
@@ -50,12 +52,32 @@ typedef struct
50
52
int skiptable [256 ]; /* skip distance for given mismatched char */
51
53
} TextPositionState ;
52
54
55
/*
 * Per-sort working state for the locale-aware text comparator.
 *
 * The two buffers are scratch space into which the comparator copies each
 * input string so that it can be NUL-terminated before being handed to
 * strcoll()/strcoll_l().  They are allocated once when sort support is set
 * up and replaced by larger ones when a longer string is encountered.
 */
typedef struct
{
	char	   *buf1;			/* NUL-terminated copy of the 1st string */
	char	   *buf2;			/* NUL-terminated copy of the 2nd string */
	int			buflen1;		/* allocated size of buf1, in bytes */
	int			buflen2;		/* allocated size of buf2, in bytes */
#ifdef HAVE_LOCALE_T
	pg_locale_t locale;			/* 0 means use plain strcoll(); otherwise
								 * passed to strcoll_l() */
#endif
} TextSortSupport;
65
+
66
+ /*
67
+ * This should be large enough that most strings will fit, but small enough
68
+ * that we feel comfortable putting it on the stack
69
+ */
70
+ #define TEXTBUFLEN 1024
71
+
53
72
#define DatumGetUnknownP (X ) ((unknown *) PG_DETOAST_DATUM(X))
54
73
#define DatumGetUnknownPCopy (X ) ((unknown *) PG_DETOAST_DATUM_COPY(X))
55
74
#define PG_GETARG_UNKNOWN_P (n ) DatumGetUnknownP(PG_GETARG_DATUM(n))
56
75
#define PG_GETARG_UNKNOWN_P_COPY (n ) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
57
76
#define PG_RETURN_UNKNOWN_P (x ) PG_RETURN_POINTER(x)
58
77
78
+ static void btsortsupport_worker (SortSupport ssup , Oid collid );
79
+ static int bttextfastcmp_c (Datum x , Datum y , SortSupport ssup );
80
+ static int bttextfastcmp_locale (Datum x , Datum y , SortSupport ssup );
59
81
static int32 text_length (Datum str );
60
82
static text * text_catenate (text * t1 , text * t2 );
61
83
static text * text_substring (Datum str ,
@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1356
1378
}
1357
1379
else
1358
1380
{
1359
- #define STACKBUFLEN 1024
1360
-
1361
- char a1buf [STACKBUFLEN ];
1362
- char a2buf [STACKBUFLEN ];
1381
+ char a1buf [TEXTBUFLEN ];
1382
+ char a2buf [TEXTBUFLEN ];
1363
1383
char * a1p ,
1364
1384
* a2p ;
1365
1385
@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1393
1413
int a2len ;
1394
1414
int r ;
1395
1415
1396
- if (len1 >= STACKBUFLEN / 2 )
1416
+ if (len1 >= TEXTBUFLEN / 2 )
1397
1417
{
1398
1418
a1len = len1 * 2 + 2 ;
1399
1419
a1p = palloc (a1len );
1400
1420
}
1401
1421
else
1402
1422
{
1403
- a1len = STACKBUFLEN ;
1423
+ a1len = TEXTBUFLEN ;
1404
1424
a1p = a1buf ;
1405
1425
}
1406
- if (len2 >= STACKBUFLEN / 2 )
1426
+ if (len2 >= TEXTBUFLEN / 2 )
1407
1427
{
1408
1428
a2len = len2 * 2 + 2 ;
1409
1429
a2p = palloc (a2len );
1410
1430
}
1411
1431
else
1412
1432
{
1413
- a2len = STACKBUFLEN ;
1433
+ a2len = TEXTBUFLEN ;
1414
1434
a2p = a2buf ;
1415
1435
}
1416
1436
@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
1475
1495
}
1476
1496
#endif /* WIN32 */
1477
1497
1478
- if (len1 >= STACKBUFLEN )
1498
+ if (len1 >= TEXTBUFLEN )
1479
1499
a1p = (char * ) palloc (len1 + 1 );
1480
1500
else
1481
1501
a1p = a1buf ;
1482
- if (len2 >= STACKBUFLEN )
1502
+ if (len2 >= TEXTBUFLEN )
1483
1503
a2p = (char * ) palloc (len2 + 1 );
1484
1504
else
1485
1505
a2p = a2buf ;
@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
1683
1703
PG_RETURN_INT32 (result );
1684
1704
}
1685
1705
1706
+ Datum
1707
+ bttextsortsupport (PG_FUNCTION_ARGS )
1708
+ {
1709
+ SortSupport ssup = (SortSupport ) PG_GETARG_POINTER (0 );
1710
+ Oid collid = ssup -> ssup_collation ;
1711
+ MemoryContext oldcontext ;
1712
+
1713
+ oldcontext = MemoryContextSwitchTo (ssup -> ssup_cxt );
1714
+
1715
+ btsortsupport_worker (ssup , collid );
1716
+
1717
+ MemoryContextSwitchTo (oldcontext );
1718
+
1719
+ PG_RETURN_VOID ();
1720
+ }
1721
+
1722
+ static void
1723
+ btsortsupport_worker (SortSupport ssup , Oid collid )
1724
+ {
1725
+ TextSortSupport * tss ;
1726
+
1727
+ /*
1728
+ * If LC_COLLATE = C, we can make things quite a bit faster by using
1729
+ * memcmp() rather than strcoll(). To minimize the per-comparison
1730
+ * overhead, we make this decision just once for the whole sort.
1731
+ */
1732
+ if (lc_collate_is_c (collid ))
1733
+ {
1734
+ ssup -> comparator = bttextfastcmp_c ;
1735
+ return ;
1736
+ }
1737
+
1738
+ /*
1739
+ * WIN32 requires complex hacks when the database encoding is UTF-8 (except
1740
+ * when using the "C" collation). For now, we don't optimize that case.
1741
+ */
1742
+ #ifdef WIN32
1743
+ if (GetDatabaseEncoding () == PG_UTF8 )
1744
+ return ;
1745
+ #endif
1746
+
1747
+ /*
1748
+ * We may need a collation-sensitive comparison. To make things faster,
1749
+ * we'll figure out the collation based on the locale id and cache the
1750
+ * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1751
+ * prepare one or two palloc'd buffers to use as temporary workspace. In
1752
+ * the ad-hoc comparison case we only use palloc'd buffers when we need
1753
+ * more space than we're comfortable allocating on the stack, but here we
1754
+ * can keep the buffers around for the whole sort, so it makes sense to
1755
+ * allocate them once and use them unconditionally.
1756
+ */
1757
+ tss = palloc (sizeof (TextSortSupport ));
1758
+ #ifdef HAVE_LOCALE_T
1759
+ tss -> locale = 0 ;
1760
+ #endif
1761
+
1762
+ if (collid != DEFAULT_COLLATION_OID )
1763
+ {
1764
+ if (!OidIsValid (collid ))
1765
+ {
1766
+ /*
1767
+ * This typically means that the parser could not resolve a
1768
+ * conflict of implicit collations, so report it that way.
1769
+ */
1770
+ ereport (ERROR ,
1771
+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
1772
+ errmsg ("could not determine which collation to use for string comparison" ),
1773
+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
1774
+ }
1775
+ #ifdef HAVE_LOCALE_T
1776
+ tss -> locale = pg_newlocale_from_collation (collid );
1777
+ #endif
1778
+ }
1779
+
1780
+ tss -> buf1 = palloc (TEXTBUFLEN );
1781
+ tss -> buflen1 = TEXTBUFLEN ;
1782
+ tss -> buf2 = palloc (TEXTBUFLEN );
1783
+ tss -> buflen2 = TEXTBUFLEN ;
1784
+
1785
+ ssup -> ssup_extra = tss ;
1786
+ ssup -> comparator = bttextfastcmp_locale ;
1787
+ }
1788
+
1789
+ /*
1790
+ * sortsupport comparison func (for C locale case)
1791
+ */
1792
+ static int
1793
+ bttextfastcmp_c (Datum x , Datum y , SortSupport ssup )
1794
+ {
1795
+ text * arg1 = DatumGetTextPP (x );
1796
+ text * arg2 = DatumGetTextPP (y );
1797
+ char * a1p ,
1798
+ * a2p ;
1799
+ int len1 ,
1800
+ len2 ,
1801
+ result ;
1802
+
1803
+ a1p = VARDATA_ANY (arg1 );
1804
+ a2p = VARDATA_ANY (arg2 );
1805
+
1806
+ len1 = VARSIZE_ANY_EXHDR (arg1 );
1807
+ len2 = VARSIZE_ANY_EXHDR (arg2 );
1808
+
1809
+ result = memcmp (a1p , a2p , Min (len1 , len2 ));
1810
+ if ((result == 0 ) && (len1 != len2 ))
1811
+ result = (len1 < len2 ) ? -1 : 1 ;
1812
+
1813
+ /* We can't afford to leak memory here. */
1814
+ if (PointerGetDatum (arg1 ) != x )
1815
+ pfree (arg1 );
1816
+ if (PointerGetDatum (arg2 ) != y )
1817
+ pfree (arg2 );
1818
+
1819
+ return result ;
1820
+ }
1821
+
1822
+ /*
1823
+ * sortsupport comparison func (for locale case)
1824
+ */
1825
+ static int
1826
+ bttextfastcmp_locale (Datum x , Datum y , SortSupport ssup )
1827
+ {
1828
+ text * arg1 = DatumGetTextPP (x );
1829
+ text * arg2 = DatumGetTextPP (y );
1830
+ TextSortSupport * tss = (TextSortSupport * ) ssup -> ssup_extra ;
1831
+
1832
+ /* working state */
1833
+ char * a1p ,
1834
+ * a2p ;
1835
+ int len1 ,
1836
+ len2 ,
1837
+ result ;
1838
+
1839
+ a1p = VARDATA_ANY (arg1 );
1840
+ a2p = VARDATA_ANY (arg2 );
1841
+
1842
+ len1 = VARSIZE_ANY_EXHDR (arg1 );
1843
+ len2 = VARSIZE_ANY_EXHDR (arg2 );
1844
+
1845
+ if (len1 >= tss -> buflen1 )
1846
+ {
1847
+ pfree (tss -> buf1 );
1848
+ tss -> buflen1 = Max (len1 + 1 , Min (tss -> buflen1 * 2 , MaxAllocSize ));
1849
+ tss -> buf1 = MemoryContextAlloc (ssup -> ssup_cxt , tss -> buflen1 );
1850
+ }
1851
+ if (len2 >= tss -> buflen2 )
1852
+ {
1853
+ pfree (tss -> buf2 );
1854
+ tss -> buflen1 = Max (len2 + 1 , Min (tss -> buflen2 * 2 , MaxAllocSize ));
1855
+ tss -> buf2 = MemoryContextAlloc (ssup -> ssup_cxt , tss -> buflen2 );
1856
+ }
1857
+
1858
+ memcpy (tss -> buf1 , a1p , len1 );
1859
+ tss -> buf1 [len1 ] = '\0' ;
1860
+ memcpy (tss -> buf2 , a2p , len2 );
1861
+ tss -> buf2 [len2 ] = '\0' ;
1862
+
1863
+ #ifdef HAVE_LOCALE_T
1864
+ if (tss -> locale )
1865
+ result = strcoll_l (tss -> buf1 , tss -> buf2 , tss -> locale );
1866
+ else
1867
+ #endif
1868
+ result = strcoll (tss -> buf1 , tss -> buf2 );
1869
+
1870
+ /*
1871
+ * In some locales strcoll() can claim that nonidentical strings are equal.
1872
+ * Believing that would be bad news for a number of reasons, so we follow
1873
+ * Perl's lead and sort "equal" strings according to strcmp().
1874
+ */
1875
+ if (result == 0 )
1876
+ result = strcmp (tss -> buf1 , tss -> buf2 );
1877
+
1878
+ /* We can't afford to leak memory here. */
1879
+ if (PointerGetDatum (arg1 ) != x )
1880
+ pfree (arg1 );
1881
+ if (PointerGetDatum (arg2 ) != y )
1882
+ pfree (arg2 );
1883
+
1884
+ return result ;
1885
+ }
1686
1886
1687
1887
Datum
1688
1888
text_larger (PG_FUNCTION_ARGS )
0 commit comments