Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a890ad2

Browse files
committed
selfuncs.c: use pg_strxfrm() instead of strxfrm().
pg_strxfrm() takes a pg_locale_t, so it works properly with all providers. This improves estimates for ICU when performing linear interpolation within a histogram bin.

Previously, convert_string_datum() always used strxfrm() and relied on setlocale(). That did not produce good estimates for non-default or non-libc collations.

Discussion: https://postgr.es/m/89475ee5487d795124f4e25118ea8f1853edb8cb.camel@j-davis.com
1 parent a54d4ed commit a890ad2

File tree

2 files changed: 25 additions and 11 deletions.

src/backend/utils/adt/pg_locale.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,14 +2124,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
21242124
pg_locale_t locale)
21252125
{
21262126
Assert(locale->provider == COLLPROVIDER_LIBC);
2127-
2128-
#ifdef TRUST_STRXFRM
21292127
return strxfrm_l(dest, src, destsize, locale->info.lt);
2130-
#else
2131-
/* shouldn't happen */
2132-
PGLOCALE_SUPPORT_ERROR(locale->provider);
2133-
return 0; /* keep compiler quiet */
2134-
#endif
21352128
}
21362129

21372130
static size_t
@@ -2340,6 +2333,10 @@ pg_strxfrm_enabled(pg_locale_t locale)
23402333
* The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
23412334
* may be NULL.
23422335
*
2336+
* Not all providers support pg_strxfrm() safely. The caller should check
2337+
* pg_strxfrm_enabled() first, otherwise this function may return wrong
2338+
* results or an error.
2339+
*
23432340
* Returns the number of bytes needed (or more) to store the transformed
23442341
* string, excluding the terminating nul byte. If the value returned is
23452342
* 'destsize' or greater, the resulting contents of 'dest' are undefined.
@@ -2372,6 +2369,10 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
23722369
* 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
23732370
* be NULL.
23742371
*
2372+
* Not all providers support pg_strnxfrm() safely. The caller should check
2373+
* pg_strxfrm_enabled() first, otherwise this function may return wrong
2374+
* results or an error.
2375+
*
23752376
* Returns the number of bytes needed (or more) to store the transformed
23762377
* string, excluding the terminating nul byte. If the value returned is
23772378
* 'destsize' or greater, the resulting contents of 'dest' are undefined.
@@ -2426,6 +2427,10 @@ pg_strxfrm_prefix_enabled(pg_locale_t locale)
24262427
*
24272428
* The provided 'src' must be nul-terminated.
24282429
*
2430+
* Not all providers support pg_strxfrm_prefix() safely. The caller should
2431+
* check pg_strxfrm_prefix_enabled() first, otherwise this function may return
2432+
* wrong results or an error.
2433+
*
24292434
* If destsize is not large enough to hold the resulting byte sequence, stores
24302435
* only the first destsize bytes in 'dest'. Returns the number of bytes
24312436
* actually copied to 'dest'.
@@ -2455,6 +2460,10 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
24552460
*
24562461
* The provided 'src' must be nul-terminated.
24572462
*
2463+
* Not all providers support pg_strnxfrm_prefix() safely. The caller should
2464+
* check pg_strxfrm_prefix_enabled() first, otherwise this function may return
2465+
* wrong results or an error.
2466+
*
24582467
* If destsize is not large enough to hold the resulting byte sequence, stores
24592468
* only the first destsize bytes in 'dest'. Returns the number of bytes
24602469
* actually copied to 'dest'.

src/backend/utils/adt/selfuncs.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4639,7 +4639,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
46394639
* On failure (e.g., unsupported typid), set *failure to true;
46404640
* otherwise, that variable is not changed. (We'll return NULL on failure.)
46414641
*
4642-
* When using a non-C locale, we must pass the string through strxfrm()
4642+
* When using a non-C locale, we must pass the string through pg_strxfrm()
46434643
* before continuing, so as to generate correct locale-specific results.
46444644
*/
46454645
static char *
@@ -4673,20 +4673,25 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
46734673

46744674
if (!lc_collate_is_c(collid))
46754675
{
4676+
pg_locale_t mylocale = pg_newlocale_from_collation(collid);
46764677
char *xfrmstr;
46774678
size_t xfrmlen;
46784679
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
46794680

46804681
/*
46814682
* XXX: We could guess at a suitable output buffer size and only call
4682-
* strxfrm twice if our guess is too small.
4683+
* pg_strxfrm() twice if our guess is too small.
46834684
*
46844685
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
46854686
* bogus data or set an error. This is not really a problem unless it
46864687
* crashes since it will only give an estimation error and nothing
46874688
* fatal.
4689+
*
4690+
* XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
4691+
* some cases, libc strxfrm() may return the wrong results, but that
4692+
* will only lead to an estimation error.
46884693
*/
4689-
xfrmlen = strxfrm(NULL, val, 0);
4694+
xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
46904695
#ifdef WIN32
46914696

46924697
/*
@@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
46984703
return val;
46994704
#endif
47004705
xfrmstr = (char *) palloc(xfrmlen + 1);
4701-
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4706+
xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
47024707

47034708
/*
47044709
* Some systems (e.g., glibc) can return a smaller value from the

0 commit comments

Comments
 (0)