Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d87d548

Browse files
committed
Refactor to add pg_strcoll(), pg_strxfrm(), and variants.
Offers a generally better separation of responsibilities for collation code. Also, a step towards multi-lib ICU, which should be based on a clean separation of the routines required for collation providers.

Callers with NUL-terminated strings should call pg_strcoll() or pg_strxfrm(); callers with strings and their length should call the variants pg_strncoll() or pg_strnxfrm().

Reviewed-by: Peter Eisentraut, Peter Geoghegan
Discussion: https://postgr.es/m/a581136455c940d7bd0ff482d3a2bd51af25a94f.camel%40j-davis.com
1 parent e996073 commit d87d548

File tree

5 files changed

+871
-391
lines changed

5 files changed

+871
-391
lines changed

src/backend/access/hash/hashfunc.c

+34-27
Original file line numberDiff line numberDiff line change
@@ -292,21 +292,24 @@ hashtext(PG_FUNCTION_ARGS)
292292
#ifdef USE_ICU
293293
if (mylocale->provider == COLLPROVIDER_ICU)
294294
{
295-
int32_t ulen = -1;
296-
UChar *uchar = NULL;
297-
Size bsize;
298-
uint8_t *buf;
295+
Size bsize, rsize;
296+
char *buf;
297+
const char *keydata = VARDATA_ANY(key);
298+
size_t keylen = VARSIZE_ANY_EXHDR(key);
299299

300-
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
300+
bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
301+
buf = palloc(bsize + 1);
301302

302-
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
303-
uchar, ulen, NULL, 0);
304-
buf = palloc(bsize);
305-
ucol_getSortKey(mylocale->info.icu.ucol,
306-
uchar, ulen, buf, bsize);
307-
pfree(uchar);
303+
rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
304+
if (rsize != bsize)
305+
elog(ERROR, "pg_strnxfrm() returned unexpected result");
308306

309-
result = hash_any(buf, bsize);
307+
/*
308+
* In principle, there's no reason to include the terminating NUL
309+
* character in the hash, but it was done before and the behavior
310+
* must be preserved.
311+
*/
312+
result = hash_any((uint8_t *) buf, bsize + 1);
310313

311314
pfree(buf);
312315
}
@@ -350,21 +353,25 @@ hashtextextended(PG_FUNCTION_ARGS)
350353
#ifdef USE_ICU
351354
if (mylocale->provider == COLLPROVIDER_ICU)
352355
{
353-
int32_t ulen = -1;
354-
UChar *uchar = NULL;
355-
Size bsize;
356-
uint8_t *buf;
357-
358-
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
359-
360-
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
361-
uchar, ulen, NULL, 0);
362-
buf = palloc(bsize);
363-
ucol_getSortKey(mylocale->info.icu.ucol,
364-
uchar, ulen, buf, bsize);
365-
pfree(uchar);
366-
367-
result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
356+
Size bsize, rsize;
357+
char *buf;
358+
const char *keydata = VARDATA_ANY(key);
359+
size_t keylen = VARSIZE_ANY_EXHDR(key);
360+
361+
bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
362+
buf = palloc(bsize + 1);
363+
364+
rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
365+
if (rsize != bsize)
366+
elog(ERROR, "pg_strnxfrm() returned unexpected result");
367+
368+
/*
369+
* In principle, there's no reason to include the terminating NUL
370+
* character in the hash, but it was done before and the behavior
371+
* must be preserved.
372+
*/
373+
result = hash_any_extended((uint8_t *) buf, bsize + 1,
374+
PG_GETARG_INT64(1));
368375

369376
pfree(buf);
370377
}

0 commit comments

Comments
 (0)