Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 36cbe9f

Browse files
jeltz
authored and
Commitfest Bot
committed
Use optimized versions of ICU case conversion for UTF-8
Instead of converting to and from UChar when doing case conversions, we use the UTF-8 versions of the functions. This can give a significant speedup, 15-20%, on short to medium length strings.
1 parent a0a4601 commit 36cbe9f

File tree

1 file changed

+108
-41
lines changed

1 file changed

+108
-41
lines changed

src/backend/utils/adt/pg_locale_icu.c

Lines changed: 108 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "postgres.h"
1313

1414
#ifdef USE_ICU
15+
#include "unicode/ucasemap.h"
1516
#include <unicode/ucnv.h>
1617
#include <unicode/ustring.h>
1718

@@ -112,9 +113,9 @@ static size_t icu_from_uchar(char *dest, size_t destsize,
112113
const UChar *buff_uchar, int32_t len_uchar);
113114
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
114115
UErrorCode *status);
115-
static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
116-
UChar **buff_dest, UChar *buff_source,
117-
int32_t len_source);
116+
static int32_t icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
117+
UChar **buff_dest, UChar *buff_source,
118+
int32_t len_source);
118119
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
119120
const UChar *src, int32_t srcLength,
120121
const char *locale,
@@ -389,60 +390,126 @@ size_t
389390
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
390391
pg_locale_t locale)
391392
{
392-
int32_t len_uchar;
393-
int32_t len_conv;
394-
UChar *buff_uchar;
395-
UChar *buff_conv;
396-
size_t result_len;
393+
if (GetDatabaseEncoding() == PG_UTF8)
394+
{
395+
UErrorCode status = U_ZERO_ERROR;
396+
UCaseMap *casemap;
397+
int32_t needed;
397398

398-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
399-
len_conv = icu_convert_case(u_strToLower, locale,
400-
&buff_conv, buff_uchar, len_uchar);
401-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
402-
pfree(buff_uchar);
403-
pfree(buff_conv);
399+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
400+
if (U_FAILURE(status))
401+
ereport(ERROR,
402+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
404403

405-
return result_len;
404+
status = U_ZERO_ERROR;
405+
needed = ucasemap_utf8ToLower(casemap, dest, destsize, src, srclen, &status);
406+
ucasemap_close(casemap);
407+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
408+
ereport(ERROR,
409+
(errmsg("case conversion failed: %s", u_errorName(status))));
410+
return needed;
411+
}
412+
else
413+
{
414+
int32_t len_uchar;
415+
int32_t len_conv;
416+
UChar *buff_uchar;
417+
UChar *buff_conv;
418+
size_t result_len;
419+
420+
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
421+
len_conv = icu_convert_case_uchar(u_strToLower, locale, &buff_conv,
422+
buff_uchar, len_uchar);
423+
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
424+
pfree(buff_uchar);
425+
pfree(buff_conv);
426+
427+
return result_len;
428+
}
406429
}
407430

408431
size_t
409432
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
410433
pg_locale_t locale)
411434
{
412-
int32_t len_uchar;
413-
int32_t len_conv;
414-
UChar *buff_uchar;
415-
UChar *buff_conv;
416-
size_t result_len;
435+
if (GetDatabaseEncoding() == PG_UTF8)
436+
{
437+
UErrorCode status = U_ZERO_ERROR;
438+
UCaseMap *casemap;
439+
int32_t needed;
417440

418-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
419-
len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
420-
&buff_conv, buff_uchar, len_uchar);
421-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
422-
pfree(buff_uchar);
423-
pfree(buff_conv);
441+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
442+
if (U_FAILURE(status))
443+
ereport(ERROR,
444+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
424445

425-
return result_len;
446+
status = U_ZERO_ERROR;
447+
needed = ucasemap_utf8ToTitle(casemap, dest, destsize, src, srclen, &status);
448+
ucasemap_close(casemap);
449+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
450+
ereport(ERROR,
451+
(errmsg("case conversion failed: %s", u_errorName(status))));
452+
return needed;
453+
}
454+
else
455+
{
456+
int32_t len_uchar;
457+
int32_t len_conv;
458+
UChar *buff_uchar;
459+
UChar *buff_conv;
460+
size_t result_len;
461+
462+
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
463+
len_conv = icu_convert_case_uchar(u_strToTitle_default_BI, locale, &buff_conv,
464+
buff_uchar, len_uchar);
465+
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
466+
pfree(buff_uchar);
467+
pfree(buff_conv);
468+
469+
return result_len;
470+
}
426471
}
427472

428473
size_t
429474
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
430475
pg_locale_t locale)
431476
{
432-
int32_t len_uchar;
433-
int32_t len_conv;
434-
UChar *buff_uchar;
435-
UChar *buff_conv;
436-
size_t result_len;
477+
if (GetDatabaseEncoding() == PG_UTF8)
478+
{
479+
UErrorCode status = U_ZERO_ERROR;
480+
UCaseMap *casemap;
481+
int32_t needed;
437482

438-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
439-
len_conv = icu_convert_case(u_strToUpper, locale,
440-
&buff_conv, buff_uchar, len_uchar);
441-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
442-
pfree(buff_uchar);
443-
pfree(buff_conv);
483+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
484+
if (U_FAILURE(status))
485+
ereport(ERROR,
486+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
444487

445-
return result_len;
488+
status = U_ZERO_ERROR;
489+
needed = ucasemap_utf8ToUpper(casemap, dest, destsize, src, srclen, &status);
490+
ucasemap_close(casemap);
491+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
492+
ereport(ERROR,
493+
(errmsg("case conversion failed: %s", u_errorName(status))));
494+
return needed;
495+
}
496+
else
497+
{
498+
int32_t len_uchar;
499+
int32_t len_conv;
500+
UChar *buff_uchar;
501+
UChar *buff_conv;
502+
size_t result_len;
503+
504+
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
505+
len_conv = icu_convert_case_uchar(u_strToUpper, locale, &buff_conv,
506+
buff_uchar, len_uchar);
507+
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
508+
pfree(buff_uchar);
509+
pfree(buff_conv);
510+
511+
return result_len;
512+
}
446513
}
447514

448515
size_t
@@ -663,8 +730,8 @@ icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len
663730
}
664731

665732
static int32_t
666-
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
667-
UChar **buff_dest, UChar *buff_source, int32_t len_source)
733+
icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
734+
UChar **buff_dest, UChar *buff_source, int32_t len_source)
668735
{
669736
UErrorCode status;
670737
int32_t len_dest;

0 commit comments

Comments
 (0)