Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0bf94b0

Browse files
author
Commitfest Bot
committed
[CF 5474] v1 - Speed up ICU case conversions
This branch was automatically generated by a robot using patches from an email thread registered at:
https://commitfest.postgresql.org/patch/5474

The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.

Patch(es): https://www.postgresql.org/message-id/167986ff-afcf-4542-94c6-61ee8474e138@proxel.se
Author(s): Andreas Karlsson
2 parents a0a4601 + a903b4d commit 0bf94b0

File tree

1 file changed

+86
-41
lines changed

1 file changed

+86
-41
lines changed

src/backend/utils/adt/pg_locale_icu.c

Lines changed: 86 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "postgres.h"
1313

1414
#ifdef USE_ICU
15+
#include "unicode/ucasemap.h"
1516
#include <unicode/ucnv.h>
1617
#include <unicode/ustring.h>
1718

@@ -112,9 +113,12 @@ static size_t icu_from_uchar(char *dest, size_t destsize,
112113
const UChar *buff_uchar, int32_t len_uchar);
113114
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
114115
UErrorCode *status);
115-
static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
116-
UChar **buff_dest, UChar *buff_source,
117-
int32_t len_source);
116+
static int32_t icu_convert_case_no_utf8(ICU_Convert_Func func, char *dest,
117+
size_t destsize, const char *src,
118+
ssize_t srclen, pg_locale_t locale);
119+
static int32_t icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
120+
UChar **buff_dest, UChar *buff_source,
121+
int32_t len_source);
118122
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
119123
const UChar *src, int32_t srcLength,
120124
const char *locale,
@@ -389,60 +393,81 @@ size_t
389393
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
390394
pg_locale_t locale)
391395
{
392-
int32_t len_uchar;
393-
int32_t len_conv;
394-
UChar *buff_uchar;
395-
UChar *buff_conv;
396-
size_t result_len;
396+
if (GetDatabaseEncoding() == PG_UTF8)
397+
{
398+
UErrorCode status = U_ZERO_ERROR;
399+
UCaseMap *casemap;
400+
int32_t needed;
397401

398-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
399-
len_conv = icu_convert_case(u_strToLower, locale,
400-
&buff_conv, buff_uchar, len_uchar);
401-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
402-
pfree(buff_uchar);
403-
pfree(buff_conv);
402+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
403+
if (U_FAILURE(status))
404+
ereport(ERROR,
405+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
404406

405-
return result_len;
407+
status = U_ZERO_ERROR;
408+
needed = ucasemap_utf8ToLower(casemap, dest, destsize, src, srclen, &status);
409+
ucasemap_close(casemap);
410+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
411+
ereport(ERROR,
412+
(errmsg("case conversion failed: %s", u_errorName(status))));
413+
return needed;
414+
}
415+
else
416+
return icu_convert_case_no_utf8(u_strToLower, dest, destsize, src, srclen, locale);
406417
}
407418

408419
size_t
409420
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
410421
pg_locale_t locale)
411422
{
412-
int32_t len_uchar;
413-
int32_t len_conv;
414-
UChar *buff_uchar;
415-
UChar *buff_conv;
416-
size_t result_len;
423+
if (GetDatabaseEncoding() == PG_UTF8)
424+
{
425+
UErrorCode status = U_ZERO_ERROR;
426+
UCaseMap *casemap;
427+
int32_t needed;
417428

418-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
419-
len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
420-
&buff_conv, buff_uchar, len_uchar);
421-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
422-
pfree(buff_uchar);
423-
pfree(buff_conv);
429+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
430+
if (U_FAILURE(status))
431+
ereport(ERROR,
432+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
424433

425-
return result_len;
434+
status = U_ZERO_ERROR;
435+
needed = ucasemap_utf8ToTitle(casemap, dest, destsize, src, srclen, &status);
436+
ucasemap_close(casemap);
437+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
438+
ereport(ERROR,
439+
(errmsg("case conversion failed: %s", u_errorName(status))));
440+
return needed;
441+
}
442+
else
443+
return icu_convert_case_no_utf8(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
426444
}
427445

428446
size_t
429447
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
430448
pg_locale_t locale)
431449
{
432-
int32_t len_uchar;
433-
int32_t len_conv;
434-
UChar *buff_uchar;
435-
UChar *buff_conv;
436-
size_t result_len;
450+
if (GetDatabaseEncoding() == PG_UTF8)
451+
{
452+
UErrorCode status = U_ZERO_ERROR;
453+
UCaseMap *casemap;
454+
int32_t needed;
437455

438-
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
439-
len_conv = icu_convert_case(u_strToUpper, locale,
440-
&buff_conv, buff_uchar, len_uchar);
441-
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
442-
pfree(buff_uchar);
443-
pfree(buff_conv);
456+
casemap = ucasemap_open(locale->info.icu.locale, U_FOLD_CASE_DEFAULT, &status);
457+
if (U_FAILURE(status))
458+
ereport(ERROR,
459+
(errmsg("casemap lookup failed: %s", u_errorName(status))));
444460

445-
return result_len;
461+
status = U_ZERO_ERROR;
462+
needed = ucasemap_utf8ToUpper(casemap, dest, destsize, src, srclen, &status);
463+
ucasemap_close(casemap);
464+
if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
465+
ereport(ERROR,
466+
(errmsg("case conversion failed: %s", u_errorName(status))));
467+
return needed;
468+
}
469+
else
470+
return icu_convert_case_no_utf8(u_strToUpper, dest, destsize, src, srclen, locale);
446471
}
447472

448473
size_t
@@ -663,8 +688,28 @@ icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len
663688
}
664689

665690
static int32_t
666-
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
667-
UChar **buff_dest, UChar *buff_source, int32_t len_source)
691+
icu_convert_case_no_utf8(ICU_Convert_Func func, char *dest, size_t destsize,
692+
const char *src, ssize_t srclen, pg_locale_t locale)
693+
{
694+
int32_t len_uchar;
695+
int32_t len_conv;
696+
UChar *buff_uchar;
697+
UChar *buff_conv;
698+
size_t result_len;
699+
700+
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
701+
len_conv = icu_convert_case_uchar(func, locale, &buff_conv,
702+
buff_uchar, len_uchar);
703+
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
704+
pfree(buff_uchar);
705+
pfree(buff_conv);
706+
707+
return result_len;
708+
}
709+
710+
static int32_t
711+
icu_convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
712+
UChar **buff_dest, UChar *buff_source, int32_t len_source)
668713
{
669714
UErrorCode status;
670715
int32_t len_dest;

0 commit comments

Comments
 (0)