Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 80b011e

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent c805659 commit 80b011e

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,12 +1492,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14921492
/* C/POSIX collations use this path regardless of database encoding */
14931493
if (lc_ctype_is_c(collid))
14941494
{
1495-
char *p;
1496-
1497-
result = pnstrdup(buff, nbytes);
1498-
1499-
for (p = result; *p; p++)
1500-
*p = pg_ascii_tolower((unsigned char) *p);
1495+
result = asc_tolower(buff, nbytes);
15011496
}
15021497
#ifdef USE_WIDE_UPPER_LOWER
15031498
else if (pg_database_encoding_max_length() > 1)
@@ -1617,12 +1612,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16171612
/* C/POSIX collations use this path regardless of database encoding */
16181613
if (lc_ctype_is_c(collid))
16191614
{
1620-
char *p;
1621-
1622-
result = pnstrdup(buff, nbytes);
1623-
1624-
for (p = result; *p; p++)
1625-
*p = pg_ascii_toupper((unsigned char) *p);
1615+
result = asc_toupper(buff, nbytes);
16261616
}
16271617
#ifdef USE_WIDE_UPPER_LOWER
16281618
else if (pg_database_encoding_max_length() > 1)
@@ -1743,23 +1733,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17431733
/* C/POSIX collations use this path regardless of database encoding */
17441734
if (lc_ctype_is_c(collid))
17451735
{
1746-
char *p;
1747-
1748-
result = pnstrdup(buff, nbytes);
1749-
1750-
for (p = result; *p; p++)
1751-
{
1752-
char c;
1753-
1754-
if (wasalnum)
1755-
*p = c = pg_ascii_tolower((unsigned char) *p);
1756-
else
1757-
*p = c = pg_ascii_toupper((unsigned char) *p);
1758-
/* we don't trust isalnum() here */
1759-
wasalnum = ((c >= 'A' && c <= 'Z') ||
1760-
(c >= 'a' && c <= 'z') ||
1761-
(c >= '0' && c <= '9'));
1762-
}
1736+
result = asc_initcap(buff, nbytes);
17631737
}
17641738
#ifdef USE_WIDE_UPPER_LOWER
17651739
else if (pg_database_encoding_max_length() > 1)
@@ -1886,6 +1860,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18861860
return result;
18871861
}
18881862

1863+
/*
 * asc_tolower: fold a string to lower case using ASCII-only rules
 *
 * The input is described by a pointer plus a byte count, so callers can
 * hand in either varlena payloads or ordinary C strings.  The return value
 * is a palloc'd, null-terminated copy; a NULL input yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *cur;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* rewrite the copy in place, one byte at a time, up to the terminator */
	cur = result;
	while (*cur != '\0')
	{
		*cur = pg_ascii_tolower((unsigned char) *cur);
		cur++;
	}

	return result;
}
1885+
1886+
/*
 * asc_toupper: fold a string to upper case using ASCII-only rules
 *
 * The input is described by a pointer plus a byte count, so callers can
 * hand in either varlena payloads or ordinary C strings.  The return value
 * is a palloc'd, null-terminated copy; a NULL input yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *cur;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* rewrite the copy in place, one byte at a time, up to the terminator */
	cur = result;
	while (*cur != '\0')
	{
		*cur = pg_ascii_toupper((unsigned char) *cur);
		cur++;
	}

	return result;
}
1908+
1909+
/*
 * asc_initcap: capitalize the first letter of each word, ASCII-only rules
 *
 * A character is upper-cased when the preceding character was not an ASCII
 * letter or digit, and lower-cased otherwise.  The input is described by a
 * pointer plus a byte count, so callers can hand in either varlena payloads
 * or ordinary C strings.  The return value is a palloc'd, null-terminated
 * copy; a NULL input yields NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *cur;
	int			prev_alnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	cur = result;
	while (*cur != '\0')
	{
		char		folded;

		/* inside a word we lower-case; at a word start we upper-case */
		if (prev_alnum)
			folded = pg_ascii_tolower((unsigned char) *cur);
		else
			folded = pg_ascii_toupper((unsigned char) *cur);
		*cur = folded;
		cur++;

		/* classify by explicit ASCII ranges; isalnum() is not trusted here */
		prev_alnum = ((folded >= '0' && folded <= '9') ||
					  (folded >= 'A' && folded <= 'Z') ||
					  (folded >= 'a' && folded <= 'z'));
	}

	return result;
}
1943+
18891944
/* convenience routines for when the input is null-terminated */
18901945

18911946
static char *
@@ -1906,6 +1961,20 @@ str_initcap_z(const char *buff, Oid collid)
19061961
return str_initcap(buff, strlen(buff), collid);
19071962
}
19081963

1964+
/*
 * asc_tolower_z: asc_tolower() for a null-terminated input string.
 *
 * The length is taken with strlen(), so buff must not be NULL.
 */
static char *
asc_tolower_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_tolower(buff, len);
}
1969+
1970+
/*
 * asc_toupper_z: asc_toupper() for a null-terminated input string.
 *
 * The length is taken with strlen(), so buff must not be NULL.
 */
static char *
asc_toupper_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_toupper(buff, len);
}
1975+
1976+
/* asc_initcap_z is not currently needed */
1977+
19091978

19101979
/* ----------
19111980
* Skip TM / th in FROM_CHAR
@@ -2418,7 +2487,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24182487
INVALID_FOR_INTERVAL;
24192488
if (tmtcTzn(in))
24202489
{
2421-
char *p = str_tolower_z(tmtcTzn(in), collid);
2490+
/* We assume here that timezone names aren't localized */
2491+
char *p = asc_tolower_z(tmtcTzn(in));
24222492

24232493
strcpy(s, p);
24242494
pfree(p);
@@ -2465,7 +2535,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24652535
strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1], collid));
24662536
else
24672537
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2468-
str_toupper_z(months_full[tm->tm_mon - 1], collid));
2538+
asc_toupper_z(months_full[tm->tm_mon - 1]));
24692539
s += strlen(s);
24702540
break;
24712541
case DCH_Month:
@@ -2475,7 +2545,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24752545
if (S_TM(n->suffix))
24762546
strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1], collid));
24772547
else
2478-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2548+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2549+
months_full[tm->tm_mon - 1]);
24792550
s += strlen(s);
24802551
break;
24812552
case DCH_month:
@@ -2485,10 +2556,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24852556
if (S_TM(n->suffix))
24862557
strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1], collid));
24872558
else
2488-
{
2489-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
2490-
*s = pg_tolower((unsigned char) *s);
2491-
}
2559+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2560+
asc_tolower_z(months_full[tm->tm_mon - 1]));
24922561
s += strlen(s);
24932562
break;
24942563
case DCH_MON:
@@ -2498,7 +2567,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24982567
if (S_TM(n->suffix))
24992568
strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid));
25002569
else
2501-
strcpy(s, str_toupper_z(months[tm->tm_mon - 1], collid));
2570+
strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
25022571
s += strlen(s);
25032572
break;
25042573
case DCH_Mon:
@@ -2518,10 +2587,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25182587
if (S_TM(n->suffix))
25192588
strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid));
25202589
else
2521-
{
2522-
strcpy(s, months[tm->tm_mon - 1]);
2523-
*s = pg_tolower((unsigned char) *s);
2524-
}
2590+
strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
25252591
s += strlen(s);
25262592
break;
25272593
case DCH_MM:
@@ -2536,34 +2602,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25362602
strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday], collid));
25372603
else
25382604
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2539-
str_toupper_z(days[tm->tm_wday], collid));
2605+
asc_toupper_z(days[tm->tm_wday]));
25402606
s += strlen(s);
25412607
break;
25422608
case DCH_Day:
25432609
INVALID_FOR_INTERVAL;
25442610
if (S_TM(n->suffix))
25452611
strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday], collid));
25462612
else
2547-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2613+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2614+
days[tm->tm_wday]);
25482615
s += strlen(s);
25492616
break;
25502617
case DCH_day:
25512618
INVALID_FOR_INTERVAL;
25522619
if (S_TM(n->suffix))
25532620
strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday], collid));
25542621
else
2555-
{
2556-
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
2557-
*s = pg_tolower((unsigned char) *s);
2558-
}
2622+
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2623+
asc_tolower_z(days[tm->tm_wday]));
25592624
s += strlen(s);
25602625
break;
25612626
case DCH_DY:
25622627
INVALID_FOR_INTERVAL;
25632628
if (S_TM(n->suffix))
25642629
strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday], collid));
25652630
else
2566-
strcpy(s, str_toupper_z(days_short[tm->tm_wday], collid));
2631+
strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
25672632
s += strlen(s);
25682633
break;
25692634
case DCH_Dy:
@@ -2579,10 +2644,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25792644
if (S_TM(n->suffix))
25802645
strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday], collid));
25812646
else
2582-
{
2583-
strcpy(s, days_short[tm->tm_wday]);
2584-
*s = pg_tolower((unsigned char) *s);
2585-
}
2647+
strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
25862648
s += strlen(s);
25872649
break;
25882650
case DCH_DDD:
@@ -4690,12 +4752,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46904752
case NUM_rn:
46914753
if (IS_FILLMODE(Np->Num))
46924754
{
4693-
strcpy(Np->inout_p, str_tolower_z(Np->number_p, collid));
4755+
strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
46944756
Np->inout_p += strlen(Np->inout_p) - 1;
46954757
}
46964758
else
46974759
{
4698-
sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p, collid));
4760+
sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
46994761
Np->inout_p += strlen(Np->inout_p) - 1;
47004762
}
47014763
break;

src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
extern char *str_toupper(const char *buff, size_t nbytes, Oid collid);
2525
extern char *str_initcap(const char *buff, size_t nbytes, Oid collid);
2626

27+
extern char *asc_tolower(const char *buff, size_t nbytes);
28+
extern char *asc_toupper(const char *buff, size_t nbytes);
29+
extern char *asc_initcap(const char *buff, size_t nbytes);
30+
2731
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
2832
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
2933
extern Datum interval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)