Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a8dfe11

Browse files
committed
Teach chklocale.c how to extract encoding info from Windows locale
names. ITAGAKI Takahiro
1 parent bda5750 commit a8dfe11

File tree

1 file changed

+54
-7
lines changed

1 file changed

+54
-7
lines changed

src/port/chklocale.c

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.3 2007/09/29 00:01:43 tgl Exp $
11+
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.4 2007/10/03 17:16:39 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -27,13 +27,12 @@
2727
#include "mb/pg_wchar.h"
2828

2929

30-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
31-
3230
/*
3331
* This table needs to recognize all the CODESET spellings for supported
3432
* backend encodings, as well as frontend-only encodings where possible
3533
* (the latter case is currently only needed for initdb to recognize
36-
* error situations).
34+
* error situations). On Windows, we rely on entries for codepage
35+
* numbers (CPnnn).
3736
*
3837
* Note that we search the table with pg_strcasecmp(), so variant
3938
* capitalizations don't need their own entries.
@@ -49,23 +48,27 @@ static const struct encoding_match encoding_match_list[] = {
4948
{PG_EUC_JP, "eucJP"},
5049
{PG_EUC_JP, "IBM-eucJP"},
5150
{PG_EUC_JP, "sdeckanji"},
51+
{PG_EUC_JP, "CP20932"},
5252

5353
{PG_EUC_CN, "EUC-CN"},
5454
{PG_EUC_CN, "eucCN"},
5555
{PG_EUC_CN, "IBM-eucCN"},
5656
{PG_EUC_CN, "GB2312"},
5757
{PG_EUC_CN, "dechanzi"},
58+
{PG_EUC_CN, "CP20936"},
5859

5960
{PG_EUC_KR, "EUC-KR"},
6061
{PG_EUC_KR, "eucKR"},
6162
{PG_EUC_KR, "IBM-eucKR"},
6263
{PG_EUC_KR, "deckorean"},
6364
{PG_EUC_KR, "5601"},
65+
{PG_EUC_KR, "CP51949"}, /* or 20949 ? */
6466

6567
{PG_EUC_TW, "EUC-TW"},
6668
{PG_EUC_TW, "eucTW"},
6769
{PG_EUC_TW, "IBM-eucTW"},
6870
{PG_EUC_TW, "cns11643"},
71+
/* No codepage for EUC-TW ? */
6972

7073
{PG_UTF8, "UTF-8"},
7174
{PG_UTF8, "utf8"},
@@ -111,6 +114,7 @@ static const struct encoding_match encoding_match_list[] = {
111114
{PG_LATIN10, "iso885916"},
112115

113116
{PG_KOI8R, "KOI8-R"},
117+
{PG_KOI8R, "CP20866"},
114118

115119
{PG_WIN1252, "CP1252"},
116120
{PG_WIN1253, "CP1253"},
@@ -143,23 +147,56 @@ static const struct encoding_match encoding_match_list[] = {
143147

144148
{PG_SJIS, "SJIS"},
145149
{PG_SJIS, "PCK"},
150+
{PG_SJIS, "CP932"},
146151

147152
{PG_BIG5, "BIG5"},
148153
{PG_BIG5, "BIG5HKSCS"},
154+
{PG_BIG5, "CP950"},
149155

150156
{PG_GBK, "GBK"},
157+
{PG_GBK, "CP936"},
151158

152159
{PG_UHC, "UHC"},
153160

154161
{PG_JOHAB, "JOHAB"},
162+
{PG_JOHAB, "CP1361"},
155163

156164
{PG_GB18030, "GB18030"},
165+
{PG_GB18030, "CP54936"},
157166

158167
{PG_SHIFT_JIS_2004, "SJIS_2004"},
159168

160169
{PG_SQL_ASCII, NULL} /* end marker */
161170
};
162171

172+
#ifdef WIN32
173+
/*
 * On Windows, use CP<codepage number> instead of the nl_langinfo() result.
 *
 * ctype is a locale name as returned by setlocale().  The text following
 * the last '.' in the name is taken to be the codepage and is returned
 * prefixed with "CP", which is the spelling used for codepage entries in
 * encoding_match_list.
 *
 * Returns a malloc'd string that the caller must free(), or NULL if ctype
 * is NULL, contains no '.', or memory could not be allocated.
 */
static char *
win32_langinfo(const char *ctype)
{
	const char *codepage;
	char	   *r;
	size_t		ln;

	if (!ctype)
		return NULL;

	/*
	 * Locale format on Win32 is <Language>_<Country>.<CodePage> .
	 * For example, English_USA.1252.
	 */
	codepage = strrchr(ctype, '.');
	if (!codepage)
		return NULL;
	codepage++;					/* skip over the '.' itself */

	ln = strlen(codepage);
	r = malloc(ln + 3);			/* "CP" + codepage + terminating NUL */
	if (!r)
		return NULL;			/* out of memory; caller treats NULL as failure */

	memcpy(r, "CP", 2);
	memcpy(r + 2, codepage, ln + 1);	/* copies the trailing NUL too */

	return r;
}
197+
#endif /* WIN32 */
198+
199+
#if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
163200

164201
/*
165202
* Given a setting for LC_CTYPE, return the Postgres ID of the associated
@@ -181,6 +218,7 @@ pg_get_encoding_from_locale(const char *ctype)
181218
if (ctype)
182219
{
183220
char *save;
221+
char *name;
184222

185223
save = setlocale(LC_CTYPE, NULL);
186224
if (!save)
@@ -190,15 +228,20 @@ pg_get_encoding_from_locale(const char *ctype)
190228
if (!save)
191229
return PG_SQL_ASCII; /* out of memory; unlikely */
192230

193-
if (!setlocale(LC_CTYPE, ctype))
231+
name = setlocale(LC_CTYPE, ctype);
232+
if (!name)
194233
{
195234
free(save);
196235
return PG_SQL_ASCII; /* bogus ctype passed in? */
197236
}
198237

238+
#ifndef WIN32
199239
sys = nl_langinfo(CODESET);
200240
if (sys)
201241
sys = strdup(sys);
242+
#else
243+
sys = win32_langinfo(name);
244+
#endif
202245

203246
setlocale(LC_CTYPE, save);
204247
free(save);
@@ -209,9 +252,13 @@ pg_get_encoding_from_locale(const char *ctype)
209252
ctype = setlocale(LC_CTYPE, NULL);
210253
if (!ctype)
211254
return PG_SQL_ASCII; /* setlocale() broken? */
255+
#ifndef WIN32
212256
sys = nl_langinfo(CODESET);
213257
if (sys)
214258
sys = strdup(sys);
259+
#else
260+
sys = win32_langinfo(ctype);
261+
#endif
215262
}
216263

217264
if (!sys)
@@ -268,7 +315,7 @@ pg_get_encoding_from_locale(const char *ctype)
268315
return PG_SQL_ASCII;
269316
}
270317

271-
#else /* !(HAVE_LANGINFO_H && CODESET) */
318+
#else /* !((HAVE_LANGINFO_H && CODESET) || WIN32) */
272319

273320
/*
274321
* stub if no platform support
@@ -279,4 +326,4 @@ pg_get_encoding_from_locale(const char *ctype)
279326
return PG_SQL_ASCII;
280327
}
281328

282-
#endif /* HAVE_LANGINFO_H && CODESET */
329+
#endif /* (HAVE_LANGINFO_H && CODESET) || WIN32 */

0 commit comments

Comments
 (0)