Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f4b7624

Browse files
committed
Add the missing Cyrillic "Yo" characters ('e' and 'E' with two dots) to the
ISO_8859-5 <-> MULE_INTERNAL conversion tables. This was discovered when trying to convert a string containing those characters from ISO_8859-5 to Windows-1251, because we use MULE_INTERNAL/KOI8R as an intermediate encoding between those two. While the missing "Yo" was just an omission in the conversion tables, there are a few other characters, like the "Numero" sign ("No" as a single character), that exist in all the other Cyrillic encodings (win1251, ISO_8859-5 and cp866), but not in KOI8R. Added comments about that. Patch by Sergey Burladyan. Back-patch to 7.4.
1 parent 470c6c1 commit f4b7624

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c

+16-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.16 2008/01/01 19:45:53 momjian Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.17 2008/03/20 10:30:04 heikki Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -301,6 +301,12 @@ win866_to_win1251(PG_FUNCTION_ARGS)
301301
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
302302
Assert(len >= 0);
303303

304+
/*
305+
* Note: There are a few characters like the "Numero" sign that exist in
306+
* all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but
307+
* not in KOI8R. As we use MULE_INTERNAL/KOI8R as an intermediary, we
308+
* will fail to convert those characters.
309+
*/
304310
buf = palloc(len * ENCODING_GROWTH_RATE);
305311
win8662mic(src, buf, len);
306312
mic2win1251(buf, dest, strlen((char *) buf));
@@ -321,6 +327,7 @@ win1251_to_win866(PG_FUNCTION_ARGS)
321327
Assert(PG_GETARG_INT32(1) == PG_WIN866);
322328
Assert(len >= 0);
323329

330+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
324331
buf = palloc(len * ENCODING_GROWTH_RATE);
325332
win12512mic(src, buf, len);
326333
mic2win866(buf, dest, strlen((char *) buf));
@@ -381,6 +388,7 @@ iso_to_win1251(PG_FUNCTION_ARGS)
381388
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
382389
Assert(len >= 0);
383390

391+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
384392
buf = palloc(len * ENCODING_GROWTH_RATE);
385393
iso2mic(src, buf, len);
386394
mic2win1251(buf, dest, strlen((char *) buf));
@@ -401,6 +409,7 @@ win1251_to_iso(PG_FUNCTION_ARGS)
401409
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
402410
Assert(len >= 0);
403411

412+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
404413
buf = palloc(len * ENCODING_GROWTH_RATE);
405414
win12512mic(src, buf, len);
406415
mic2iso(buf, dest, strlen((char *) buf));
@@ -421,6 +430,7 @@ iso_to_win866(PG_FUNCTION_ARGS)
421430
Assert(PG_GETARG_INT32(1) == PG_WIN866);
422431
Assert(len >= 0);
423432

433+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
424434
buf = palloc(len * ENCODING_GROWTH_RATE);
425435
iso2mic(src, buf, len);
426436
mic2win866(buf, dest, strlen((char *) buf));
@@ -441,6 +451,7 @@ win866_to_iso(PG_FUNCTION_ARGS)
441451
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
442452
Assert(len >= 0);
443453

454+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
444455
buf = palloc(len * ENCODING_GROWTH_RATE);
445456
win8662mic(src, buf, len);
446457
mic2iso(buf, dest, strlen((char *) buf));
@@ -483,7 +494,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
483494
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
484495
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
485496
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
486-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
497+
0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
487498
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
488499
0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
489500
0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
@@ -493,7 +504,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
493504
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
494505
0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
495506
0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
496-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
507+
0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
497508
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
498509
};
499510

@@ -509,9 +520,9 @@ mic2iso(const unsigned char *mic, unsigned char *p, int len)
509520
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
510521
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
511522
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
523+
0x00, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00,
512524
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
513-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
514-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
525+
0x00, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, 0x00,
515526
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
516527
0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
517528
0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,

0 commit comments

Comments
 (0)