Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8b9dd6b

Browse files
committed
Support for KOI8U encoding
1 parent 1cb54c2 commit 8b9dd6b

File tree

10 files changed

+334
-14
lines changed

10 files changed

+334
-14
lines changed

doc/src/sgml/charset.sgml

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.90 2008/09/24 16:30:26 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.91 2009/02/10 19:29:39 petere Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -457,12 +457,20 @@ initdb --locale=sv_SE
457457
<entry></entry>
458458
</row>
459459
<row>
460-
<entry><literal>KOI8</literal></entry>
461-
<entry><acronym>KOI</acronym>8-R(U)</entry>
462-
<entry>Cyrillic</entry>
460+
<entry><literal>KOI8R</literal></entry>
461+
<entry><acronym>KOI</acronym>8-R</entry>
462+
<entry>Cyrillic (Russian)</entry>
463463
<entry>Yes</entry>
464464
<entry>1</entry>
465-
<entry><literal>KOI8R</></entry>
465+
<entry><literal>KOI8</></entry>
466+
</row>
467+
<row>
468+
<entry><literal>KOI8U</literal></entry>
469+
<entry><acronym>KOI</acronym>8-U</entry>
470+
<entry>Cyrillic (Ukrainian)</entry>
471+
<entry>Yes</entry>
472+
<entry>1</entry>
473+
<entry></entry>
466474
</row>
467475
<row>
468476
<entry><literal>LATIN1</literal></entry>

src/backend/utils/mb/Unicode/UCS_to_most.pl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# Copyright (c) 2001-2009, PostgreSQL Global Development Group
44
#
5-
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.6 2009/02/10 16:36:55 petere Exp $
5+
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.7 2009/02/10 19:29:39 petere Exp $
66
#
77
# Generate UTF-8 <--> character code conversion tables from
88
# map files provided by Unicode organization.
@@ -43,6 +43,7 @@
4343
'ISO8859_15' => '8859-15.TXT',
4444
'ISO8859_16' => '8859-16.TXT',
4545
'KOI8R' => 'KOI8-R.TXT',
46+
'KOI8U' => 'KOI8-U.TXT',
4647
'GBK' => 'CP936.TXT',
4748
'UHC' => 'CP949.TXT',
4849
'JOHAB' => 'JOHAB.TXT',

src/backend/utils/mb/Unicode/koi8u_to_utf8.map

Lines changed: 130 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,130 @@
1+
static pg_local_to_utf LUmapKOI8U[ 128 ] = {
2+
{0x0080, 0xe29480},
3+
{0x0081, 0xe29482},
4+
{0x0082, 0xe2948c},
5+
{0x0083, 0xe29490},
6+
{0x0084, 0xe29494},
7+
{0x0085, 0xe29498},
8+
{0x0086, 0xe2949c},
9+
{0x0087, 0xe294a4},
10+
{0x0088, 0xe294ac},
11+
{0x0089, 0xe294b4},
12+
{0x008a, 0xe294bc},
13+
{0x008b, 0xe29680},
14+
{0x008c, 0xe29684},
15+
{0x008d, 0xe29688},
16+
{0x008e, 0xe2968c},
17+
{0x008f, 0xe29690},
18+
{0x0090, 0xe29691},
19+
{0x0091, 0xe29692},
20+
{0x0092, 0xe29693},
21+
{0x0093, 0xe28ca0},
22+
{0x0094, 0xe296a0},
23+
{0x0095, 0xe28899},
24+
{0x0096, 0xe2889a},
25+
{0x0097, 0xe28988},
26+
{0x0098, 0xe289a4},
27+
{0x0099, 0xe289a5},
28+
{0x009a, 0xc2a0},
29+
{0x009b, 0xe28ca1},
30+
{0x009c, 0xc2b0},
31+
{0x009d, 0xc2b2},
32+
{0x009e, 0xc2b7},
33+
{0x009f, 0xc3b7},
34+
{0x00a0, 0xe29590},
35+
{0x00a1, 0xe29591},
36+
{0x00a2, 0xe29592},
37+
{0x00a3, 0xd191},
38+
{0x00a4, 0xd194},
39+
{0x00a5, 0xe29594},
40+
{0x00a6, 0xd196},
41+
{0x00a7, 0xd197},
42+
{0x00a8, 0xe29597},
43+
{0x00a9, 0xe29598},
44+
{0x00aa, 0xe29599},
45+
{0x00ab, 0xe2959a},
46+
{0x00ac, 0xe2959b},
47+
{0x00ad, 0xd291},
48+
{0x00ae, 0xe2959d},
49+
{0x00af, 0xe2959e},
50+
{0x00b0, 0xe2959f},
51+
{0x00b1, 0xe295a0},
52+
{0x00b2, 0xe295a1},
53+
{0x00b3, 0xd081},
54+
{0x00b4, 0xd084},
55+
{0x00b5, 0xe295a3},
56+
{0x00b6, 0xd086},
57+
{0x00b7, 0xd087},
58+
{0x00b8, 0xe295a6},
59+
{0x00b9, 0xe295a7},
60+
{0x00ba, 0xe295a8},
61+
{0x00bb, 0xe295a9},
62+
{0x00bc, 0xe295aa},
63+
{0x00bd, 0xd290},
64+
{0x00be, 0xe295ac},
65+
{0x00bf, 0xc2a9},
66+
{0x00c0, 0xd18e},
67+
{0x00c1, 0xd0b0},
68+
{0x00c2, 0xd0b1},
69+
{0x00c3, 0xd186},
70+
{0x00c4, 0xd0b4},
71+
{0x00c5, 0xd0b5},
72+
{0x00c6, 0xd184},
73+
{0x00c7, 0xd0b3},
74+
{0x00c8, 0xd185},
75+
{0x00c9, 0xd0b8},
76+
{0x00ca, 0xd0b9},
77+
{0x00cb, 0xd0ba},
78+
{0x00cc, 0xd0bb},
79+
{0x00cd, 0xd0bc},
80+
{0x00ce, 0xd0bd},
81+
{0x00cf, 0xd0be},
82+
{0x00d0, 0xd0bf},
83+
{0x00d1, 0xd18f},
84+
{0x00d2, 0xd180},
85+
{0x00d3, 0xd181},
86+
{0x00d4, 0xd182},
87+
{0x00d5, 0xd183},
88+
{0x00d6, 0xd0b6},
89+
{0x00d7, 0xd0b2},
90+
{0x00d8, 0xd18c},
91+
{0x00d9, 0xd18b},
92+
{0x00da, 0xd0b7},
93+
{0x00db, 0xd188},
94+
{0x00dc, 0xd18d},
95+
{0x00dd, 0xd189},
96+
{0x00de, 0xd187},
97+
{0x00df, 0xd18a},
98+
{0x00e0, 0xd0ae},
99+
{0x00e1, 0xd090},
100+
{0x00e2, 0xd091},
101+
{0x00e3, 0xd0a6},
102+
{0x00e4, 0xd094},
103+
{0x00e5, 0xd095},
104+
{0x00e6, 0xd0a4},
105+
{0x00e7, 0xd093},
106+
{0x00e8, 0xd0a5},
107+
{0x00e9, 0xd098},
108+
{0x00ea, 0xd099},
109+
{0x00eb, 0xd09a},
110+
{0x00ec, 0xd09b},
111+
{0x00ed, 0xd09c},
112+
{0x00ee, 0xd09d},
113+
{0x00ef, 0xd09e},
114+
{0x00f0, 0xd09f},
115+
{0x00f1, 0xd0af},
116+
{0x00f2, 0xd0a0},
117+
{0x00f3, 0xd0a1},
118+
{0x00f4, 0xd0a2},
119+
{0x00f5, 0xd0a3},
120+
{0x00f6, 0xd096},
121+
{0x00f7, 0xd092},
122+
{0x00f8, 0xd0ac},
123+
{0x00f9, 0xd0ab},
124+
{0x00fa, 0xd097},
125+
{0x00fb, 0xd0a8},
126+
{0x00fc, 0xd0ad},
127+
{0x00fd, 0xd0a9},
128+
{0x00fe, 0xd0a7},
129+
{0x00ff, 0xd0aa}
130+
};

src/backend/utils/mb/Unicode/utf8_to_koi8u.map

Lines changed: 130 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,130 @@
1+
static pg_utf_to_local ULmapKOI8U[ 128 ] = {
2+
{0xc2a0, 0x009a},
3+
{0xc2a9, 0x00bf},
4+
{0xc2b0, 0x009c},
5+
{0xc2b2, 0x009d},
6+
{0xc2b7, 0x009e},
7+
{0xc3b7, 0x009f},
8+
{0xd081, 0x00b3},
9+
{0xd084, 0x00b4},
10+
{0xd086, 0x00b6},
11+
{0xd087, 0x00b7},
12+
{0xd090, 0x00e1},
13+
{0xd091, 0x00e2},
14+
{0xd092, 0x00f7},
15+
{0xd093, 0x00e7},
16+
{0xd094, 0x00e4},
17+
{0xd095, 0x00e5},
18+
{0xd096, 0x00f6},
19+
{0xd097, 0x00fa},
20+
{0xd098, 0x00e9},
21+
{0xd099, 0x00ea},
22+
{0xd09a, 0x00eb},
23+
{0xd09b, 0x00ec},
24+
{0xd09c, 0x00ed},
25+
{0xd09d, 0x00ee},
26+
{0xd09e, 0x00ef},
27+
{0xd09f, 0x00f0},
28+
{0xd0a0, 0x00f2},
29+
{0xd0a1, 0x00f3},
30+
{0xd0a2, 0x00f4},
31+
{0xd0a3, 0x00f5},
32+
{0xd0a4, 0x00e6},
33+
{0xd0a5, 0x00e8},
34+
{0xd0a6, 0x00e3},
35+
{0xd0a7, 0x00fe},
36+
{0xd0a8, 0x00fb},
37+
{0xd0a9, 0x00fd},
38+
{0xd0aa, 0x00ff},
39+
{0xd0ab, 0x00f9},
40+
{0xd0ac, 0x00f8},
41+
{0xd0ad, 0x00fc},
42+
{0xd0ae, 0x00e0},
43+
{0xd0af, 0x00f1},
44+
{0xd0b0, 0x00c1},
45+
{0xd0b1, 0x00c2},
46+
{0xd0b2, 0x00d7},
47+
{0xd0b3, 0x00c7},
48+
{0xd0b4, 0x00c4},
49+
{0xd0b5, 0x00c5},
50+
{0xd0b6, 0x00d6},
51+
{0xd0b7, 0x00da},
52+
{0xd0b8, 0x00c9},
53+
{0xd0b9, 0x00ca},
54+
{0xd0ba, 0x00cb},
55+
{0xd0bb, 0x00cc},
56+
{0xd0bc, 0x00cd},
57+
{0xd0bd, 0x00ce},
58+
{0xd0be, 0x00cf},
59+
{0xd0bf, 0x00d0},
60+
{0xd180, 0x00d2},
61+
{0xd181, 0x00d3},
62+
{0xd182, 0x00d4},
63+
{0xd183, 0x00d5},
64+
{0xd184, 0x00c6},
65+
{0xd185, 0x00c8},
66+
{0xd186, 0x00c3},
67+
{0xd187, 0x00de},
68+
{0xd188, 0x00db},
69+
{0xd189, 0x00dd},
70+
{0xd18a, 0x00df},
71+
{0xd18b, 0x00d9},
72+
{0xd18c, 0x00d8},
73+
{0xd18d, 0x00dc},
74+
{0xd18e, 0x00c0},
75+
{0xd18f, 0x00d1},
76+
{0xd191, 0x00a3},
77+
{0xd194, 0x00a4},
78+
{0xd196, 0x00a6},
79+
{0xd197, 0x00a7},
80+
{0xd290, 0x00bd},
81+
{0xd291, 0x00ad},
82+
{0xe28899, 0x0095},
83+
{0xe2889a, 0x0096},
84+
{0xe28988, 0x0097},
85+
{0xe289a4, 0x0098},
86+
{0xe289a5, 0x0099},
87+
{0xe28ca0, 0x0093},
88+
{0xe28ca1, 0x009b},
89+
{0xe29480, 0x0080},
90+
{0xe29482, 0x0081},
91+
{0xe2948c, 0x0082},
92+
{0xe29490, 0x0083},
93+
{0xe29494, 0x0084},
94+
{0xe29498, 0x0085},
95+
{0xe2949c, 0x0086},
96+
{0xe294a4, 0x0087},
97+
{0xe294ac, 0x0088},
98+
{0xe294b4, 0x0089},
99+
{0xe294bc, 0x008a},
100+
{0xe29590, 0x00a0},
101+
{0xe29591, 0x00a1},
102+
{0xe29592, 0x00a2},
103+
{0xe29594, 0x00a5},
104+
{0xe29597, 0x00a8},
105+
{0xe29598, 0x00a9},
106+
{0xe29599, 0x00aa},
107+
{0xe2959a, 0x00ab},
108+
{0xe2959b, 0x00ac},
109+
{0xe2959d, 0x00ae},
110+
{0xe2959e, 0x00af},
111+
{0xe2959f, 0x00b0},
112+
{0xe295a0, 0x00b1},
113+
{0xe295a1, 0x00b2},
114+
{0xe295a3, 0x00b5},
115+
{0xe295a6, 0x00b8},
116+
{0xe295a7, 0x00b9},
117+
{0xe295a8, 0x00ba},
118+
{0xe295a9, 0x00bb},
119+
{0xe295aa, 0x00bc},
120+
{0xe295ac, 0x00be},
121+
{0xe29680, 0x008b},
122+
{0xe29684, 0x008c},
123+
{0xe29688, 0x008d},
124+
{0xe2968c, 0x008e},
125+
{0xe29690, 0x008f},
126+
{0xe29691, 0x0090},
127+
{0xe29692, 0x0091},
128+
{0xe29693, 0x0092},
129+
{0xe296a0, 0x0094}
130+
};

src/backend/utils/mb/conversion_procs/Makefile

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
# Makefile for utils/mb/conversion_procs
55
#
66
# IDENTIFICATION
7-
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.20 2008/08/23 20:31:37 momjian Exp $
7+
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.21 2009/02/10 19:29:39 petere Exp $
88
#
99
#-------------------------------------------------------------------------
1010

@@ -84,6 +84,8 @@ CONVERSIONS = \
8484
utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \
8585
utf8_to_koi8_r UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \
8686
koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \
87+
utf8_to_koi8_u UTF8 KOI8U utf8_to_koi8u utf8_and_cyrillic \
88+
koi8_u_to_utf8 KOI8U UTF8 koi8u_to_utf8 utf8_and_cyrillic \
8789
utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \
8890
windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \
8991
utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \

src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c

Lines changed: 39 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.23 2009/01/29 19:23:40 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.24 2009/02/10 19:29:39 petere Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -16,15 +16,23 @@
1616
#include "mb/pg_wchar.h"
1717
#include "../../Unicode/utf8_to_koi8r.map"
1818
#include "../../Unicode/koi8r_to_utf8.map"
19+
#include "../../Unicode/utf8_to_koi8u.map"
20+
#include "../../Unicode/koi8u_to_utf8.map"
1921

2022
PG_MODULE_MAGIC;
2123

2224
PG_FUNCTION_INFO_V1(utf8_to_koi8r);
2325
PG_FUNCTION_INFO_V1(koi8r_to_utf8);
2426

27+
PG_FUNCTION_INFO_V1(utf8_to_koi8u);
28+
PG_FUNCTION_INFO_V1(koi8u_to_utf8);
29+
2530
extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS);
2631
extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS);
2732

33+
extern Datum utf8_to_koi8u(PG_FUNCTION_ARGS);
34+
extern Datum koi8u_to_utf8(PG_FUNCTION_ARGS);
35+
2836
/* ----------
2937
* conv_proc(
3038
* INTEGER, -- source encoding id
@@ -65,3 +73,33 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
6573

6674
PG_RETURN_VOID();
6775
}
76+
77+
Datum
78+
utf8_to_koi8u(PG_FUNCTION_ARGS)
79+
{
80+
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
81+
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
82+
int len = PG_GETARG_INT32(4);
83+
84+
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
85+
86+
UtfToLocal(src, dest, ULmapKOI8U, NULL,
87+
sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
88+
89+
PG_RETURN_VOID();
90+
}
91+
92+
Datum
93+
koi8u_to_utf8(PG_FUNCTION_ARGS)
94+
{
95+
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
96+
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
97+
int len = PG_GETARG_INT32(4);
98+
99+
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
100+
101+
LocalToUtf(src, dest, LUmapKOI8U, NULL,
102+
sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
103+
104+
PG_RETURN_VOID();
105+
}

0 commit comments

Comments
 (0)