Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e6227fd

Browse files
committed
Add missing Unicode multibyte files.
1 parent 92288a1 commit e6227fd

19 files changed

+113340
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#! /usr/bin/perl
2+
#
3+
# Copyright 2001 by PostgreSQL Global Development Group
4+
#
5+
# $Id: UCS_to_GBK.pl,v 1.1 2002/03/06 06:12:55 momjian Exp $
6+
#
7+
#
8+
# Generate UTF-8 <--> GBK code conversion tables from
9+
# map files provided by Unicode organization.
10+
# Unfortunately it is prohibited by the organization
11+
# to distribute the map files. So if you try to use this script,
12+
# you have to obtain CP936.TXT from
13+
# the organization's ftp site.
14+
#
15+
# CP936.TXT format:
16+
# GBK code in hex
17+
# UCS-2 code in hex
18+
# # and Unicode name (not used in this script)
19+
20+
use strict;
use warnings;

# NOTE(review): since Perl 5.26 the current directory is no longer in @INC,
# so this script may have to be run as "perl -I. <script>" when ucs2utf.pl
# lives in the current directory.
require "ucs2utf.pl";

# Map file to read; see the format description in the header above.
my $in_file = "CP936.TXT";

# Every usable mapping from the input, as [ GBK code, UCS-2 code ] pairs
# in file order.  The file is parsed once and used for both tables.
my @pairs = read_map($in_file);

#
# first, generate UTF8 --> GBK table
#
my %utf_to_code;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;

    # ucs2utf() is presumably provided by ucs2utf.pl; its result is used
    # as a numeric key (sorted with <=> and printed with %04x).
    my $utf = ucs2utf($ucs);
    if ( exists $utf_to_code{$utf} ) {

        # Keep the first mapping seen, report the later one.
        printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
        next;
    }
    $utf_to_code{$utf} = $code;
}
write_table( "utf8_to_gbk.map", "pg_utf_to_local ULmapGBK", \%utf_to_code );

#
# then generate GBK --> UTF8 table
#
my %code_to_utf;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;
    if ( exists $code_to_utf{$code} ) {

        # Report the duplicated GBK code itself (the message used to print
        # the UCS value under a "duplicate code" label).
        printf STDERR "Warning: duplicate code: %04x\n", $code;
        next;
    }
    $code_to_utf{$code} = ucs2utf($ucs);
}
write_table( "gbk_to_utf8.map", "pg_local_to_utf LUmapGBK", \%code_to_utf );

# read_map($path)
#
# Parse a map file whose lines hold a local code in hex, a UCS-2 code in
# hex and an optional "#" comment.  Comment text, blank lines and lines
# without two hex fields are ignored, as are mappings whose local code is
# below 0x80 or whose UCS code is below 0x0080.
#
# Returns a list of [ $code, $ucs ] array refs in file order.
# Dies if the file cannot be opened.
sub read_map {
    my ($path) = @_;
    my $hex_re = qr/\A(?:0x)?[0-9a-f]+\z/i;
    my @found;

    open( my $in, '<', $path ) or die "cannot open $path: $!";
    while ( my $line = <$in> ) {

        # chomp, not chop: a final line without a newline must stay intact.
        chomp $line;
        $line =~ s/#.*//;

        my ( $c, $u ) = split ' ', $line;
        next unless defined $u;
        next unless $c =~ $hex_re && $u =~ $hex_re;

        my $code = hex($c);
        my $ucs  = hex($u);
        next unless $code >= 0x80 && $ucs >= 0x0080;

        push @found, [ $code, $ucs ];
    }
    close($in);

    return @found;
}

# write_table($path, $decl, $map)
#
# Write the hash referenced by $map to $path as a C array initializer.
# $decl is the "type name" part of the declaration; the array size is the
# number of keys.  Entries are emitted in ascending numeric key order, with
# a comma after every entry except the last.
#
# Dies if the file cannot be opened or closed.
sub write_table {
    my ( $path, $decl, $map ) = @_;
    my @keys = sort { $a <=> $b } keys %$map;

    open( my $out, '>', $path ) or die "cannot open $path: $!";
    printf $out "static %s[ %d ] = {\n", $decl, scalar @keys;
    for my $i ( 0 .. $#keys ) {
        my $sep = $i == $#keys ? "" : ",";
        printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] },
          $sep;
    }
    print $out "};\n";

    # Buffered write errors only surface at close time.
    close($out) or die "cannot close $path: $!";
    return;
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#! /usr/bin/perl
2+
#
3+
# Copyright 2001 by PostgreSQL Global Development Group
4+
#
5+
# $Id: UCS_to_JOHAB.pl,v 1.1 2002/03/06 06:12:55 momjian Exp $
6+
#
7+
# Generate UTF-8 <--> JOHAB code conversion tables from
8+
# map files provided by Unicode organization.
9+
# Unfortunately it is prohibited by the organization
10+
# to distribute the map files. So if you try to use this script,
11+
# you have to obtain JOHAB.TXT from
12+
# the organization's ftp site.
13+
#
14+
# JOHAB.TXT format:
15+
# JOHAB code in hex
16+
# UCS-2 code in hex
17+
# # and Unicode name (not used in this script)
18+
19+
use strict;
use warnings;

# NOTE(review): since Perl 5.26 the current directory is no longer in @INC,
# so this script may have to be run as "perl -I. <script>" when ucs2utf.pl
# lives in the current directory.
require "ucs2utf.pl";

# Map file to read; see the format description in the header above.
my $in_file = "JOHAB.TXT";

# Every usable mapping from the input, as [ JOHAB code, UCS-2 code ] pairs
# in file order.  The file is parsed once and used for both tables.
my @pairs = read_map($in_file);

#
# first, generate UTF8 --> JOHAB table
#
my %utf_to_code;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;

    # ucs2utf() is presumably provided by ucs2utf.pl; its result is used
    # as a numeric key (sorted with <=> and printed with %04x).
    my $utf = ucs2utf($ucs);
    if ( exists $utf_to_code{$utf} ) {

        # Keep the first mapping seen, report the later one.
        printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
        next;
    }
    $utf_to_code{$utf} = $code;
}
write_table( "utf8_to_johab.map", "pg_utf_to_local ULmapJOHAB",
    \%utf_to_code );

#
# then generate JOHAB --> UTF8 table
#
my %code_to_utf;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;
    if ( exists $code_to_utf{$code} ) {

        # Report the duplicated JOHAB code itself (the message used to
        # print the UCS value under a "duplicate code" label).
        printf STDERR "Warning: duplicate code: %04x\n", $code;
        next;
    }
    $code_to_utf{$code} = ucs2utf($ucs);
}
write_table( "johab_to_utf8.map", "pg_local_to_utf LUmapJOHAB",
    \%code_to_utf );

# read_map($path)
#
# Parse a map file whose lines hold a local code in hex, a UCS-2 code in
# hex and an optional "#" comment.  Comment text, blank lines and lines
# without two hex fields are ignored, as are mappings whose local code is
# below 0x80 or whose UCS code is below 0x0080.
#
# Returns a list of [ $code, $ucs ] array refs in file order.
# Dies if the file cannot be opened.
sub read_map {
    my ($path) = @_;
    my $hex_re = qr/\A(?:0x)?[0-9a-f]+\z/i;
    my @found;

    open( my $in, '<', $path ) or die "cannot open $path: $!";
    while ( my $line = <$in> ) {

        # chomp, not chop: a final line without a newline must stay intact.
        chomp $line;
        $line =~ s/#.*//;

        my ( $c, $u ) = split ' ', $line;
        next unless defined $u;
        next unless $c =~ $hex_re && $u =~ $hex_re;

        my $code = hex($c);
        my $ucs  = hex($u);
        next unless $code >= 0x80 && $ucs >= 0x0080;

        push @found, [ $code, $ucs ];
    }
    close($in);

    return @found;
}

# write_table($path, $decl, $map)
#
# Write the hash referenced by $map to $path as a C array initializer.
# $decl is the "type name" part of the declaration; the array size is the
# number of keys.  Entries are emitted in ascending numeric key order, with
# a comma after every entry except the last.
#
# Dies if the file cannot be opened or closed.
sub write_table {
    my ( $path, $decl, $map ) = @_;
    my @keys = sort { $a <=> $b } keys %$map;

    open( my $out, '>', $path ) or die "cannot open $path: $!";
    printf $out "static %s[ %d ] = {\n", $decl, scalar @keys;
    for my $i ( 0 .. $#keys ) {
        my $sep = $i == $#keys ? "" : ",";
        printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] },
          $sep;
    }
    print $out "};\n";

    # Buffered write errors only surface at close time.
    close($out) or die "cannot close $path: $!";
    return;
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#! /usr/bin/perl
2+
#
3+
# Copyright 2001 by PostgreSQL Global Development Group
4+
#
5+
# $Id: UCS_to_UHC.pl,v 1.1 2002/03/06 06:12:55 momjian Exp $
6+
#
7+
# Generate UTF-8 <--> UHC code conversion tables from
8+
# map files provided by Unicode organization.
9+
# Unfortunately it is prohibited by the organization
10+
# to distribute the map files. So if you try to use this script,
11+
# you have to obtain CP949.TXT from
12+
# the organization's ftp site.
13+
#
14+
# CP949.TXT format:
15+
# UHC code in hex
16+
# UCS-2 code in hex
17+
# # and Unicode name (not used in this script)
18+
19+
use strict;
use warnings;

# NOTE(review): since Perl 5.26 the current directory is no longer in @INC,
# so this script may have to be run as "perl -I. <script>" when ucs2utf.pl
# lives in the current directory.
require "ucs2utf.pl";

# Map file to read; see the format description in the header above.
my $in_file = "CP949.TXT";

# Every usable mapping from the input, as [ UHC code, UCS-2 code ] pairs
# in file order.  The file is parsed once and used for both tables.
my @pairs = read_map($in_file);

#
# first, generate UTF8 --> UHC table
#
my %utf_to_code;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;

    # ucs2utf() is presumably provided by ucs2utf.pl; its result is used
    # as a numeric key (sorted with <=> and printed with %04x).
    my $utf = ucs2utf($ucs);
    if ( exists $utf_to_code{$utf} ) {

        # Keep the first mapping seen, report the later one.
        printf STDERR "Warning: duplicate unicode: %04x\n", $ucs;
        next;
    }
    $utf_to_code{$utf} = $code;
}
write_table( "utf8_to_uhc.map", "pg_utf_to_local ULmapUHC", \%utf_to_code );

#
# then generate UHC --> UTF8 table
#
my %code_to_utf;
for my $pair (@pairs) {
    my ( $code, $ucs ) = @$pair;
    if ( exists $code_to_utf{$code} ) {

        # Report the duplicated UHC code itself (the message used to print
        # the UCS value under a "duplicate code" label).
        printf STDERR "Warning: duplicate code: %04x\n", $code;
        next;
    }
    $code_to_utf{$code} = ucs2utf($ucs);
}
write_table( "uhc_to_utf8.map", "pg_local_to_utf LUmapUHC", \%code_to_utf );

# read_map($path)
#
# Parse a map file whose lines hold a local code in hex, a UCS-2 code in
# hex and an optional "#" comment.  Comment text, blank lines and lines
# without two hex fields are ignored, as are mappings whose local code is
# below 0x80 or whose UCS code is below 0x0080.
#
# Returns a list of [ $code, $ucs ] array refs in file order.
# Dies if the file cannot be opened.
sub read_map {
    my ($path) = @_;
    my $hex_re = qr/\A(?:0x)?[0-9a-f]+\z/i;
    my @found;

    open( my $in, '<', $path ) or die "cannot open $path: $!";
    while ( my $line = <$in> ) {

        # chomp, not chop: a final line without a newline must stay intact.
        chomp $line;
        $line =~ s/#.*//;

        my ( $c, $u ) = split ' ', $line;
        next unless defined $u;
        next unless $c =~ $hex_re && $u =~ $hex_re;

        my $code = hex($c);
        my $ucs  = hex($u);
        next unless $code >= 0x80 && $ucs >= 0x0080;

        push @found, [ $code, $ucs ];
    }
    close($in);

    return @found;
}

# write_table($path, $decl, $map)
#
# Write the hash referenced by $map to $path as a C array initializer.
# $decl is the "type name" part of the declaration; the array size is the
# number of keys.  Entries are emitted in ascending numeric key order, with
# a comma after every entry except the last.
#
# Dies if the file cannot be opened or closed.
sub write_table {
    my ( $path, $decl, $map ) = @_;
    my @keys = sort { $a <=> $b } keys %$map;

    open( my $out, '>', $path ) or die "cannot open $path: $!";
    printf $out "static %s[ %d ] = {\n", $decl, scalar @keys;
    for my $i ( 0 .. $#keys ) {
        my $sep = $i == $#keys ? "" : ",";
        printf $out " {0x%04x, 0x%04x}%s\n", $keys[$i], $map->{ $keys[$i] },
          $sep;
    }
    print $out "};\n";

    # Buffered write errors only surface at close time.
    close($out) or die "cannot close $path: $!";
    return;
}

0 commit comments

Comments
 (0)