Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5735c4c

Browse files
committed
Enhanced UTF-8/SJIS mapping generator, contributed by
Eiji Tokuya <e-tokuya@Mail.Sankyo-Unyu.co.jp>
1 parent a9a047e commit 5735c4c

File tree

1 file changed

+38
-25
lines changed

1 file changed

+38
-25
lines changed

src/backend/utils/mb/Unicode/UCS_to_SJIS.pl

Lines changed: 38 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# Copyright 2001 by PostgreSQL Global Development Group
44
#
5-
# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
5+
# $Id: UCS_to_SJIS.pl,v 1.2 2001/02/23 08:44:33 ishii Exp $
66
#
77
# Generate UTF-8 <--> SJIS code conversion tables from
88
# map files provided by Unicode organization.
@@ -21,29 +21,45 @@
2121

2222
# first generate UTF-8 --> SJIS table
2323

24-
$in_file = "SHIFTJIS.TXT";
24+
$in_file = "CP932.TXT";
25+
$count = 0;
2526

2627
open( FILE, $in_file ) || die( "cannot open $in_file" );
2728

2829
while( <FILE> ){
29-
chop;
30-
if( /^#/ ){
31-
next;
32-
}
33-
( $c, $u, $rest ) = split;
34-
$ucs = hex($u);
35-
$code = hex($c);
36-
if( $code >= 0x80 && $ucs >= 0x100 ){
37-
$utf = &ucs2utf($ucs);
38-
if( $array{ $utf } ne "" ){
39-
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
40-
next;
41-
}
42-
$count++;
43-
44-
$array{ $utf } = $code;
45-
}
30+
chop;
31+
if( /^#/ ){
32+
next;
33+
}
34+
( $c, $u, $rest ) = split;
35+
$ucs = hex($u);
36+
$code = hex($c);
37+
if( $code >= 0x80 && $ucs >= 0x100 ){
38+
$utf = &ucs2utf($ucs);
39+
if((( $code >= 0xed40 )
40+
&& ( $code <= 0xeefc ))
41+
|| (( $code >= 0x8754 )
42+
&&( $code <= 0x875d ))
43+
|| ( $code == 0x878a )
44+
|| ( $code == 0x8782 )
45+
|| ( $code == 0x8784 )
46+
|| ( $code == 0xfa5b )
47+
|| ( $code == 0xfa54 )
48+
|| (( $code >= 0x8790 )
49+
&& ( $code <= 0x8792 ))
50+
|| (( $code >= 0x8795 )
51+
&& ( $code <= 0x8797 ))
52+
|| (( $code >= 0x879a )
53+
&& ( $code <= 0x879c )))
54+
{
55+
printf STDERR "Warning: duplicate unicode : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code;
56+
next;
57+
}
58+
$count++;
59+
$array{ $utf } = $code;
60+
}
4661
}
62+
4763
close( FILE );
4864

4965
#
@@ -57,7 +73,7 @@
5773
for $index ( sort {$a <=> $b} keys( %array ) ){
5874
$code = $array{ $index };
5975
$count--;
60-
if( $count == 0 ){
76+
if( $count == 0 ){
6177
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
6278
} else {
6379
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
@@ -68,12 +84,13 @@
6884
close(FILE);
6985

7086
#
71-
# then generate EUC_JP --> UTF8 table
87+
# then generate SJIS --> UTF8 table
7288
#
7389

7490
open( FILE, $in_file ) || die( "cannot open $in_file" );
7591

7692
reset 'array';
93+
$count = 0;
7794

7895
while( <FILE> ){
7996
chop;
@@ -85,10 +102,6 @@
85102
$code = hex($c);
86103
if( $code >= 0x80 && $ucs >= 0x100 ){
87104
$utf = &ucs2utf($ucs);
88-
if( $array{ $code } ne "" ){
89-
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
90-
next;
91-
}
92105
$count++;
93106

94107
$array{ $code } = $utf;

0 commit comments

Comments
 (0)