|
2 | 2 | #
|
3 | 3 | # Copyright 2001 by PostgreSQL Global Development Group
|
4 | 4 | #
|
5 |
| -# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $ |
| 5 | +# $Id: UCS_to_SJIS.pl,v 1.2 2001/02/23 08:44:33 ishii Exp $ |
6 | 6 | #
|
7 | 7 | # Generate UTF-8 <--> SJIS code conversion tables from
|
8 | 8 | # map files provided by Unicode organization.
|
|
21 | 21 |
|
22 | 22 | # first generate UTF-8 --> SJIS table
|
23 | 23 |
|
24 |
| -$in_file = "SHIFTJIS.TXT"; |
| 24 | +$in_file = "CP932.TXT"; |
| 25 | +$count = 0; |
25 | 26 |
|
26 | 27 | open( FILE, $in_file ) || die( "cannot open $in_file" );
|
27 | 28 |
|
28 | 29 | while( <FILE> ){
|
29 |
| - chop; |
30 |
| - if( /^#/ ){ |
31 |
| - next; |
32 |
| - } |
33 |
| - ( $c, $u, $rest ) = split; |
34 |
| - $ucs = hex($u); |
35 |
| - $code = hex($c); |
36 |
| - if( $code >= 0x80 && $ucs >= 0x100 ){ |
37 |
| - $utf = &ucs2utf($ucs); |
38 |
| - if( $array{ $utf } ne "" ){ |
39 |
| - printf STDERR "Warning: duplicate unicode: %04x\n",$ucs; |
40 |
| - next; |
41 |
| - } |
42 |
| - $count++; |
43 |
| - |
44 |
| - $array{ $utf } = $code; |
45 |
| - } |
| 30 | + chop; |
| 31 | + if( /^#/ ){ |
| 32 | + next; |
| 33 | + } |
| 34 | + ( $c, $u, $rest ) = split; |
| 35 | + $ucs = hex($u); |
| 36 | + $code = hex($c); |
| 37 | + if( $code >= 0x80 && $ucs >= 0x100 ){ |
| 38 | + $utf = &ucs2utf($ucs); |
| 39 | + if((( $code >= 0xed40 ) |
| 40 | + && ( $code <= 0xeefc )) |
| 41 | + || (( $code >= 0x8754 ) |
| 42 | + &&( $code <= 0x875d )) |
| 43 | + || ( $code == 0x878a ) |
| 44 | + || ( $code == 0x8782 ) |
| 45 | + || ( $code == 0x8784 ) |
| 46 | + || ( $code == 0xfa5b ) |
| 47 | + || ( $code == 0xfa54 ) |
| 48 | + || (( $code >= 0x8790 ) |
| 49 | + && ( $code <= 0x8792 )) |
| 50 | + || (( $code >= 0x8795 ) |
| 51 | + && ( $code <= 0x8797 )) |
| 52 | + || (( $code >= 0x879a ) |
| 53 | + && ( $code <= 0x879c ))) |
| 54 | + { |
| 55 | + printf STDERR "Warning: duplicate unicode : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code; |
| 56 | + next; |
| 57 | + } |
| 58 | + $count++; |
| 59 | + $array{ $utf } = $code; |
| 60 | + } |
46 | 61 | }
|
| 62 | + |
47 | 63 | close( FILE );
|
48 | 64 |
|
49 | 65 | #
|
|
57 | 73 | for $index ( sort {$a <=> $b} keys( %array ) ){
|
58 | 74 | $code = $array{ $index };
|
59 | 75 | $count--;
|
60 |
| - if( $count == 0 ){ |
| 76 | + if( $count == 0 ){ |
61 | 77 | printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
|
62 | 78 | } else {
|
63 | 79 | printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
|
|
68 | 84 | close(FILE);
|
69 | 85 |
|
70 | 86 | #
|
71 |
| -# then generate EUC_JP --> UTF8 table |
| 87 | +# then generate SJIS --> UTF8 table |
72 | 88 | #
|
73 | 89 |
|
74 | 90 | open( FILE, $in_file ) || die( "cannot open $in_file" );
|
75 | 91 |
|
76 | 92 | reset 'array';
|
| 93 | +$count = 0; |
77 | 94 |
|
78 | 95 | while( <FILE> ){
|
79 | 96 | chop;
|
|
85 | 102 | $code = hex($c);
|
86 | 103 | if( $code >= 0x80 && $ucs >= 0x100 ){
|
87 | 104 | $utf = &ucs2utf($ucs);
|
88 |
| - if( $array{ $code } ne "" ){ |
89 |
| - printf STDERR "Warning: duplicate code: %04x\n",$ucs; |
90 |
| - next; |
91 |
| - } |
92 | 105 | $count++;
|
93 | 106 |
|
94 | 107 | $array{ $code } = $utf;
|
|
0 commit comments