Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a507387

Browse files
committed
Fix conversion table generator scripts.
convutils.pm relied on implicit conversion of an undefined value to integer zero. Some of the conversion scripts are susceptible to regexp greediness. Fix both, avoiding whitespace changes in the output. Also update ICU URLs that moved. No need to back-patch, because the output of these scripts is also in the source tree so we shouldn't need to rerun them on back-branches. Author: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKGJ7SEGLbj%3D%3DTQCcyKRA9aqj8%2B6L%3DexSq1y25TA%3DWxLziQ%40mail.gmail.com
1 parent e47c260 commit a507387

File tree

5 files changed

+44
-37
lines changed

5 files changed

+44
-37
lines changed

src/backend/utils/mb/Unicode/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ euc-jis-2004-std.txt sjis-0213-2004-std.txt:
122122
$(DOWNLOAD) http://x0213.org/codetable/$(@F)
123123

124124
gb-18030-2000.xml windows-949-2000.xml:
125-
$(DOWNLOAD) https://ssl.icu-project.org/repos/icu/data/trunk/charset/data/xml/$(@F)
125+
$(DOWNLOAD) https://raw.githubusercontent.com/unicode-org/icu-data/master/charset/data/xml/$(@F)
126126

127127
GB2312.TXT:
128128
$(DOWNLOAD) 'http://trac.greenstone.org/browser/trunk/gsdl/unicode/MAPPINGS/EASTASIA/GB/GB2312.TXT?rev=1842&format=txt'

src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@
2424

2525
while (my $line = <$in>)
2626
{
27-
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
27+
if ($line =~ /^0x(\w+)\s*U\+(\w+)\+(\w+)\s*#\s*(\S.*)?\s*$/)
2828
{
2929

3030
# combined characters
3131
my ($c, $u1, $u2) = ($1, $2, $3);
32-
my $rest = "U+" . $u1 . "+" . $u2 . $4;
32+
# The "\t \t" below is just to avoid insubstantial diffs.
33+
my $rest = "U+" . $u1 . "+" . $u2 . "\t \t" . $4;
3334
my $code = hex($c);
3435
my $ucs1 = hex($u1);
3536
my $ucs2 = hex($u2);
@@ -45,7 +46,7 @@
4546
l => $.
4647
};
4748
}
48-
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
49+
elsif ($line =~ /^0x(\w+)\s*U\+(\w+)\s*#\s*(\S.*)?\s*$/)
4950
{
5051

5152
# non-combined characters

src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@
8080
}
8181
}
8282

83-
foreach my $i (@mapping)
83+
# extract only SJIS characters
84+
foreach my $i (grep defined $_->{sjis}, @mapping)
8485
{
8586
my $sjis = $i->{sjis};
8687

src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@
2424

2525
while (my $line = <$in>)
2626
{
27-
if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
27+
if ($line =~ /^0x(\w+)\s*U\+(\w+)\+(\w+)\s*#\s*(\S.*)?\s*$/)
2828
{
2929

3030
# combined characters
3131
my ($c, $u1, $u2) = ($1, $2, $3);
32-
my $rest = "U+" . $u1 . "+" . $u2 . $4;
32+
# The "\t \t" below is just to avoid insubstantial diffs.
33+
my $rest = "U+" . $u1 . "+" . $u2 . "\t \t" . $4;
3334
my $code = hex($c);
3435
my $ucs1 = hex($u1);
3536
my $ucs2 = hex($u2);
@@ -45,7 +46,7 @@
4546
l => $.
4647
};
4748
}
48-
elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
49+
elsif ($line =~ /^0x(\w+)\s*U\+(\w+)\s*#\s*(\S.*)?\s*$/)
4950
{
5051

5152
# non-combined characters

src/backend/utils/mb/Unicode/convutils.pm

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,8 @@ sub print_radix_table
380380
{
381381
header => "Dummy map, for invalid values",
382382
min_idx => 0,
383-
max_idx => $widest_range
383+
max_idx => $widest_range,
384+
label => "dummy map"
384385
};
385386

386387
###
@@ -471,35 +472,37 @@ sub print_radix_table
471472
}
472473

473474
# Also look up the positions of the roots in the table.
474-
my $b1root = $segmap{"1-byte"};
475-
my $b2root = $segmap{"2-byte"};
476-
my $b3root = $segmap{"3-byte"};
477-
my $b4root = $segmap{"4-byte"};
475+
# Missing map represents dummy mapping.
476+
my $b1root = $segmap{"1-byte"} || 0;
477+
my $b2root = $segmap{"2-byte"} || 0;
478+
my $b3root = $segmap{"3-byte"} || 0;
479+
my $b4root = $segmap{"4-byte"} || 0;
478480

479481
# And the lower-upper values of each level in each radix tree.
480-
my $b1_lower = $min_idx{1}{1};
481-
my $b1_upper = $max_idx{1}{1};
482-
483-
my $b2_1_lower = $min_idx{2}{1};
484-
my $b2_1_upper = $max_idx{2}{1};
485-
my $b2_2_lower = $min_idx{2}{2};
486-
my $b2_2_upper = $max_idx{2}{2};
487-
488-
my $b3_1_lower = $min_idx{3}{1};
489-
my $b3_1_upper = $max_idx{3}{1};
490-
my $b3_2_lower = $min_idx{3}{2};
491-
my $b3_2_upper = $max_idx{3}{2};
492-
my $b3_3_lower = $min_idx{3}{3};
493-
my $b3_3_upper = $max_idx{3}{3};
494-
495-
my $b4_1_lower = $min_idx{4}{1};
496-
my $b4_1_upper = $max_idx{4}{1};
497-
my $b4_2_lower = $min_idx{4}{2};
498-
my $b4_2_upper = $max_idx{4}{2};
499-
my $b4_3_lower = $min_idx{4}{3};
500-
my $b4_3_upper = $max_idx{4}{3};
501-
my $b4_4_lower = $min_idx{4}{4};
502-
my $b4_4_upper = $max_idx{4}{4};
482+
# Missing values represent zero.
483+
my $b1_lower = $min_idx{1}{1} || 0;
484+
my $b1_upper = $max_idx{1}{1} || 0;
485+
486+
my $b2_1_lower = $min_idx{2}{1} || 0;
487+
my $b2_1_upper = $max_idx{2}{1} || 0;
488+
my $b2_2_lower = $min_idx{2}{2} || 0;
489+
my $b2_2_upper = $max_idx{2}{2} || 0;
490+
491+
my $b3_1_lower = $min_idx{3}{1} || 0;
492+
my $b3_1_upper = $max_idx{3}{1} || 0;
493+
my $b3_2_lower = $min_idx{3}{2} || 0;
494+
my $b3_2_upper = $max_idx{3}{2} || 0;
495+
my $b3_3_lower = $min_idx{3}{3} || 0;
496+
my $b3_3_upper = $max_idx{3}{3} || 0;
497+
498+
my $b4_1_lower = $min_idx{4}{1} || 0;
499+
my $b4_1_upper = $max_idx{4}{1} || 0;
500+
my $b4_2_lower = $min_idx{4}{2} || 0;
501+
my $b4_2_upper = $max_idx{4}{2} || 0;
502+
my $b4_3_lower = $min_idx{4}{3} || 0;
503+
my $b4_3_upper = $max_idx{4}{3} || 0;
504+
my $b4_4_lower = $min_idx{4}{4} || 0;
505+
my $b4_4_upper = $max_idx{4}{4} || 0;
503506

504507
###
505508
### Find the maximum value in the whole table, to determine if we can
@@ -607,7 +610,8 @@ sub print_radix_table
607610
for (my $j = 0;
608611
$j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
609612
{
610-
my $val = $seg->{values}->{$i};
613+
# missing values represent zero.
614+
my $val = $seg->{values}->{$i} || 0;
611615

612616
printf $out " 0x%0*x", $colwidth, $val;
613617
$off++;

0 commit comments

Comments
 (0)