|
49 | 49 | $simple{$code} = {
|
50 | 50 | Simple_Lowercase => ($simple_lowercase || $code),
|
51 | 51 | Simple_Titlecase => ($simple_titlecase || $code),
|
52 |
| - Simple_Uppercase => ($simple_uppercase || $code) |
| 52 | + Simple_Uppercase => ($simple_uppercase || $code), |
| 53 | + Simple_Foldcase => $code, |
53 | 54 | };
|
54 | 55 | }
|
55 | 56 | }
|
|
87 | 88 | my @lower = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[1]));
|
88 | 89 | my @title = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[2]));
|
89 | 90 | my @upper = map { hex $_ } (grep /^[0-9A-F]+$/, (split /\s+/, $elts[3]));
|
| 91 | + my @fold = (); |
90 | 92 | my @conditions = map {
|
91 | 93 | # supporting negated conditions may require storing a
|
92 | 94 | # mask of relevant conditions for a given rule to differentiate
|
|
101 | 103 | push @lower, $code if (scalar @lower == 0);
|
102 | 104 | push @title, $code if (scalar @title == 0);
|
103 | 105 | push @upper, $code if (scalar @upper == 0);
|
| 106 | + push @fold, $code; |
104 | 107 |
|
105 | 108 | # none should map to more than 3 codepoints
|
106 | 109 | die "lowercase expansion for 0x$elts[0] exceeds maximum: '$elts[1]'"
|
|
114 | 117 | while (scalar @upper < $MAX_CASE_EXPANSION) { push @upper, 0x000000 }
|
115 | 118 | while (scalar @lower < $MAX_CASE_EXPANSION) { push @lower, 0x000000 }
|
116 | 119 | while (scalar @title < $MAX_CASE_EXPANSION) { push @title, 0x000000 }
|
| 120 | + while (scalar @fold < $MAX_CASE_EXPANSION) { push @fold, 0x000000 } |
117 | 121 |
|
118 | 122 | # Characters with special mappings may not have simple mappings;
|
119 | 123 | # ensure that an entry exists.
|
120 | 124 | $simple{$code} ||= {
|
121 | 125 | Simple_Lowercase => $code,
|
122 | 126 | Simple_Titlecase => $code,
|
123 |
| - Simple_Uppercase => $code |
| 127 | + Simple_Uppercase => $code, |
| 128 | + Simple_Foldcase => $code |
124 | 129 | };
|
125 | 130 |
|
126 | 131 | # Multiple special case rules for a single codepoint could be
|
|
135 | 140 | Lowercase => \@lower,
|
136 | 141 | Titlecase => \@title,
|
137 | 142 | Uppercase => \@upper,
|
| 143 | + Foldcase => \@fold, |
138 | 144 | Conditions => $cond_str
|
139 | 145 | };
|
140 | 146 | }
|
141 | 147 | close $FH;
|
142 | 148 |
|
# Read CaseFolding.txt and record the case folding of every codepoint.
#
# Each data line has the form "code; status; mapping; # comment".
#   - Status 'S' and 'C' carry a single-codepoint folding, which is stored
#     in $simple{$code}{Simple_Foldcase}.
#   - Status 'F' carries a folding of up to $MAX_CASE_EXPANSION codepoints,
#     which is stored in $special{$code}{Foldcase}.  A 'C' folding is also
#     copied there when the codepoint already has a special-case entry.
#   - Status 'T' is not supported and is skipped.
# Any other status, or a malformed line, aborts the script.
open($FH, '<', "$output_path/CaseFolding.txt")
  or die "Could not open $output_path/CaseFolding.txt: $!.";
while (my $line = <$FH>)
{
	# remove comments
	$line =~ s/^(.*?)#.*$/$1/s;

	# ignore empty lines
	next unless $line =~ /;/;

	my @elts = split(';', $line);

	# a data line needs at least the code, status and mapping fields
	die "malformed line in CaseFolding.txt: '$line'" if scalar @elts < 3;

	my $code = hex($elts[0]);
	my $status = $elts[1] =~ s/^\s+|\s+$//rg;

	die "codepoint $code out of range" if $code > 0x10FFFF;

	# status 'T' unsupported; skip
	next if $status eq 'T';

	# encountered unrecognized status type
	die "unsupported status type '$status'"
	  if $status ne 'S' && $status ne 'C' && $status ne 'F';

	# Codepoint may map to multiple characters when folding. Split the
	# mapping on whitespace and convert each hexadecimal token into a
	# codepoint.  Every token is validated in full: a partial match would
	# let hex() silently truncate a malformed token.
	my @tokens = grep { length $_ } (split /\s+/, $elts[2]);
	foreach my $token (@tokens)
	{
		die "invalid codepoint '$token' in case folding for 0x$elts[0]"
		  if $token !~ /^[0-9A-Fa-f]+$/;
	}
	my @fold = map { hex $_ } @tokens;

	die "missing case folding for 0x$elts[0]: '$line'"
	  if scalar @fold == 0;

	# initialize simple case mappings if they don't exist
	$simple{$code} ||= {
		Simple_Lowercase => $code,
		Simple_Titlecase => $code,
		Simple_Uppercase => $code,
		Simple_Foldcase => $code
	};

	if ($status eq 'S' || $status eq 'C')
	{
		die
		  "Simple case folding for $code has multiple codepoints: '$line' '$elts[2]'"
		  if scalar @fold != 1;
		my $simple_foldcase = $fold[0];

		die "Simple_Foldcase $code out of range"
		  if $simple_foldcase > 0x10FFFF;

		$simple{$code}{Simple_Foldcase} = $simple_foldcase;
	}

	if ($status eq 'F' || ($status eq 'C' && defined $special{$code}))
	{
		# none should map to more codepoints than the table can hold
		die
		  "case folding expansion for 0x$elts[0] exceeds maximum: '$elts[2]'"
		  if scalar @fold > $MAX_CASE_EXPANSION;

		# pad with zeroes up to the fixed expansion width
		while (scalar @fold < $MAX_CASE_EXPANSION) { push @fold, 0x000000 }

		# initialize special case mappings if they don't exist, seeding
		# each kind from the corresponding simple mapping
		if (!defined $special{$code})
		{
			my %entry = (Conditions => '0');
			foreach my $kind (qw(Lowercase Titlecase Uppercase))
			{
				my @map = ($simple{$code}{"Simple_$kind"});
				while (scalar @map < $MAX_CASE_EXPANSION)
				{
					push @map, 0x000000;
				}
				$entry{$kind} = \@map;
			}
			$special{$code} = \%entry;
		}

		$special{$code}{Foldcase} = \@fold;
	}
}
close $FH;
| 232 | + |
143 | 233 | # assign sequential array indexes to the special mappings
|
144 | 234 | my $special_idx = 0;
|
145 | 235 | foreach my $code (sort { $a <=> $b } (keys %special))
|
|
202 | 292 | CaseLower = 0,
|
203 | 293 | CaseTitle = 1,
|
204 | 294 | CaseUpper = 2,
|
| 295 | + CaseFold = 3, |
205 | 296 | NCaseKind
|
206 | 297 | } CaseKind;
|
207 | 298 |
|
|
232 | 323 | die if scalar @{ $special{$code}{Lowercase} } != $MAX_CASE_EXPANSION;
|
233 | 324 | die if scalar @{ $special{$code}{Titlecase} } != $MAX_CASE_EXPANSION;
|
234 | 325 | die if scalar @{ $special{$code}{Uppercase} } != $MAX_CASE_EXPANSION;
|
| 326 | + die if scalar @{ $special{$code}{Foldcase} } != $MAX_CASE_EXPANSION; |
235 | 327 | my $lower = join ", ",
|
236 | 328 | (map { sprintf "0x%06x", $_ } @{ $special{$code}{Lowercase} });
|
237 | 329 | my $title = join ", ",
|
238 | 330 | (map { sprintf "0x%06x", $_ } @{ $special{$code}{Titlecase} });
|
239 | 331 | my $upper = join ", ",
|
240 | 332 | (map { sprintf "0x%06x", $_ } @{ $special{$code}{Uppercase} });
|
| 333 | + my $fold = join ", ", |
| 334 | + (map { sprintf "0x%06x", $_ } @{ $special{$code}{Foldcase} }); |
241 | 335 | printf $OT "\t{0x%06x, %s, ", $code, $special{$code}{Conditions};
|
242 |
| - printf $OT "{{%s}, {%s}, {%s}}},\n", $lower, $title, $upper; |
| 336 | + printf $OT "{{%s}, {%s}, {%s}, {%s}}},\n", $lower, $title, $upper, $fold; |
243 | 337 | }
|
244 | 338 |
|
# Trailing all-zero row of the special_case table.  It carries one zeroed
# mapping group per case kind (lower, title, upper, fold), matching the four
# groups printed for every other row.  C would zero-fill an omitted group, so
# this only keeps the generated rows uniform.
print $OT "\t{0, 0, {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}}\n";
|
|
260 | 354 | my $lc = ($simple{$code}{Simple_Lowercase} || $code);
|
261 | 355 | my $tc = ($simple{$code}{Simple_Titlecase} || $code);
|
262 | 356 | my $uc = ($simple{$code}{Simple_Uppercase} || $code);
|
| 357 | + my $fc = ($simple{$code}{Simple_Foldcase} || $code); |
| 358 | + |
263 | 359 | die "unexpected special case for code $code"
|
264 | 360 | if defined $special{$code};
|
265 | 361 | printf $OT
|
266 |
| - "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}, NULL},\n", |
267 |
| - $code, $lc, $tc, $uc; |
| 362 | + "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x,[CaseFold] = 0x%06x}, NULL},\n", |
| 363 | + $code, $lc, $tc, $uc, $fc; |
268 | 364 | }
|
269 | 365 | printf $OT "\n";
|
270 | 366 |
|
|
280 | 376 | $special_case = sprintf "&special_case[%d]", $special{$code}{Index};
|
281 | 377 | }
|
282 | 378 | printf $OT
|
283 |
| - "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}, %s},\n", |
| 379 | + "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x,[CaseFold] = 0x%06x}, %s},\n", |
284 | 380 | $code, $map->{Simple_Lowercase}, $map->{Simple_Titlecase},
|
285 |
| - $map->{Simple_Uppercase}, $special_case; |
| 381 | + $map->{Simple_Uppercase}, $map->{Simple_Foldcase}, $special_case; |
286 | 382 | }
|
287 | 383 | print $OT "};\n";
|
0 commit comments