Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f8c8a8b

Browse files
committed
Revert "Change mbbisearch to return the character range"
This reverts commit 78ab944.

After I had committed eb0d0d2 and 78ab944, I decided to add a sanity check for a "can't happen" scenario just to be cautious. It turned out that this scenario already occurs in the official Unicode source data, namely that a character can be both wide and a combining character. This fact renders the aforementioned commits unnecessary, so revert both of them.

Discussion: https://www.postgresql.org/message-id/CAFBsxsH5ejH4-1xaTLpSK8vWoK1m6fA1JBtTM6jmBsLfmDki1g%40mail.gmail.com
1 parent 0d906b2 commit f8c8a8b

File tree

3 files changed

+203
-208
lines changed

3 files changed

+203
-208
lines changed

src/common/unicode/generate-unicode_width_table.pl

+2 -2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
print
1818
"/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n";
1919

20-
print "static const struct mbinterval wcwidth[] = {\n";
20+
print "static const struct mbinterval combining[] = {\n";
2121

2222
foreach my $line (<ARGV>)
2323
{
@@ -40,7 +40,7 @@
4040
# not a combining character, print out previous range if any
4141
if (defined($range_start))
4242
{
43-
printf "\t{0x%04X, 0x%04X, 0},\n", $range_start, $prev_codepoint;
43+
printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
4444
$range_start = undef;
4545
}
4646
}

src/common/wchar.c

+8 -13
Original file line numberDiff line numberDiff line change
@@ -585,18 +585,17 @@ struct mbinterval
585585
{
586586
unsigned short first;
587587
unsigned short last;
588-
signed int width;
589588
};
590589

591590
/* auxiliary function for binary search in interval table */
592-
static const struct mbinterval *
591+
static int
593592
mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
594593
{
595594
int min = 0;
596595
int mid;
597596

598597
if (ucs < table[0].first || ucs > table[max].last)
599-
return NULL;
598+
return 0;
600599
while (max >= min)
601600
{
602601
mid = (min + max) / 2;
@@ -605,10 +604,10 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
605604
else if (ucs < table[mid].first)
606605
max = mid - 1;
607606
else
608-
return &table[mid];
607+
return 1;
609608
}
610609

611-
return NULL;
610+
return 0;
612611
}
613612

614613

@@ -647,21 +646,17 @@ ucs_wcwidth(pg_wchar ucs)
647646
{
648647
#include "common/unicode_width_table.h"
649648

650-
const struct mbinterval *range;
651-
652649
/* test for 8-bit control characters */
653650
if (ucs == 0)
654651
return 0;
655652

656653
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
657654
return -1;
658655

659-
/* binary search in table of character widths */
660-
range = mbbisearch(ucs, wcwidth,
661-
sizeof(wcwidth) / sizeof(struct mbinterval) - 1);
662-
663-
if (range != NULL)
664-
return range->width;
656+
/* binary search in table of non-spacing characters */
657+
if (mbbisearch(ucs, combining,
658+
sizeof(combining) / sizeof(struct mbinterval) - 1))
659+
return 0;
665660

666661
/*
667662
* if we arrive here, ucs is not a combining or C0/C1 control character

0 commit comments

Comments
 (0)