|
35 | 35 | * receive a different case-normalization mapping.
|
36 | 36 | */
|
37 | 37 | int
|
38 |
| -ScanKeywordLookup(const char *text, |
| 38 | +ScanKeywordLookup(const char *str, |
39 | 39 | const ScanKeywordList *keywords)
|
40 | 40 | {
|
41 |
| - int len, |
42 |
| - i; |
43 |
| - char word[NAMEDATALEN]; |
44 |
| - const char *kw_string; |
45 |
| - const uint16 *kw_offsets; |
46 |
| - const uint16 *low; |
47 |
| - const uint16 *high; |
48 |
| - |
49 |
| - len = strlen(text); |
| 41 | + size_t len; |
| 42 | + int h; |
| 43 | + const char *kw; |
50 | 44 |
|
| 45 | + /* |
| 46 | + * Reject immediately if too long to be any keyword. This saves useless |
| 47 | + * hashing and downcasing work on long strings. |
| 48 | + */ |
| 49 | + len = strlen(str); |
51 | 50 | if (len > keywords->max_kw_len)
|
52 |
| - return -1; /* too long to be any keyword */ |
53 |
| - |
54 |
| - /* We assume all keywords are shorter than NAMEDATALEN. */ |
55 |
| - Assert(len < NAMEDATALEN); |
| 51 | + return -1; |
56 | 52 |
|
57 | 53 | /*
|
58 |
| - * Apply an ASCII-only downcasing. We must not use tolower() since it may |
59 |
| - * produce the wrong translation in some locales (eg, Turkish). |
| 54 | + * Compute the hash function. We assume it was generated to produce |
| 55 | + * case-insensitive results. Since it's a perfect hash, we need only |
| 56 | + * match to the specific keyword it identifies. |
60 | 57 | */
|
61 |
| - for (i = 0; i < len; i++) |
62 |
| - { |
63 |
| - char ch = text[i]; |
| 58 | + h = keywords->hash(str, len); |
64 | 59 |
|
65 |
| - if (ch >= 'A' && ch <= 'Z') |
66 |
| - ch += 'a' - 'A'; |
67 |
| - word[i] = ch; |
68 |
| - } |
69 |
| - word[len] = '\0'; |
| 60 | + /* An out-of-range result implies no match */ |
| 61 | + if (h < 0 || h >= keywords->num_keywords) |
| 62 | + return -1; |
70 | 63 |
|
71 | 64 | /*
|
72 |
| - * Now do a binary search using plain strcmp() comparison. |
| 65 | + * Compare character-by-character to see if we have a match, applying an |
| 66 | + * ASCII-only downcasing to the input characters. We must not use |
| 67 | + * tolower() since it may produce the wrong translation in some locales |
| 68 | + * (eg, Turkish). |
73 | 69 | */
|
74 |
| - kw_string = keywords->kw_string; |
75 |
| - kw_offsets = keywords->kw_offsets; |
76 |
| - low = kw_offsets; |
77 |
| - high = kw_offsets + (keywords->num_keywords - 1); |
78 |
| - while (low <= high) |
| 70 | + kw = GetScanKeyword(h, keywords); |
| 71 | + while (*str != '\0') |
79 | 72 | {
|
80 |
| - const uint16 *middle; |
81 |
| - int difference; |
| 73 | + char ch = *str++; |
82 | 74 |
|
83 |
| - middle = low + (high - low) / 2; |
84 |
| - difference = strcmp(kw_string + *middle, word); |
85 |
| - if (difference == 0) |
86 |
| - return middle - kw_offsets; |
87 |
| - else if (difference < 0) |
88 |
| - low = middle + 1; |
89 |
| - else |
90 |
| - high = middle - 1; |
| 75 | + if (ch >= 'A' && ch <= 'Z') |
| 76 | + ch += 'a' - 'A'; |
| 77 | + if (ch != *kw++) |
| 78 | + return -1; |
91 | 79 | }
|
| 80 | + if (*kw != '\0') |
| 81 | + return -1; |
92 | 82 |
|
93 |
| - return -1; |
| 83 | + /* Success! */ |
| 84 | + return h; |
94 | 85 | }
|
0 commit comments