Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7ac8a4b

Browse files
committed
Multibyte encodings support for ISpell dictionary
1 parent e3b9852 commit 7ac8a4b

File tree

5 files changed

+232
-147
lines changed

5 files changed

+232
-147
lines changed

contrib/tsearch2/ispell/regis.c

Lines changed: 80 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,23 @@
11
#include <stdio.h>
22
#include <stdlib.h>
33
#include <string.h>
4-
#include <ctype.h>
54

65
#include "regis.h"
6+
#include "ts_locale.h"
77
#include "common.h"
88

9-
int
9+
bool
1010
RS_isRegis(const char *str)
1111
{
1212
unsigned char *ptr = (unsigned char *) str;
1313

1414
while (ptr && *ptr)
15-
if (isalpha(*ptr) || *ptr == '[' || *ptr == ']' || *ptr == '^')
16-
ptr++;
15+
if (t_isalpha(ptr) || t_iseq(ptr,'[') || t_iseq(ptr,']') || t_iseq(ptr, '^'))
16+
ptr+=pg_mblen(ptr);
1717
else
18-
return 0;
19-
return 1;
18+
return false;
19+
20+
return true;
2021
}
2122

2223
#define RS_IN_ONEOF 1
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
3839
return ptr;
3940
}
4041

41-
int
42-
RS_compile(Regis * r, int issuffix, const char *str)
42+
void
43+
RS_compile(Regis * r, bool issuffix, char *str)
4344
{
44-
int i,
45-
len = strlen(str);
45+
int len = strlen(str);
4646
int state = RS_IN_WAIT;
47+
char *c = (char*)str;
4748
RegisNode *ptr = NULL;
4849

4950
memset(r, 0, sizeof(Regis));
5051
r->issuffix = (issuffix) ? 1 : 0;
5152

52-
for (i = 0; i < len; i++)
53+
while(*c)
5354
{
54-
unsigned char c = *(((unsigned char *) str) + i);
55-
5655
if (state == RS_IN_WAIT)
5756
{
58-
if (isalpha(c))
57+
if (t_isalpha(c))
5958
{
6059
if (ptr)
6160
ptr = newRegisNode(ptr, len);
6261
else
6362
ptr = r->node = newRegisNode(NULL, len);
64-
ptr->data[0] = c;
63+
COPYCHAR(ptr->data, c);
6564
ptr->type = RSF_ONEOF;
66-
ptr->len = 1;
65+
ptr->len = pg_mblen(c);
6766
}
68-
else if (c == '[')
67+
else if (t_iseq(c,'['))
6968
{
7069
if (ptr)
7170
ptr = newRegisNode(ptr, len);
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
7574
state = RS_IN_ONEOF;
7675
}
7776
else
78-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
77+
ts_error(ERROR, "Error in regis: %s", str );
7978
}
8079
else if (state == RS_IN_ONEOF)
8180
{
82-
if (c == '^')
81+
if (t_iseq(c,'^'))
8382
{
8483
ptr->type = RSF_NONEOF;
8584
state = RS_IN_NONEOF;
8685
}
87-
else if (isalpha(c))
86+
else if (t_isalpha(c))
8887
{
89-
ptr->data[0] = c;
90-
ptr->len = 1;
88+
COPYCHAR(ptr->data, c);
89+
ptr->len = pg_mblen(c);
9190
state = RS_IN_ONEOF_IN;
9291
}
9392
else
94-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
93+
ts_error(ERROR, "Error in regis: %s", str);
9594
}
9695
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
9796
{
98-
if (isalpha(c))
97+
if (t_isalpha(c))
9998
{
100-
ptr->data[ptr->len] = c;
101-
ptr->len++;
99+
COPYCHAR(ptr->data+ptr->len, c);
100+
ptr->len+=pg_mblen(c);
102101
}
103-
else if (c == ']')
102+
else if (t_iseq(c,']'))
104103
state = RS_IN_WAIT;
105104
else
106-
ts_error(ERROR, "Error in regis: %s at pos %d\n", str, i + 1);
105+
ts_error(ERROR, "Error in regis: %s", str);
107106
}
108107
else
109-
ts_error(ERROR, "Internal error in RS_compile: %d\n", state);
108+
ts_error(ERROR, "Internal error in RS_compile: %d", state);
109+
c += pg_mblen(c);
110110
}
111111

112112
ptr = r->node;
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
115115
r->nchar++;
116116
ptr = ptr->next;
117117
}
118-
119-
return 0;
120118
}
121119

122120
void
@@ -135,51 +133,77 @@ RS_free(Regis * r)
135133
r->node = NULL;
136134
}
137135

138-
int
139-
RS_execute(Regis * r, const char *str, int len)
136+
#ifdef TS_USE_WIDE
137+
static bool
138+
mb_strchr(char *str, char *c) {
139+
int clen = pg_mblen(c), plen,i;
140+
char *ptr =str;
141+
bool res=false;
142+
143+
clen = pg_mblen(c);
144+
while( *ptr && !res) {
145+
plen = pg_mblen(ptr);
146+
if ( plen == clen ) {
147+
i=plen;
148+
res = true;
149+
while(i--)
150+
if ( *(ptr+i) != *(c+i) ) {
151+
res = false;
152+
break;
153+
}
154+
}
155+
156+
ptr += plen;
157+
}
158+
159+
return res;
160+
}
161+
#else
162+
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
163+
#endif
164+
165+
166+
bool
167+
RS_execute(Regis * r, char *str)
140168
{
141169
RegisNode *ptr = r->node;
142-
unsigned char *c;
170+
char *c = str;
171+
int len=0;
143172

144-
if (len < 0)
145-
len = strlen(str);
173+
while(*c) {
174+
len++;
175+
c += pg_mblen(c);
176+
}
146177

147178
if (len < r->nchar)
148179
return 0;
149180

150-
if (r->issuffix)
151-
c = ((unsigned char *) str) + len - r->nchar;
152-
else
153-
c = (unsigned char *) str;
181+
c = str;
182+
if (r->issuffix) {
183+
len -= r->nchar;
184+
while(len-- > 0)
185+
c += pg_mblen(c);
186+
}
187+
154188

155189
while (ptr)
156190
{
157191
switch (ptr->type)
158192
{
159193
case RSF_ONEOF:
160-
if (ptr->len == 0)
161-
{
162-
if (*c != *(ptr->data))
163-
return 0;
164-
}
165-
else if (strchr((char *) ptr->data, *c) == NULL)
166-
return 0;
194+
if ( mb_strchr((char *) ptr->data, c) != true )
195+
return false;
167196
break;
168197
case RSF_NONEOF:
169-
if (ptr->len == 0)
170-
{
171-
if (*c == *(ptr->data))
172-
return 0;
173-
}
174-
else if (strchr((char *) ptr->data, *c) != NULL)
175-
return 0;
198+
if ( mb_strchr((char *) ptr->data, c) == true )
199+
return false;
176200
break;
177201
default:
178202
ts_error(ERROR, "RS_execute: Unknown type node: %d\n", ptr->type);
179203
}
180204
ptr = ptr->next;
181-
c++;
205+
c+=pg_mblen(c);
182206
}
183207

184-
return 1;
208+
return true;
185209
}

contrib/tsearch2/ispell/regis.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ typedef struct Regis
2727
unused:15;
2828
} Regis;
2929

30-
int RS_isRegis(const char *str);
30+
bool RS_isRegis(const char *str);
3131

32-
int RS_compile(Regis * r, int issuffix, const char *str);
32+
void RS_compile(Regis * r, bool issuffix, char *str);
3333
void RS_free(Regis * r);
3434

35-
/*возвращает 1 если матчится */
36-
int RS_execute(Regis * r, const char *str, int len);
35+
/*returns true if matches */
36+
bool RS_execute(Regis * r, char *str);
3737

3838
#endif

0 commit comments

Comments
 (0)