1
1
#include <stdio.h>
2
2
#include <stdlib.h>
3
3
#include <string.h>
4
- #include <ctype.h>
5
4
6
5
#include "regis.h"
6
+ #include "ts_locale.h"
7
7
#include "common.h"
8
8
9
/*
 * RS_isRegis
 *	  Decide whether str consists solely of characters this module accepts
 *	  in a pattern: anything t_isalpha() reports as alphabetic, plus the
 *	  three metacharacters '[', ']' and '^'.
 *
 * The string is walked one (possibly multibyte) character at a time,
 * stepping by pg_mblen().  Returns false at the first character outside
 * that set and true otherwise; a NULL or empty string therefore yields true.
 */
bool
RS_isRegis(const char *str)
{
	unsigned char *cur = (unsigned char *) str;

	/* Nothing to reject in a missing string. */
	if (cur == NULL)
		return true;

	for (; *cur; cur += pg_mblen(cur))
	{
		if (t_isalpha(cur))
			continue;
		if (t_iseq(cur, '[') || t_iseq(cur, ']') || t_iseq(cur, '^'))
			continue;

		/* Any other character disqualifies the whole string. */
		return false;
	}

	return true;
}
21
22
22
23
#define RS_IN_ONEOF 1
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
38
39
return ptr ;
39
40
}
40
41
41
- int
42
- RS_compile (Regis * r , int issuffix , const char * str )
42
+ void
43
+ RS_compile (Regis * r , bool issuffix , char * str )
43
44
{
44
- int i ,
45
- len = strlen (str );
45
+ int len = strlen (str );
46
46
int state = RS_IN_WAIT ;
47
+ char * c = (char * )str ;
47
48
RegisNode * ptr = NULL ;
48
49
49
50
memset (r , 0 , sizeof (Regis ));
50
51
r -> issuffix = (issuffix ) ? 1 : 0 ;
51
52
52
- for ( i = 0 ; i < len ; i ++ )
53
+ while ( * c )
53
54
{
54
- unsigned char c = * (((unsigned char * ) str ) + i );
55
-
56
55
if (state == RS_IN_WAIT )
57
56
{
58
- if (isalpha (c ))
57
+ if (t_isalpha (c ))
59
58
{
60
59
if (ptr )
61
60
ptr = newRegisNode (ptr , len );
62
61
else
63
62
ptr = r -> node = newRegisNode (NULL , len );
64
- ptr -> data [ 0 ] = c ;
63
+ COPYCHAR ( ptr -> data , c ) ;
65
64
ptr -> type = RSF_ONEOF ;
66
- ptr -> len = 1 ;
65
+ ptr -> len = pg_mblen ( c ) ;
67
66
}
68
- else if (c == '[' )
67
+ else if (t_iseq ( c , '[' ) )
69
68
{
70
69
if (ptr )
71
70
ptr = newRegisNode (ptr , len );
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
75
74
state = RS_IN_ONEOF ;
76
75
}
77
76
else
78
- ts_error (ERROR , "Error in regis: %s at pos %d\n " , str , i + 1 );
77
+ ts_error (ERROR , "Error in regis: %s" , str );
79
78
}
80
79
else if (state == RS_IN_ONEOF )
81
80
{
82
- if (c == '^' )
81
+ if (t_iseq ( c , '^' ) )
83
82
{
84
83
ptr -> type = RSF_NONEOF ;
85
84
state = RS_IN_NONEOF ;
86
85
}
87
- else if (isalpha (c ))
86
+ else if (t_isalpha (c ))
88
87
{
89
- ptr -> data [ 0 ] = c ;
90
- ptr -> len = 1 ;
88
+ COPYCHAR ( ptr -> data , c ) ;
89
+ ptr -> len = pg_mblen ( c ) ;
91
90
state = RS_IN_ONEOF_IN ;
92
91
}
93
92
else
94
- ts_error (ERROR , "Error in regis: %s at pos %d\n " , str , i + 1 );
93
+ ts_error (ERROR , "Error in regis: %s" , str );
95
94
}
96
95
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF )
97
96
{
98
- if (isalpha (c ))
97
+ if (t_isalpha (c ))
99
98
{
100
- ptr -> data [ ptr -> len ] = c ;
101
- ptr -> len ++ ;
99
+ COPYCHAR ( ptr -> data + ptr -> len , c ) ;
100
+ ptr -> len += pg_mblen ( c ) ;
102
101
}
103
- else if (c == ']' )
102
+ else if (t_iseq ( c , ']' ) )
104
103
state = RS_IN_WAIT ;
105
104
else
106
- ts_error (ERROR , "Error in regis: %s at pos %d\n " , str , i + 1 );
105
+ ts_error (ERROR , "Error in regis: %s" , str );
107
106
}
108
107
else
109
- ts_error (ERROR , "Internal error in RS_compile: %d\n" , state );
108
+ ts_error (ERROR , "Internal error in RS_compile: %d" , state );
109
+ c += pg_mblen (c );
110
110
}
111
111
112
112
ptr = r -> node ;
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
115
115
r -> nchar ++ ;
116
116
ptr = ptr -> next ;
117
117
}
118
-
119
- return 0 ;
120
118
}
121
119
122
120
void
@@ -135,51 +133,77 @@ RS_free(Regis * r)
135
133
r -> node = NULL ;
136
134
}
137
135
138
- int
139
- RS_execute (Regis * r , const char * str , int len )
136
#ifdef TS_USE_WIDE
/*
 * mb_strchr
 *	  Multibyte counterpart of strchr(): report whether the single character
 *	  that starts at c occurs anywhere in the NUL-terminated string str.
 *
 * str is scanned character by character; pg_mblen() is used as the byte
 * length of the character at a given position.  A candidate matches when it
 * has the same byte length as the needle and identical bytes.
 *
 * Returns true on the first match, false if str is exhausted.  Only the
 * first character of c is considered.
 */
static bool
mb_strchr(char *str, char *c)
{
	char	   *ptr = str;
	int			clen;

	/*
	 * An empty needle matches nothing.  Checked up front so the answer does
	 * not depend on what pg_mblen() reports for a NUL byte and so both build
	 * variants agree.
	 */
	if (*c == '\0')
		return false;

	clen = pg_mblen(c);

	while (*ptr)
	{
		int			plen = pg_mblen(ptr);

		/* Compare lengths first: it is cheap and rules out most candidates. */
		if (plen == clen && memcmp(ptr, c, clen) == 0)
			return true;

		ptr += plen;
	}

	return false;
}
#else
/*
 * mb_strchr
 *	  Single-byte build: report whether the byte *c occurs in the
 *	  NUL-terminated string str.
 *
 * strchr() would "find" the terminator when *c is NUL, so an empty needle is
 * rejected explicitly; this keeps the result the same as in the
 * TS_USE_WIDE build.
 */
static bool
mb_strchr(char *str, char *c)
{
	if (*c == '\0')
		return false;

	return strchr(str, *c) != NULL;
}
#endif
164
+
165
+
166
+ bool
167
+ RS_execute (Regis * r , char * str )
140
168
{
141
169
RegisNode * ptr = r -> node ;
142
- unsigned char * c ;
170
+ char * c = str ;
171
+ int len = 0 ;
143
172
144
- if (len < 0 )
145
- len = strlen (str );
173
+ while (* c ) {
174
+ len ++ ;
175
+ c += pg_mblen (c );
176
+ }
146
177
147
178
if (len < r -> nchar )
148
179
return 0 ;
149
180
150
- if (r -> issuffix )
151
- c = ((unsigned char * ) str ) + len - r -> nchar ;
152
- else
153
- c = (unsigned char * ) str ;
181
+ c = str ;
182
+ if (r -> issuffix ) {
183
+ len -= r -> nchar ;
184
+ while (len -- > 0 )
185
+ c += pg_mblen (c );
186
+ }
187
+
154
188
155
189
while (ptr )
156
190
{
157
191
switch (ptr -> type )
158
192
{
159
193
case RSF_ONEOF :
160
- if (ptr -> len == 0 )
161
- {
162
- if (* c != * (ptr -> data ))
163
- return 0 ;
164
- }
165
- else if (strchr ((char * ) ptr -> data , * c ) == NULL )
166
- return 0 ;
194
+ if ( mb_strchr ((char * ) ptr -> data , c ) != true )
195
+ return false;
167
196
break ;
168
197
case RSF_NONEOF :
169
- if (ptr -> len == 0 )
170
- {
171
- if (* c == * (ptr -> data ))
172
- return 0 ;
173
- }
174
- else if (strchr ((char * ) ptr -> data , * c ) != NULL )
175
- return 0 ;
198
+ if ( mb_strchr ((char * ) ptr -> data , c ) == true )
199
+ return false;
176
200
break ;
177
201
default :
178
202
ts_error (ERROR , "RS_execute: Unknown type node: %d\n" , ptr -> type );
179
203
}
180
204
ptr = ptr -> next ;
181
- c ++ ;
205
+ c += pg_mblen ( c ) ;
182
206
}
183
207
184
- return 1 ;
208
+ return true ;
185
209
}
0 commit comments