Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit cb4ea99

Browse files
committed
Improve support for multibyte encodings:
- tsvector_(in|out)
- tsquery_(in|out)
- to_tsvector
- to_tsquery, plainto_tsquery
- 'simple' dictionary
1 parent ec0baf9 commit cb4ea99

19 files changed

+263
-146
lines changed

contrib/tsearch2/dict.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ void sortstoplist(StopList * s);
1414
void freestoplist(StopList * s);
1515
void readstoplist(text *in, StopList * s);
1616
bool searchstoplist(StopList * s, char *key);
17-
char *lowerstr(char *str);
1817

1918
typedef struct
2019
{

contrib/tsearch2/dict_ex.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
#include "dict.h"
88
#include "common.h"
9+
#include "ts_locale.h"
910

1011
typedef struct
1112
{

contrib/tsearch2/dict_ispell.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include "dict.h"
1010
#include "common.h"
1111
#include "ispell/spell.h"
12+
#include "ts_locale.h"
1213

1314
typedef struct
1415
{

contrib/tsearch2/dict_snowball.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "snowball/header.h"
1111
#include "snowball/english_stem.h"
1212
#include "snowball/russian_stem.h"
13+
#include "ts_locale.h"
1314

1415
typedef struct
1516
{

contrib/tsearch2/dict_syn.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
#include "dict.h"
1010
#include "common.h"
11+
#include "ts_locale.h"
1112

1213
#define SYNBUFLEN 4096
1314
typedef struct

contrib/tsearch2/gendict/dict_snowball.c.IN

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "common.h"
1313
#include "snowball/header.h"
1414
#include "subinclude.h"
15+
#include "ts_locale.h"
1516

1617
typedef struct {
1718
struct SN_env *z;

contrib/tsearch2/gendict/dict_tmpl.c.IN

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "common.h"
1313

1414
#include "subinclude.h"
15+
#include "ts_locale.h"
1516

1617
HASINIT typedef struct {
1718
HASINIT StopList stoplist;

contrib/tsearch2/ispell/spell.c

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
#include "postgres.h"
77

88
#include "spell.h"
9+
#include "ts_locale.h"
910

1011
#define MAX_NORM 1024
1112
#define MAXNORMLEN 256
@@ -30,18 +31,6 @@ cmpspellaffix(const void *s1, const void *s2)
3031
return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
3132
}
3233

33-
static void
34-
strlower(char *str)
35-
{
36-
unsigned char *ptr = (unsigned char *) str;
37-
38-
while (*ptr)
39-
{
40-
*ptr = tolower(*ptr);
41-
ptr++;
42-
}
43-
}
44-
4534
static char *
4635
strnduplicate(char *s, int len)
4736
{
@@ -175,7 +164,7 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
175164
}
176165
else
177166
flag = "";
178-
strlower(str);
167+
lowerstr(str);
179168
/* Dont load words if first letter is not required */
180169
/* It allows to optimize loading at search time */
181170
s = str;
@@ -385,7 +374,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
385374
*s = 0;
386375
if (!*str)
387376
continue;
388-
strlower(str);
377+
lowerstr(str);
389378
strcpy(mask, "");
390379
strcpy(find, "");
391380
strcpy(repl, "");
@@ -851,7 +840,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
851840

852841
if (wrdlen > MAXNORMLEN)
853842
return NULL;
854-
strlower(word);
843+
lowerstr(word);
855844
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
856845
*cur = NULL;
857846

contrib/tsearch2/prs_dcfg.c

Lines changed: 34 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
#include "dict.h"
1010
#include "common.h"
11+
#include "ts_locale.h"
1112

1213
#define CS_WAITKEY 0
1314
#define CS_INKEY 1
@@ -30,11 +31,11 @@ nstrdup(char *ptr, int len)
3031
cptr = ptr = res;
3132
while (*ptr)
3233
{
33-
if (*ptr == '\\')
34+
if (t_iseq(ptr, '\\'))
3435
ptr++;
35-
*cptr = *ptr;
36-
ptr++;
37-
cptr++;
36+
COPYCHAR( cptr, ptr );
37+
cptr+=pg_mblen(ptr);
38+
ptr+=pg_mblen(ptr);
3839
}
3940
*cptr = '\0';
4041

@@ -52,9 +53,9 @@ parse_cfgdict(text *in, Map ** m)
5253

5354
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
5455
{
55-
if (*ptr == ',')
56+
if ( t_iseq(ptr, ',') )
5657
num++;
57-
ptr++;
58+
ptr+=pg_mblen(ptr);
5859
}
5960

6061
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
@@ -64,93 +65,93 @@ parse_cfgdict(text *in, Map ** m)
6465
{
6566
if (state == CS_WAITKEY)
6667
{
67-
if (isalpha((unsigned char) *ptr))
68+
if (t_isalpha(ptr))
6869
{
6970
begin = ptr;
7071
state = CS_INKEY;
7172
}
72-
else if (!isspace((unsigned char) *ptr))
73+
else if (!t_isspace(ptr))
7374
ereport(ERROR,
7475
(errcode(ERRCODE_SYNTAX_ERROR),
7576
errmsg("syntax error"),
76-
errdetail("Syntax error in position %d near \"%c\"",
77-
(int) (ptr - VARDATA(in)), *ptr)));
77+
errdetail("Syntax error in position %d",
78+
(int) (ptr - VARDATA(in)))));
7879
}
7980
else if (state == CS_INKEY)
8081
{
81-
if (isspace((unsigned char) *ptr))
82+
if (t_isspace(ptr))
8283
{
8384
mptr->key = nstrdup(begin, ptr - begin);
8485
state = CS_WAITEQ;
8586
}
86-
else if (*ptr == '=')
87+
else if (t_iseq(ptr,'='))
8788
{
8889
mptr->key = nstrdup(begin, ptr - begin);
8990
state = CS_WAITVALUE;
9091
}
91-
else if (!isalpha((unsigned char) *ptr))
92+
else if (!t_isalpha(ptr))
9293
ereport(ERROR,
9394
(errcode(ERRCODE_SYNTAX_ERROR),
9495
errmsg("syntax error"),
95-
errdetail("Syntax error in position %d near \"%c\"",
96-
(int) (ptr - VARDATA(in)), *ptr)));
96+
errdetail("Syntax error in position %d",
97+
(int) (ptr - VARDATA(in)))));
9798
}
9899
else if (state == CS_WAITEQ)
99100
{
100-
if (*ptr == '=')
101+
if (t_iseq(ptr, '='))
101102
state = CS_WAITVALUE;
102-
else if (!isspace((unsigned char) *ptr))
103+
else if (!t_isspace(ptr))
103104
ereport(ERROR,
104105
(errcode(ERRCODE_SYNTAX_ERROR),
105106
errmsg("syntax error"),
106-
errdetail("Syntax error in position %d near \"%c\"",
107-
(int) (ptr - VARDATA(in)), *ptr)));
107+
errdetail("Syntax error in position %d",
108+
(int) (ptr - VARDATA(in)))));
108109
}
109110
else if (state == CS_WAITVALUE)
110111
{
111-
if (*ptr == '"')
112+
if (t_iseq(ptr, '"'))
112113
{
113114
begin = ptr + 1;
114115
state = CS_INVALUE;
115116
}
116-
else if (!isspace((unsigned char) *ptr))
117+
else if (!t_isspace(ptr))
117118
{
118119
begin = ptr;
119120
state = CS_IN2VALUE;
120121
}
121122
}
122123
else if (state == CS_INVALUE)
123124
{
124-
if (*ptr == '"')
125+
if (t_iseq(ptr, '"'))
125126
{
126127
mptr->value = nstrdup(begin, ptr - begin);
127128
mptr++;
128129
state = CS_WAITDELIM;
129130
}
130-
else if (*ptr == '\\')
131+
else if (t_iseq(ptr, '\\'))
131132
state = CS_INESC;
132133
}
133134
else if (state == CS_IN2VALUE)
134135
{
135-
if (isspace((unsigned char) *ptr) || *ptr == ',')
136+
if (t_isspace(ptr) || t_iseq(ptr, ','))
136137
{
137138
mptr->value = nstrdup(begin, ptr - begin);
138139
mptr++;
139-
state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
140+
state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
140141
}
141-
else if (*ptr == '\\')
142+
else if (t_iseq(ptr, '\\'))
142143
state = CS_INESC;
143144
}
144145
else if (state == CS_WAITDELIM)
145146
{
146-
if (*ptr == ',')
147+
if (t_iseq(ptr, ','))
147148
state = CS_WAITKEY;
148-
else if (!isspace((unsigned char) *ptr))
149+
else if (!t_isspace(ptr))
149150
ereport(ERROR,
150151
(errcode(ERRCODE_SYNTAX_ERROR),
151152
errmsg("syntax error"),
152-
errdetail("Syntax error in position %d near \"%c\"",
153-
(int) (ptr - VARDATA(in)), *ptr)));
153+
errdetail("Syntax error in position %d",
154+
(int) (ptr - VARDATA(in)))));
154155
}
155156
else if (state == CS_INESC)
156157
state = CS_INVALUE;
@@ -160,9 +161,9 @@ parse_cfgdict(text *in, Map ** m)
160161
ereport(ERROR,
161162
(errcode(ERRCODE_SYNTAX_ERROR),
162163
errmsg("bad parser state"),
163-
errdetail("%d at position %d near \"%c\"",
164-
state, (int) (ptr - VARDATA(in)), *ptr)));
165-
ptr++;
164+
errdetail("%d at position %d",
165+
state, (int) (ptr - VARDATA(in)))));
166+
ptr+=pg_mblen(ptr);
166167
}
167168

168169
if (state == CS_IN2VALUE)

0 commit comments

Comments
 (0)