Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 678d0e2

Browse files
committed
Update snowball
Update to snowball tag v2.1.0. Major changes are new stemmers for Armenian, Serbian, and Yiddish.
1 parent b071a31 commit 678d0e2

File tree

106 files changed

+23449
-14979
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

106 files changed

+23449
-14979
lines changed

doc/src/sgml/textsearch.sgml

+3
Original file line number | Diff line number | Diff line change
@@ -3837,6 +3837,7 @@ Parser: "pg_catalog.default"
38373837
Schema | Name | Description
38383838
------------+-----------------+-----------------------------------------------------------
38393839
pg_catalog | arabic_stem | snowball stemmer for arabic language
3840+
pg_catalog | armenian_stem | snowball stemmer for armenian language
38403841
pg_catalog | basque_stem | snowball stemmer for basque language
38413842
pg_catalog | catalan_stem | snowball stemmer for catalan language
38423843
pg_catalog | danish_stem | snowball stemmer for danish language
@@ -3857,11 +3858,13 @@ Parser: "pg_catalog.default"
38573858
pg_catalog | portuguese_stem | snowball stemmer for portuguese language
38583859
pg_catalog | romanian_stem | snowball stemmer for romanian language
38593860
pg_catalog | russian_stem | snowball stemmer for russian language
3861+
pg_catalog | serbian_stem | snowball stemmer for serbian language
38603862
pg_catalog | simple | simple dictionary: just lower case and check for stopword
38613863
pg_catalog | spanish_stem | snowball stemmer for spanish language
38623864
pg_catalog | swedish_stem | snowball stemmer for swedish language
38633865
pg_catalog | tamil_stem | snowball stemmer for tamil language
38643866
pg_catalog | turkish_stem | snowball stemmer for turkish language
3867+
pg_catalog | yiddish_stem | snowball stemmer for yiddish language
38653868
</screen>
38663869
</para>
38673870
</listitem>

src/backend/snowball/Makefile

+8-2
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ OBJS += \
4343
stem_ISO_8859_2_romanian.o \
4444
stem_KOI8_R_russian.o \
4545
stem_UTF_8_arabic.o \
46+
stem_UTF_8_armenian.o \
4647
stem_UTF_8_basque.o \
4748
stem_UTF_8_catalan.o \
4849
stem_UTF_8_danish.o \
@@ -64,17 +65,20 @@ OBJS += \
6465
stem_UTF_8_portuguese.o \
6566
stem_UTF_8_romanian.o \
6667
stem_UTF_8_russian.o \
68+
stem_UTF_8_serbian.o \
6769
stem_UTF_8_spanish.o \
6870
stem_UTF_8_swedish.o \
6971
stem_UTF_8_tamil.o \
70-
stem_UTF_8_turkish.o
72+
stem_UTF_8_turkish.o \
73+
stem_UTF_8_yiddish.o
7174

7275
# first column is language name and also name of dictionary for not-all-ASCII
7376
# words, second is name of dictionary for all-ASCII words
7477
# Note order dependency: use of some other language as ASCII dictionary
7578
# must come after creation of that language
7679
LANGUAGES= \
7780
arabic arabic \
81+
armenian armenian \
7882
basque basque \
7983
catalan catalan \
8084
danish danish \
@@ -95,10 +99,12 @@ LANGUAGES= \
9599
portuguese portuguese \
96100
romanian romanian \
97101
russian english \
102+
serbian serbian \
98103
spanish spanish \
99104
swedish swedish \
100105
tamil tamil \
101-
turkish turkish
106+
turkish turkish \
107+
yiddish yiddish
102108

103109

104110
SQLSCRIPT= snowball_create.sql

src/backend/snowball/README

+4-3
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,8 @@ We choose to include the derived files in the PostgreSQL distribution
2929
because most installations will not have the Snowball compiler available.
3030

3131
We are currently synced with the Snowball git commit
32-
c70ed64f9d41c1032fba4e962b054f8e9d489a74 (tag v2.0.0)
33-
of 2019-10-02.
32+
4764395431c8f2a0b4fe18b816ab1fc966a45837 (tag v2.1.0)
33+
of 2021-01-21.
3434

3535
To update the PostgreSQL sources from a new Snowball version:
3636

@@ -59,7 +59,8 @@ do not require any changes.
5959

6060
4. Check whether any stemmer modules have been added or removed. If so, edit
6161
the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the
62-
stemmer_modules[] table in dict_snowball.c. You might also need to change
62+
stemmer_modules[] table in dict_snowball.c, as well as the list in the
63+
documentation in textsearch.sgml. You might also need to change
6364
the LANGUAGES list in Makefile and tsearch_config_languages in initdb.c.
6465

6566
5. The various stopword files in stopwords/ must be downloaded

src/backend/snowball/dict_snowball.c

+6
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
4747
#include "snowball/libstemmer/stem_KOI8_R_russian.h"
4848
#include "snowball/libstemmer/stem_UTF_8_arabic.h"
49+
#include "snowball/libstemmer/stem_UTF_8_armenian.h"
4950
#include "snowball/libstemmer/stem_UTF_8_basque.h"
5051
#include "snowball/libstemmer/stem_UTF_8_catalan.h"
5152
#include "snowball/libstemmer/stem_UTF_8_danish.h"
@@ -67,10 +68,12 @@
6768
#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
6869
#include "snowball/libstemmer/stem_UTF_8_romanian.h"
6970
#include "snowball/libstemmer/stem_UTF_8_russian.h"
71+
#include "snowball/libstemmer/stem_UTF_8_serbian.h"
7072
#include "snowball/libstemmer/stem_UTF_8_spanish.h"
7173
#include "snowball/libstemmer/stem_UTF_8_swedish.h"
7274
#include "snowball/libstemmer/stem_UTF_8_tamil.h"
7375
#include "snowball/libstemmer/stem_UTF_8_turkish.h"
76+
#include "snowball/libstemmer/stem_UTF_8_yiddish.h"
7477

7578
PG_MODULE_MAGIC;
7679

@@ -117,6 +120,7 @@ static const stemmer_module stemmer_modules[] =
117120
STEMMER_MODULE(romanian, PG_LATIN2, ISO_8859_2),
118121
STEMMER_MODULE(russian, PG_KOI8R, KOI8_R),
119122
STEMMER_MODULE(arabic, PG_UTF8, UTF_8),
123+
STEMMER_MODULE(armenian, PG_UTF8, UTF_8),
120124
STEMMER_MODULE(basque, PG_UTF8, UTF_8),
121125
STEMMER_MODULE(catalan, PG_UTF8, UTF_8),
122126
STEMMER_MODULE(danish, PG_UTF8, UTF_8),
@@ -138,10 +142,12 @@ static const stemmer_module stemmer_modules[] =
138142
STEMMER_MODULE(portuguese, PG_UTF8, UTF_8),
139143
STEMMER_MODULE(romanian, PG_UTF8, UTF_8),
140144
STEMMER_MODULE(russian, PG_UTF8, UTF_8),
145+
STEMMER_MODULE(serbian, PG_UTF8, UTF_8),
141146
STEMMER_MODULE(spanish, PG_UTF8, UTF_8),
142147
STEMMER_MODULE(swedish, PG_UTF8, UTF_8),
143148
STEMMER_MODULE(tamil, PG_UTF8, UTF_8),
144149
STEMMER_MODULE(turkish, PG_UTF8, UTF_8),
150+
STEMMER_MODULE(yiddish, PG_UTF8, UTF_8),
145151

146152
/*
147153
* Stemmer with PG_SQL_ASCII encoding should be valid for any server

src/backend/snowball/libstemmer/api.c

+1-8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#include "header.h"
22

3-
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
3+
extern struct SN_env * SN_create_env(int S_size, int I_size)
44
{
55
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
66
if (z == NULL) return NULL;
@@ -25,12 +25,6 @@ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
2525
if (z->I == NULL) goto error;
2626
}
2727

28-
if (B_size)
29-
{
30-
z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
31-
if (z->B == NULL) goto error;
32-
}
33-
3428
return z;
3529
error:
3630
SN_close_env(z, S_size);
@@ -50,7 +44,6 @@ extern void SN_close_env(struct SN_env * z, int S_size)
5044
free(z->S);
5145
}
5246
free(z->I);
53-
free(z->B);
5447
if (z->p) lose_s(z->p);
5548
free(z);
5649
}

0 commit comments

Comments
 (0)