32
32
#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h"
33
33
#include "snowball/libstemmer/stem_ISO_8859_1_french.h"
34
34
#include "snowball/libstemmer/stem_ISO_8859_1_german.h"
35
- #include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h"
35
+ #include "snowball/libstemmer/stem_ISO_8859_1_indonesian.h"
36
+ #include "snowball/libstemmer/stem_ISO_8859_1_irish.h"
36
37
#include "snowball/libstemmer/stem_ISO_8859_1_italian.h"
37
38
#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h"
38
39
#include "snowball/libstemmer/stem_ISO_8859_1_porter.h"
39
40
#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h"
40
41
#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h"
41
42
#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h"
43
+ #include "snowball/libstemmer/stem_ISO_8859_2_hungarian.h"
42
44
#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
43
45
#include "snowball/libstemmer/stem_KOI8_R_russian.h"
46
+ #include "snowball/libstemmer/stem_UTF_8_arabic.h"
44
47
#include "snowball/libstemmer/stem_UTF_8_danish.h"
45
48
#include "snowball/libstemmer/stem_UTF_8_dutch.h"
46
49
#include "snowball/libstemmer/stem_UTF_8_english.h"
47
50
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
48
51
#include "snowball/libstemmer/stem_UTF_8_french.h"
49
52
#include "snowball/libstemmer/stem_UTF_8_german.h"
50
53
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
54
+ #include "snowball/libstemmer/stem_UTF_8_indonesian.h"
55
+ #include "snowball/libstemmer/stem_UTF_8_irish.h"
51
56
#include "snowball/libstemmer/stem_UTF_8_italian.h"
57
+ #include "snowball/libstemmer/stem_UTF_8_lithuanian.h"
58
+ #include "snowball/libstemmer/stem_UTF_8_nepali.h"
52
59
#include "snowball/libstemmer/stem_UTF_8_norwegian.h"
53
60
#include "snowball/libstemmer/stem_UTF_8_porter.h"
54
61
#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
55
62
#include "snowball/libstemmer/stem_UTF_8_romanian.h"
56
63
#include "snowball/libstemmer/stem_UTF_8_russian.h"
57
64
#include "snowball/libstemmer/stem_UTF_8_spanish.h"
58
65
#include "snowball/libstemmer/stem_UTF_8_swedish.h"
66
+ #include "snowball/libstemmer/stem_UTF_8_tamil.h"
59
67
#include "snowball/libstemmer/stem_UTF_8_turkish.h"
60
68
61
69
PG_MODULE_MAGIC ;
@@ -74,48 +82,60 @@ typedef struct stemmer_module
74
82
int (* stem ) (struct SN_env * );
75
83
} stemmer_module ;
76
84
85
/*
 * STEMMER_MODULE expands to one brace-enclosed initializer for an entry of
 * the stemmer table.
 *
 * Args: stemmer name, PG code for encoding, Snowball's name for encoding
 *
 * The stemmer name is stringized (#name) to produce the entry's name string.
 * It is also pasted together with Snowball's encoding name to produce the
 * three identifiers name_senc_create_env, name_senc_close_env and
 * name_senc_stem.  The PG encoding code is passed through unchanged.
 *
 * Note: there must be no whitespace between the macro name and the opening
 * parenthesis of its parameter list; otherwise the preprocessor would treat
 * this as an object-like macro whose replacement text starts with "(".
 */
#define STEMMER_MODULE(name,enc,senc) \
	{#name, enc, name##_##senc##_create_env, name##_##senc##_close_env, name##_##senc##_stem}
88
+
77
89
static const stemmer_module stemmer_modules [] =
78
90
{
79
91
/*
80
92
* Stemmers list from Snowball distribution
81
93
*/
82
- {"danish" , PG_LATIN1 , danish_ISO_8859_1_create_env , danish_ISO_8859_1_close_env , danish_ISO_8859_1_stem },
83
- {"dutch" , PG_LATIN1 , dutch_ISO_8859_1_create_env , dutch_ISO_8859_1_close_env , dutch_ISO_8859_1_stem },
84
- {"english" , PG_LATIN1 , english_ISO_8859_1_create_env , english_ISO_8859_1_close_env , english_ISO_8859_1_stem },
85
- {"finnish" , PG_LATIN1 , finnish_ISO_8859_1_create_env , finnish_ISO_8859_1_close_env , finnish_ISO_8859_1_stem },
86
- {"french" , PG_LATIN1 , french_ISO_8859_1_create_env , french_ISO_8859_1_close_env , french_ISO_8859_1_stem },
87
- {"german" , PG_LATIN1 , german_ISO_8859_1_create_env , german_ISO_8859_1_close_env , german_ISO_8859_1_stem },
88
- {"hungarian" , PG_LATIN1 , hungarian_ISO_8859_1_create_env , hungarian_ISO_8859_1_close_env , hungarian_ISO_8859_1_stem },
89
- {"italian" , PG_LATIN1 , italian_ISO_8859_1_create_env , italian_ISO_8859_1_close_env , italian_ISO_8859_1_stem },
90
- {"norwegian" , PG_LATIN1 , norwegian_ISO_8859_1_create_env , norwegian_ISO_8859_1_close_env , norwegian_ISO_8859_1_stem },
91
- {"porter" , PG_LATIN1 , porter_ISO_8859_1_create_env , porter_ISO_8859_1_close_env , porter_ISO_8859_1_stem },
92
- {"portuguese" , PG_LATIN1 , portuguese_ISO_8859_1_create_env , portuguese_ISO_8859_1_close_env , portuguese_ISO_8859_1_stem },
93
- {"spanish" , PG_LATIN1 , spanish_ISO_8859_1_create_env , spanish_ISO_8859_1_close_env , spanish_ISO_8859_1_stem },
94
- {"swedish" , PG_LATIN1 , swedish_ISO_8859_1_create_env , swedish_ISO_8859_1_close_env , swedish_ISO_8859_1_stem },
95
- {"romanian" , PG_LATIN2 , romanian_ISO_8859_2_create_env , romanian_ISO_8859_2_close_env , romanian_ISO_8859_2_stem },
96
- {"russian" , PG_KOI8R , russian_KOI8_R_create_env , russian_KOI8_R_close_env , russian_KOI8_R_stem },
97
- {"danish" , PG_UTF8 , danish_UTF_8_create_env , danish_UTF_8_close_env , danish_UTF_8_stem },
98
- {"dutch" , PG_UTF8 , dutch_UTF_8_create_env , dutch_UTF_8_close_env , dutch_UTF_8_stem },
99
- {"english" , PG_UTF8 , english_UTF_8_create_env , english_UTF_8_close_env , english_UTF_8_stem },
100
- {"finnish" , PG_UTF8 , finnish_UTF_8_create_env , finnish_UTF_8_close_env , finnish_UTF_8_stem },
101
- {"french" , PG_UTF8 , french_UTF_8_create_env , french_UTF_8_close_env , french_UTF_8_stem },
102
- {"german" , PG_UTF8 , german_UTF_8_create_env , german_UTF_8_close_env , german_UTF_8_stem },
103
- {"hungarian" , PG_UTF8 , hungarian_UTF_8_create_env , hungarian_UTF_8_close_env , hungarian_UTF_8_stem },
104
- {"italian" , PG_UTF8 , italian_UTF_8_create_env , italian_UTF_8_close_env , italian_UTF_8_stem },
105
- {"norwegian" , PG_UTF8 , norwegian_UTF_8_create_env , norwegian_UTF_8_close_env , norwegian_UTF_8_stem },
106
- {"porter" , PG_UTF8 , porter_UTF_8_create_env , porter_UTF_8_close_env , porter_UTF_8_stem },
107
- {"portuguese" , PG_UTF8 , portuguese_UTF_8_create_env , portuguese_UTF_8_close_env , portuguese_UTF_8_stem },
108
- {"romanian" , PG_UTF8 , romanian_UTF_8_create_env , romanian_UTF_8_close_env , romanian_UTF_8_stem },
109
- {"russian" , PG_UTF8 , russian_UTF_8_create_env , russian_UTF_8_close_env , russian_UTF_8_stem },
110
- {"spanish" , PG_UTF8 , spanish_UTF_8_create_env , spanish_UTF_8_close_env , spanish_UTF_8_stem },
111
- {"swedish" , PG_UTF8 , swedish_UTF_8_create_env , swedish_UTF_8_close_env , swedish_UTF_8_stem },
112
- {"turkish" , PG_UTF8 , turkish_UTF_8_create_env , turkish_UTF_8_close_env , turkish_UTF_8_stem },
94
+ STEMMER_MODULE (danish , PG_LATIN1 , ISO_8859_1 ),
95
+ STEMMER_MODULE (dutch , PG_LATIN1 , ISO_8859_1 ),
96
+ STEMMER_MODULE (english , PG_LATIN1 , ISO_8859_1 ),
97
+ STEMMER_MODULE (finnish , PG_LATIN1 , ISO_8859_1 ),
98
+ STEMMER_MODULE (french , PG_LATIN1 , ISO_8859_1 ),
99
+ STEMMER_MODULE (german , PG_LATIN1 , ISO_8859_1 ),
100
+ STEMMER_MODULE (indonesian , PG_LATIN1 , ISO_8859_1 ),
101
+ STEMMER_MODULE (irish , PG_LATIN1 , ISO_8859_1 ),
102
+ STEMMER_MODULE (italian , PG_LATIN1 , ISO_8859_1 ),
103
+ STEMMER_MODULE (norwegian , PG_LATIN1 , ISO_8859_1 ),
104
+ STEMMER_MODULE (porter , PG_LATIN1 , ISO_8859_1 ),
105
+ STEMMER_MODULE (portuguese , PG_LATIN1 , ISO_8859_1 ),
106
+ STEMMER_MODULE (spanish , PG_LATIN1 , ISO_8859_1 ),
107
+ STEMMER_MODULE (swedish , PG_LATIN1 , ISO_8859_1 ),
108
+ STEMMER_MODULE (hungarian , PG_LATIN2 , ISO_8859_2 ),
109
+ STEMMER_MODULE (romanian , PG_LATIN2 , ISO_8859_2 ),
110
+ STEMMER_MODULE (russian , PG_KOI8R , KOI8_R ),
111
+ STEMMER_MODULE (arabic , PG_UTF8 , UTF_8 ),
112
+ STEMMER_MODULE (danish , PG_UTF8 , UTF_8 ),
113
+ STEMMER_MODULE (dutch , PG_UTF8 , UTF_8 ),
114
+ STEMMER_MODULE (english , PG_UTF8 , UTF_8 ),
115
+ STEMMER_MODULE (finnish , PG_UTF8 , UTF_8 ),
116
+ STEMMER_MODULE (french , PG_UTF8 , UTF_8 ),
117
+ STEMMER_MODULE (german , PG_UTF8 , UTF_8 ),
118
+ STEMMER_MODULE (hungarian , PG_UTF8 , UTF_8 ),
119
+ STEMMER_MODULE (indonesian , PG_UTF8 , UTF_8 ),
120
+ STEMMER_MODULE (irish , PG_UTF8 , UTF_8 ),
121
+ STEMMER_MODULE (italian , PG_UTF8 , UTF_8 ),
122
+ STEMMER_MODULE (lithuanian , PG_UTF8 , UTF_8 ),
123
+ STEMMER_MODULE (nepali , PG_UTF8 , UTF_8 ),
124
+ STEMMER_MODULE (norwegian , PG_UTF8 , UTF_8 ),
125
+ STEMMER_MODULE (porter , PG_UTF8 , UTF_8 ),
126
+ STEMMER_MODULE (portuguese , PG_UTF8 , UTF_8 ),
127
+ STEMMER_MODULE (romanian , PG_UTF8 , UTF_8 ),
128
+ STEMMER_MODULE (russian , PG_UTF8 , UTF_8 ),
129
+ STEMMER_MODULE (spanish , PG_UTF8 , UTF_8 ),
130
+ STEMMER_MODULE (swedish , PG_UTF8 , UTF_8 ),
131
+ STEMMER_MODULE (tamil , PG_UTF8 , UTF_8 ),
132
+ STEMMER_MODULE (turkish , PG_UTF8 , UTF_8 ),
113
133
114
134
/*
115
135
* Stemmer with PG_SQL_ASCII encoding should be valid for any server
116
136
* encoding
117
137
*/
118
- { " english" , PG_SQL_ASCII , english_ISO_8859_1_create_env , english_ISO_8859_1_close_env , english_ISO_8859_1_stem } ,
138
+ STEMMER_MODULE ( english , PG_SQL_ASCII , ISO_8859_1 ) ,
119
139
120
140
{NULL , 0 , NULL , NULL , NULL } /* list end marker */
121
141
};
0 commit comments