File tree 2 files changed +59
-0
lines changed
contrib/tsearch2/my2ispell
2 files changed +59
-0
lines changed Original file line number Diff line number Diff line change
1
# Base name (without extension) of the MySpell archive to convert.
# Override on the command line, e.g. `gmake ZIPFILE=nn_NO`.
ZIPFILE  = nb_NO

# Base name of the generated ispell files ($(LANGUAGE).dict / $(LANGUAGE).aff).
LANGUAGE = norsk

# Command used to extract members from the archive.
UNZIP    = unzip -o
6
+
7
+
8
# Default goal: build both the ispell dictionary and the ispell affix file.
# Declared phony so that a stray file named "all" cannot mask the goal.
.PHONY: all
all: $(LANGUAGE).dict $(LANGUAGE).aff
9
+
10
# Extract the MySpell affix file from the archive.
# $< (the archive itself) is used rather than $? so the command does not
# depend on which prerequisites happen to be newer than the target.
# The touch gives the extracted file a current timestamp; presumably this is
# needed because unzip restores the archived modification time, which would
# otherwise leave the target looking older than the archive.
$(ZIPFILE).aff: $(ZIPFILE).zip
	$(UNZIP) $< $@
	touch $@
13
+
14
+
15
# Build the ispell dictionary from the MySpell .dic file in the archive:
#   1. clean up the word list: drop lines that consist only of digits
#      (presumably the leading word-count line) and lines containing a period;
#   2. remove every " character;
#   3. append the flag "z" (the compoundwords controlled flag, per the
#      original note) to each entry whose flag list contains one of the
#      compound-only suffix flags \ _ ` but does not already contain z.
# The result is sorted and written to the target.
#
# $(ZIPFILE).dic is extracted as a by-product; `make clean` removes it.
# perl is run with -p only: in a pipeline there is no file to edit in place,
# and -i without file names makes perl emit a warning.

$(LANGUAGE).dict: $(ZIPFILE).zip
	$(UNZIP) $< $(ZIPFILE).dic
	grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
	 | grep -v '\.' \
	 | sed -e 's/"//g' \
	 | perl -p -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
	 | sort \
	> $@
28
+
29
# Convert the MySpell affix file to ispell affix syntax.
# Lines containing "zyzyzy" (case-insensitive) are dropped, then each line is
# rewritten by the perl expressions below, in order:
#   - COMPOUNDFLAG x      -> compoundwords controlled x
#   - COMPOUNDMIN n       -> compoundmin n
#   - PFX header (Y only) -> "flag x:", preceded by a "prefixes" section
#                            header the first time one is seen
#   - PFX rule            -> upper-cased " condition > add"
#   - SFX header          -> "flag [~][*]x:" ("*" when the header says Y,
#                            "~" when text precedes SFX on the line, and the
#                            flag backslash-escaped when it has no ASCII
#                            letter), preceded by a "suffixes" section header
#                            the first time one is seen
#   - SFX rule            -> upper-cased " condition > [-strip,][add]", where
#                            a strip or add of "0" is omitted
#   - SET / TRY           -> commented out with a leading #
# perl is run with -p only: in a pipeline there is no file to edit in place,
# and -i without file names makes perl emit a warning.

$(LANGUAGE).aff: $(ZIPFILE).aff
	grep -v -i zyzyzy $(ZIPFILE).aff \
	 | perl -p \
		-e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
		-e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
		-e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
		-e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \
		-e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
		-e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
		-e 's/^(SET|TRY)/#$$1/' \
	> $@
43
+
44
# Remove the extracted MySpell files and the generated ispell files.
# Declared phony so that a file named "clean" cannot disable the target.
# All four names are plain files, so -f is enough and -r is not needed.
.PHONY: clean
clean:
	rm -f $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff
46
+
47
+
Original file line number Diff line number Diff line change
1
+ Utility for converting a MySpell dictionary and affix file
2
+ to ispell format.
3
+ The utility has been tested on nb_NO.zip and nn_NO.zip from
4
+ OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html).
5
+
6
+ Usage:
7
+ For example, to build the Norwegian dictionary and affix files:
8
+ % cp nb_NO.zip my2ispell
9
+ % cd my2ispell
10
+ % gmake ZIPFILE=nb_NO LANGUAGE=norsk
11
+
12
+ Author: Teodor Sigaev <teodor@sigaev.ru>
You can’t perform that action at this time.
0 commit comments