-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_dict.sh
executable file
·45 lines (33 loc) · 1.63 KB
/
build_dict.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
# sort
export LC_ALL=C && sort -u diccionari.txt >diccionari_sorted.txt
#cp diccionari_sorted.txt ./backups/diccionari_$(date +%Y%m%d-%H%M%S).txt
rm diccionari.txt
mv diccionari_sorted.txt diccionari.txt
# replace whitespaces with tabs
perl sptotabs.pl <diccionari.txt >diccionari_tabs.txt
export LC_ALL=C && sort diccionari_tabs.txt >diccionari_tabs_sorted.txt
rm diccionari_tabs.txt
mv diccionari_tabs_sorted.txt diccionari_tabs.txt
# create list of used tags
#gawk -f tags.awk diccionari_tabs.txt | sort -u > catalan_tags.txt
# create tagger dictionary with morfologik tools
java -cp ~/lt/languagetool.jar org.languagetool.dev.POSDictionaryBuilder diccionari_tabs.txt catalan.info ca_wordlist.xml
cp /tmp/DictionaryBuilder*.dict ./catalan.dict
rm /tmp/DictionaryBuilder*.dict
# dump the tagger dictionary
java -cp ~/lt/languagetool.jar org.languagetool.dev.DictionaryExporter catalan.dict > catalan_lt.txt
# create synthesis dictionary with morfologik tools
java -cp ~/lt/languagetool.jar org.languagetool.dev.SynthDictionaryBuilder diccionari_tabs.txt catalan_synth.info
#cp /tmp/SynthDictionaryBuilder*_tags.txt ./catalan_tags.txt
cp /tmp/DictionaryBuilder*.dict ./catalan_synth.dict
rm /tmp/DictionaryBuilder*.dict
cp /tmp/SynthDictionaryBuilder*_tags.txt ./catalan_tags.txt
rm /tmp/SynthDictionaryBuilder*_tags.txt
# dump synthesis dictionary
java -cp ~/lt/languagetool.jar org.languagetool.dev.DictionaryExporter catalan_synth.dict > catalan_synth_lt.txt
rm diccionari_tabs.txt
#convert catalan_tags.txt to DOS file
sed 's/$'"/`echo \\\r`/" catalan_tags.txt > catalan_tags_dos.txt
rm catalan_tags.txt
mv catalan_tags_dos.txt catalan_tags.txt