-
Notifications
You must be signed in to change notification settings - Fork 1
/
en2wn.pl
30 lines (24 loc) · 807 Bytes
/
en2wn.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/perl
use strict;
use warnings;
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# Converts the En-Ir dictionary in diolaim/c to a pot file that
# is used for mapping the English defs to WN sense_keys.
#
# Usage: en2wn.pl [DICTIONARY]
#
# DICTIONARY defaults to the historical hard-coded location, so running the
# script with no arguments behaves as before.  The pot file is written to
# STDOUT.
#
# Each input line is expected to look like
#     WORD POS[ (DISAMBIGUATOR)]. [{NOTE}]: DEFINITIONS.
# Square brackets in the input are rewritten to curly braces before
# matching.  Lines that do not match, and entries whose POS is not one of
# a, n, v or adv, are skipped silently.
my $dict_path = @ARGV ? $ARGV[0] : "/home/kps/gaeilge/diolaim/c/en";

# Literal braces are escaped: an unescaped "{" in a pattern is deprecated
# and rejected by newer perls in several positions.
my $entry_re = qr/
    ^ ([^:]+) [ ]                                # headword
    ( (?:[a-z]+)? (?:[ ]\([A-Z\/a-z'-]+\))? )    # POS and optional disambiguator
    \. [ ]
    (?: \{ ([^}]+) \} )?                         # optional note in braces
    : [ ] (.+) \. $                              # definitions without the final period
/x;

open(my $enir, "<:utf8", $dict_path) or die "Could not open Eng-Ir dictionary: $!\n";

# Minimal PO header declaring the charset of the entries that follow.
print "msgid \"\"\nmsgstr \"\"\n\"Content-Type: text/plain; charset=UTF-8\\n\"\n\n";

while (my $line = <$enir>) {
    chomp $line;
    $line =~ tr/\[\]/{}/;

    my ($word, $disambpos, $note, $defs) = $line =~ $entry_re;

    # An undefined value means the line did not parse; an empty one means
    # "unknown" POS.  Only adjectives, nouns, verbs and adverbs are kept.
    next unless $disambpos;
    next unless $disambpos =~ /^(?:a|n|v|adv)(?: |$)/;

    # Quotes and backslashes must be escaped inside a PO string, otherwise
    # a headword containing either yields an invalid pot file.
    (my $msgid = "$word $disambpos") =~ s/(["\\])/\\$1/g;

    print "#. $note\n" if defined $note;
    print "#. ga=$defs\n";
    print "msgid \"$msgid\"\nmsgstr \"\"\n\n";
}
close $enir;
exit 0;