-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexibank_sidwellbahnaric.py
84 lines (74 loc) · 3.09 KB
/
lexibank_sidwellbahnaric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from pathlib import Path
import lingpy as lp
from clldutils.misc import slug
from pylexibank import Dataset as BaseDataset
from pylexibank.util import getEvoBibAsBibtex
from pylexibank import progressbar
from pylexibank import Concept, Language
import attr
@attr.s
class CustomConcept(Concept):
    """Concept record extended with one extra attribute, ``Number``.

    ``Number`` defaults to ``None`` when it is not supplied.
    """

    Number = attr.ib(default=None)
class Dataset(BaseDataset):
    """Dataset definition with the identifier ``sidwellbahnaric``.

    Concepts are stored as ``CustomConcept`` rows, and the dataset directory
    is the directory containing this module.
    """

    dir = Path(__file__).parent
    id = "sidwellbahnaric"
    concept_class = CustomConcept

    def cmd_makecldf(self, args):
        """Write concepts, languages, sources, forms and cognates to ``args.writer``.

        Steps, in order:

        1. Load the raw wordlist ``D_test_Bahnaric-200-24.tsv`` from
           ``self.raw_dir``.
        2. Register every concept of the first concept list, using
           ``<number>_<slug of English gloss>`` as its ID, and remember the
           mapping from English gloss to that ID.
        3. Register languages (looked up by ``Name``) and sources.
        4. For each wordlist row whose concept gloss is known, add a form and
           a cognate judgement. Rows with an unknown gloss are skipped and the
           gloss is printed once to standard output.

        :param args: Command arguments; only ``args.writer`` is used here.
        """
        concepts = {}
        wl = lp.Wordlist(self.raw_dir.joinpath("D_test_Bahnaric-200-24.tsv").as_posix())

        for concept in self.conceptlists[0].concepts.values():
            idx = "{0}_{1}".format(concept.number, slug(concept.english))
            args.writer.add_concept(
                ID=idx,
                Number=concept.number,
                Name=concept.english,
                Concepticon_ID=concept.concepticon_id,
                Concepticon_Gloss=concept.concepticon_gloss,
            )
            concepts[concept.english] = idx

        # NOTE: the gloss aliases below are disabled. They are kept as a record
        # of alternative gloss spellings that were once mapped onto concept-list
        # glosses; re-enable individual lines if the loop below reports them.
        # concepts['burn'] = concepts['burn tr.']
        # concepts['claw'] = concepts['claw (nail)']
        # concepts['come (V)'] = concepts['come']
        # concepts['die (V)'] = concepts['die']
        # concepts['drink (V)'] = concepts['drink']
        # concepts['eat (V)'] = concepts['eat']
        # concepts['fat'] = concepts['fat n.']
        # concepts['fly'] = concepts['fly v.']
        # concepts['give (V)'] = concepts['give']
        # concepts['hear (V)'] = concepts['hear']
        # concepts['kill (V)'] = concepts['kill']
        # concepts['know (V)'] = concepts['know']
        # concepts['lie (V)'] = concepts['lie']
        # concepts['rain (V)'] = concepts['rain']
        # concepts['say (V)'] = concepts['say']
        # concepts['see (V)'] = concepts['see']
        # concepts['sit (V)'] = concepts['sit']
        # concepts['sleep (V)'] = concepts['sleep']
        # concepts['stand (V)'] = concepts['stand']
        # concepts['swim (V)'] = concepts['swim']
        # concepts['walk (V)'] = concepts['walk(go)']

        languages = args.writer.add_languages(
            lookup_factory="Name", id_factory=lambda x: slug(x["Name"])
        )
        args.writer.add_sources()

        # Glosses already reported as missing, so each is printed only once.
        unmatched = set()
        for idx, gloss in wl.iter_rows("concept"):
            # Concept IDs are always non-empty strings, so ``None`` reliably
            # signals a gloss that is absent from the concept list.
            parameter_id = concepts.get(gloss)
            if parameter_id is None:
                if gloss not in unmatched:
                    unmatched.add(gloss)
                    print(gloss)
                continue

            cogid = wl[idx, "cogid"]
            lexeme = args.writer.add_form(
                Language_ID=languages[wl[idx, "language"]],
                Parameter_ID=parameter_id,
                Value=wl[idx, "ipa"],
                Form=".".join(wl[idx, "tokens"]),
                Source="Sidwell2015",
                # A negative cognate ID is treated as marking a loan.
                Loan=cogid < 0,
            )
            args.writer.add_cognate(
                lexeme=lexeme,
                Cognateset_ID=cogid,
                Cognate_Detection_Method="expert",
                Source=["Sidwell2015"],
            )