Fixed some types of recursive derivations.
timarkh committed Dec 19, 2022
1 parent 1c43966 commit adc246e
Showing 8 changed files with 120 additions and 27 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = uniparser-morph
version = 2.7.2
version = 2.7.3
author = Timofey Arkhangelskiy
author_email = timarkh@gmail.com
description = Rule-based, linguist-friendly (and rather slow) morphological analysis
28 changes: 27 additions & 1 deletion tests/derivations.txt
@@ -6,10 +6,36 @@
keep_lex_data: brackets
gloss: PV


-deriv-type: V-nyt
lex: <0>ныт[.]ын
stem: ныт[.].
regex-stem: т.*
gramm: +pv,pv_ny
gloss: PV



-deriv-type: V-se
lex: <0>[.]se
stem: [.]se.
paradigm: der_test_adv
gramm: adv,ptcp
gloss: PTCP
id: septcp

-deriv-type: A-mi
lex: [.]+mï
stem: [.]mï.
paradigm: der_test_noun
gramm: n
gloss: NMLZ
id: minmlz

-deriv-type: V-semi
lex: [.]+semï
stem: [.]se&mï.
paradigm: der_test_noun
gramm: n
gloss: PTCP&NMLZ
id: septcp,minmlz
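
The three new derivation types form a chain: V-se turns a verb into a participle/adverb, A-mi nominalizes an adverb, and V-semi applies both steps at once (the & in its stem and gloss separates the two morphemes). A minimal sketch of loading these test files and exercising the chain, assuming the Analyzer configuration attributes (lexFile, paradigmFile, derivFile) documented for uniparser-morph:

from uniparser_morph import Analyzer

a = Analyzer()
# Assumption: these attribute names follow uniparser-morph's documented
# configuration; the paths point at the test files changed in this commit.
a.lexFile = 'tests/lexemes.txt'
a.paradigmFile = 'tests/paradigms.txt'
a.derivFile = 'tests/derivations.txt'
a.load_grammar()

# yarikasemï exercises the recursion: V-se first, then A-mi.
for w in ['yarika', 'yarikase', 'yarikasemï']:
    print(a.analyze_words(w))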

12 changes: 11 additions & 1 deletion tests/import_test.py
@@ -54,7 +54,7 @@
analyses = a.analyze_words('юртъёсаз', format='conll')
print(analyses)

# Test derivations
# Test simple derivations
analyses = a.analyze_words('тулы')
print(analyses)
analyses = a.analyze_words('ныттулы')
@@ -64,6 +64,16 @@
analyses = a.analyze_words('ныууыныс')
print(analyses)

# Test recursive derivations
for w in [
    "yarika",      # bare verb
    "yarikase",    # adverbialized verb
    "tatune",      # bare adverb
    "tatunemï",    # nominalized adverb
    "yarikasemï"   # adverbialized, then nominalized verb
]:
    print(a.analyze_words(w))

# Test sentences and complex structures
analyses = a.analyze_words(['Морфологиез', [['А'], ['Мон', 'тонэ', 'яратӥсько', '.']]], format='xml')
print(analyses)
17 changes: 17 additions & 0 deletions tests/lexemes.txt
@@ -238812,3 +238812,20 @@
trans_ru: видеть
trans_en: see
gramm: V,tr

-lexeme
lex: yarika
stem: .yarika.
paradigm: der_test_verb
paradigm: der_test_v_nmlz
trans_en: laugh
gloss: laugh
gramm: v

-lexeme
lex: tatune
stem: .tatune.
paradigm: der_test_adv
trans_en: warm
gloss: warm
gramm: adv
21 changes: 20 additions & 1 deletion tests/paradigms.txt
@@ -5881,4 +5881,23 @@
-deriv-link: V-nyw
recurs_class: 1
-deriv-link: V-nyt
recurs_class: 1

-paradigm: der_test_verb
-flex: .
gramm:

-paradigm: der_test_noun
-flex: .
gramm:

-paradigm: der_test_adv
-flex: .
gramm:
deriv-link: A-mi

-paradigm: der_test_v_nmlz
-flex: .
gramm:
deriv-link: V-se
deriv-link: V-semi
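
These test paradigms encode the recursion: der_test_adv links to A-mi, so anything that lands in der_test_adv (including a V-se derivative of a verb) can be nominalized further, while der_test_v_nmlz offers both the two-step route (V-se, then A-mi) and the fused V-semi. A toy walk over the link structure, assuming plain dicts rather than uniparser_morph's Paradigm/Derivation classes:

# Toy model of the deriv-links above; output_paradigm maps each derivation
# to the paradigm of its result, per tests/derivations.txt.
links = {
    'der_test_verb': [],
    'der_test_noun': [],
    'der_test_adv': ['A-mi'],
    'der_test_v_nmlz': ['V-se', 'V-semi'],
}
output_paradigm = {'V-se': 'der_test_adv', 'A-mi': 'der_test_noun',
                   'V-semi': 'der_test_noun'}

def chains(paradigm, prefix=()):
    # Enumerate every derivation chain reachable from a paradigm.
    result = [prefix] if prefix else []
    for d in links[paradigm]:
        result += chains(output_paradigm[d], prefix + (d,))
    return result

print(chains('der_test_v_nmlz'))
# [('V-se',), ('V-se', 'A-mi'), ('V-semi',)]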
14 changes: 9 additions & 5 deletions uniparser_morph/grammar.py
@@ -178,8 +178,10 @@ def load_lex_rules(self, fnames):
return len(self.lexRulesByLemma) + len(self.lexRulesByStem)

def load_clitics(self, fnames):
"""Load clitics from the file or files specified by fnames.
Return the number of lexemes loaded."""
"""
Load clitics from the file or files specified by fnames.
Return the number of lexemes loaded.
"""
clDescrs = self.load_yaml_descrs(fnames)
for dictDescr in clDescrs:
if dictDescr is None or len(dictDescr) <= 0:
@@ -192,14 +194,16 @@ def load_clitics(self, fnames):
return len(self.clitics)

def load_derivations(self, fnames, compileDerivs=False):
"""Load derivations from the file or files specified by fnames.
Return the number of derivations loaded."""
"""
Load derivations from the file or files specified by fnames.
Return the number of derivations loaded.
"""
derivDescrs = self.load_yaml_descrs(fnames)
for dictDescr in derivDescrs:
dictDescr['value'] = '#deriv#' + dictDescr['value']
try:
self.derivations[dictDescr['value']] =\
Derivation(self, dictDescr, self.errorHandler)
Derivation(self, dictDescr, self.errorHandler)
except MemoryError:
self.raise_error('Not enough memory for the derivations.')
return
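
Derivation names are stored under a '#deriv#' prefix, which keeps them in a separate namespace from ordinary paradigm names. A minimal sketch of the keying convention, assuming a plain dict in place of the Grammar.derivations attribute:

derivations = {}
for name in ('V-se', 'A-mi', 'V-semi'):
    # Prefixed keys keep derivations from shadowing paradigm names.
    derivations['#deriv#' + name] = 'Derivation placeholder'

# A 'deriv-link: V-se' line in paradigms.txt resolves to the prefixed key:
assert '#deriv#V-se' in derivations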
36 changes: 24 additions & 12 deletions uniparser_morph/paradigm.py
@@ -105,7 +105,7 @@ def __init__(self, g, dictDescr, errorHandler=None):
self.otherDataBracketR = '' # if keepOtherData == True, append this to the non-essential
# values copied from lexeme (e.g. to translations)
self.otherData = []
self.lemmaChanger = None # an inflexion object which changes the lemma
self.lemmaChangers = []  # inflexion objects which change the lemma
self.startWithSelf = False # if true, start with the inflexion when joining
# itself to a stem or to a previous inflexion
try:
@@ -225,8 +225,9 @@ def add_lemma_changer(self, obj):
self.raise_error('Wrong lemma in ' + self.flex + ': ', newLemma)
return
dictDescr = {'name': 'flex', 'value': newLemma, 'content': []}
self.lemmaChanger = Inflexion(self.g, dictDescr, self.errorHandler)
self.lemmaChanger.startWithSelf = True
lemmaChanger = Inflexion(self.g, dictDescr, self.errorHandler)
lemmaChanger.startWithSelf = True
self.lemmaChangers.append(lemmaChanger)

def remove_stem_number(self):
flex = self.flex
@@ -771,11 +772,12 @@ def init_derivation(self, data):
for stem, gloss, gramm in zip(stems, glosses, gramms):
for stemVar in stem.split('//'):
stemVar = re.sub('\\.(?!\\])', '<.>', stemVar)
stemVar = stemVar.replace('[.]', '.')
# Different conventions for morphemes in stems and inflexions:
stemVar = stemVar.replace('[.]', '.').replace('&', '|')
bReplaceGrammar = True
arrContent = copy.deepcopy(newData)
if len(gloss) > 0:
arrContent.append({'name': 'gloss', 'value': gloss})
arrContent.append({'name': 'gloss', 'value': gloss.replace('&', '|')})
if gramm.startswith('+') or len(gramm) <= 0:
bReplaceGrammar = False
gramm = gramm[1:]
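
The added replace('&', '|') is the substantive change in this hunk: derivation stems write '&' between morphemes, while compiled inflexions expect '|', just as '[.]' stands for a literal dot and a bare dot marks the stem slot (rewritten to '<.>'). A self-contained sketch of the conversion, assuming it can be read in isolation from the surrounding method:

import re

def convert_stem(stemVar):
    # A bare dot is the stem slot and becomes '<.>'; '[.]' is a literal dot;
    # '&' between morphemes becomes '|' in the inflexion convention.
    stemVar = re.sub('\\.(?!\\])', '<.>', stemVar)
    return stemVar.replace('[.]', '.').replace('&', '|')

print(convert_stem('[.]se&mï.'))  # .se|mï<.>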
@@ -991,7 +993,11 @@ def extend_one(self, flexL):
return extensions

@classmethod
def join_inflexions(cls, flexL, flexR, paradigmLink, partialCompile=True):
def join_inflexions(cls,
                    flexL: Inflexion,
                    flexR: Inflexion,
                    paradigmLink: ParadigmLink = None,
                    partialCompile=True):
# print(flexL.flex, flexR.flex)
if not cls.stem_numbers_agree(flexL, flexR):
return None
@@ -1006,13 +1012,13 @@ def join_inflexions(cls, flexL, flexR, paradigmLink, partialCompile=True):
flexL.copy_std()

# Manage links to the subsequent paradigms:
if paradigmLink.position != POS_UNSPECIFIED:
if paradigmLink is not None and paradigmLink.position != POS_UNSPECIFIED:
flexL.position = paradigmLink.position
else:
flexL.position = flexR.position
if paradigmLink.position == POS_FINAL:
if paradigmLink is not None and paradigmLink.position == POS_FINAL:
flexL.make_final()
elif len(paradigmLink.subsequent) > 0:
elif paradigmLink is not None and len(paradigmLink.subsequent) > 0:
flexL.subsequent = paradigmLink.subsequent
else:
flexL.subsequent = flexR.subsequent
@@ -1050,10 +1056,13 @@ def join_inflexions(cls, flexL, flexR, paradigmLink, partialCompile=True):
flexR.flexStdObj,
paradigmLink,
partialCompile)

# Join lemma changers
flexL.lemmaChangers += flexR.lemmaChangers
return flexL

@classmethod
def join_other_data(cls, flexL, flexR):
def join_other_data(cls, flexL: Inflexion, flexR: Inflexion):
"""
Add otherData values from flexR to flexL.
"""
@@ -1202,8 +1211,11 @@ def join_inflexion_parts(cls, flexPartsL, flexPartsR):
fpOldL = flexPartsL[-1][1:]
else:
fpOldL = flexPartsL[-1]
if fpOldL[0].flex != '<.>':
fpOldL.insert(0, InflexionPart('<.>', '<.>', GLOSS_NEXT_FLEX))
# if fpOldL[0].flex != '<.>':
# fpOldL.insert(0, InflexionPart('<.>', '<.>', GLOSS_NEXT_FLEX))
# TODO: Deal with GLOSS_STARTWITHSELF in a definitive way.
# TODO: Probably abandon it in favor of uniform handling of stems
# TODO: in paradigms and derivations.
fpNew = [InflexionPart('', '', GLOSS_STARTWITHSELF)]
else:
fpOldR = flexPartsR[0]
17 changes: 11 additions & 6 deletions uniparser_morph/wordform.py
@@ -2,6 +2,7 @@
import re
import xml.sax.saxutils
from .common_functions import wfPropertyFields, check_compatibility, join_stem_flex
from .paradigm import Paradigm, ParadigmLink


class Wordform:
@@ -58,28 +59,32 @@ def raise_error(self, message, data=None):
self.errorHandler.raise_error(message, data)

def add_lemma(self, lex, flex):
if flex.lemmaChanger is None:
if len(flex.lemmaChangers) <= 0:
if self.lemma == "":
self.lemma = lex.lemma
elif lex.lemma != "":
self.lemma += "+"+lex.lemma
return
unifiedLC = copy.deepcopy(flex.lemmaChangers[0])
for i in range(1, len(flex.lemmaChangers)):
    lc = flex.lemmaChangers[i]
    Paradigm.join_inflexions(unifiedLC, lc)
suitableSubLex = [sl for sl in lex.subLexemes
if flex.lemmaChanger.stemNum is None or
len(sl.numStem & flex.lemmaChanger.stemNum) > 0]
if unifiedLC.stemNum is None
or len(sl.numStem & unifiedLC.stemNum) > 0]
if len(suitableSubLex) <= 0:
if lex.num_stems() == 1:
suitableSubLex = lex.subLexemes
if len(suitableSubLex) <= 0:
self.raise_error('No stems available to create the new lemma ' +
flex.lemmaChanger.flex)
unifiedLC.flex)
self.lemma = ''
return
if len(suitableSubLex) > 1:
if self.verbosity > 0:
self.raise_error('Several stems available to create the new lemma ' +
flex.lemmaChanger.flex)
wfLemma = Wordform(self.g, suitableSubLex[0], flex.lemmaChanger,
unifiedLC.flex)
wfLemma = Wordform(self.g, suitableSubLex[0], unifiedLC,
self.errorHandler)
self.lemma = wfLemma.wf
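
Because recursive derivations can attach one lemma changer per derivation step, add_lemma now folds the whole list into a single changer: deep-copy the first so the shared Inflexion is not mutated, then merge the rest in order with Paradigm.join_inflexions. A toy model of that fold, with strings and a composition lambda standing in for Inflexion objects and the real join (an assumption, for illustration only):

import copy

def unify_lemma_changers(changers, join):
    # Mirrors the loop in add_lemma: copy the first changer, then merge
    # each remaining changer into it, left to right.
    unified = copy.deepcopy(changers[0])
    for lc in changers[1:]:
        unified = join(unified, lc)
    return unified

print(unify_lemma_changers(['[.]se', '[.]mï'], lambda a, b: a + ' ∘ ' + b))
# [.]se ∘ [.]mï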

