############################################################
# verb de-inflection
-# REs for word classes a specific inflection rule may generally be
-# applicable to, as tagged in the gloss part of dictionary entries.
-_vconj_wclass = { # TODO: adjust these REs to best fit word classes to inflection rule.
- 0: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # plain, negative, nonpast
- 1: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, non-past
- 2: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # conditional
- 3: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # volitional
- 4: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # te-form
- 5: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # plain, past
- 6: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # plain, negative, past
- 7: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # passive
- 8: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # causative
- 9: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # potential or imperative
- 10: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # imperative
- 11: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, past
- 12: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, negative, non-past
- 13: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, negative, past
- 14: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, volitional
- 15: re.compile(r'\((adj|adv|aux|n-adv)'), # adj. -> adverb
- 16: re.compile(r'\((adj|adv|aux|n-adv)'), # adj., past
- 17: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite
- 18: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, volitional
- 19: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # passive or potential
- 20: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # passive (or potential if Grp 2)
- 21: re.compile(r'\((adj|adv|aux|n-adv)'), # adj., negative
- 22: re.compile(r'\((adj|adv|aux|n-adv)'), # adj., negative, past
- 23: re.compile(r'\((adj|adv|aux|n-adv)'), # adj., past
- 24: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # plain verb
- 25: re.compile(r'\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))'), # polite, te-form
-}
+Vtype = namedtuple('Vtype', 'wclass label')
+Vconj = namedtuple('Vconj', 'regex conj infi rule')
-_vconj_type = dict()
-_vconj_deinf = []
+_vconj_type = dict() # format: { rule_no: (wclass, label), ... }
+_vconj_deinf = [] # format: [ (regex, conj, infinitive, rule_no), ... ]
_vconj_loaded = False
def _get_dfile_path(fname, mode=os.R_OK):
return fname
# load and parse VCONJ rule file
-Vconj = namedtuple('Vconj', 'regex conj infi rule')
-
def _vconj_load():
global _vconj_loaded
vcname = _JITENPAI_VCONJ
vcname = _get_dfile_path(os.path.join(_JITENPAI_DIR, _JITENPAI_VCONJ), mode=os.R_OK)
try:
with open(vcname) as vcfile:
- re_type = re.compile(r'^(\d+)\s+(.+)$')
+ re_type = re.compile(r'^(\d+)\s+"(\S+)"\s+(.+)$')
re_deinf = re.compile(r'^\s*([^#\s]+)\s+(\S+)\s+(\d+)\s*$')
for line in vcfile:
match = re_type.match(line)
if match:
- _vconj_type[match.group(1)] = match.group(2)
+ wclass = re.compile(match.group(2))
+ _vconj_type[int(match.group(1))] = Vtype(wclass, match.group(3))
continue
match = re_deinf.match(line)
if match:
regex = re.compile('%s$' % match.group(1))
- _vconj_deinf.append(Vconj(regex, match.group(1), match.group(2), match.group(3)))
+ _vconj_deinf.append(Vconj(regex, match.group(1), match.group(2), int(match.group(3))))
continue
_vconj_loaded = len(_vconj_deinf) > 0
except Exception as e:
eprint('_vconj_load:', vcname, str(e))
# collect inflection rules potentially applicable to a verb(-candidate)
-Vinf = namedtuple('Vinf', 'infi blurb rule')
+Vinf = namedtuple('Vinf', 'infi blurb wclass')
def _vconj_deinflect(verb):
inf = []
for deinf in _vconj_deinf:
verb_inf = deinf.regex.sub(deinf.infi, verb)
if verb_inf != verb:
- blurb = '%s %s → %s' % (_vconj_type[deinf.rule], deinf.conj, deinf.infi)
- inf.append(Vinf(verb_inf, blurb, int(deinf.rule)))
+ blurb = '%s %s → %s' % (_vconj_type[deinf.rule].label, deinf.conj, deinf.infi)
+ wclass = _vconj_type[deinf.rule].wclass
+ inf.append(Vinf(verb_inf, blurb, wclass))
return inf
# keep only results belonging to a suitable word class and
# attach the inflection info; reject everything else
for r in res:
- if _vconj_wclass[inf.rule].search(r.gloss):
+ if inf.wclass.search(r.gloss):
result.append(EntryEx(r.headword, r.reading, r.gloss, inf))
limit -= 1
if limit <= 0 or not ok:
#
# V C O N J - control file for verb and adjective deinflection
#
-# the following section sets up the labels which are used for the
-# various inflections. These are displayed by the program.
-# The initial labels can be edited by the user.
+# Adapted from xjdic, Copyright (c) 2003 J.W. Breen
#
-# First there are the labels for the types of conjugations
+# First there are the labels for the types of conjugations, along with
+# regular expressions to filter potential dictionary matches based on
+# the tags in the gloss to only include words of suitable classes,
+# depending on the nature of the rule.
#
-0 plain, negative, nonpast
-1 polite, non-past
-2 conditional
-3 volitional
-4 te-form
-5 plain, past
-6 plain, negative, past
-7 passive
-8 causative
-9 potential or imperative
-10 imperative
-11 polite, past
-12 polite, negative, non-past
-13 polite, negative, past
-14 polite, volitional
-15 adj. -> adverb
-16 adj., past
-17 polite
-18 polite, volitional
-19 passive or potential
-20 passive (or potential if Grp 2)
-21 adj., negative
-22 adj., negative, past
-23 adj., past
-24 plain verb
-25 polite, te-form
+# TODO: adjust these REs to best fit the word classes to each inflection rule.
#
-# and these are the conjugations/inflections, and their dictionary forms
-# (please note that these are scanned from the top, so the order is
-# critical if the correct guess is to be made.)
+0 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" plain, negative, nonpast
+1 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, non-past
+2 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" conditional
+3 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" volitional
+4 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" te-form
+5 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" plain, past
+6 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" plain, negative, past
+7 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" passive
+8 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" causative
+9 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" potential or imperative
+10 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" imperative
+11 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, past
+12 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, negative, non-past
+13 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, negative, past
+14 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, volitional
+15 "\((adj|adv|aux|n-adv)" adj. -> adverb
+16 "\((adj|adv|aux|n-adv)" adj., past
+17 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite
+18 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, volitional
+19 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" passive or potential
+20 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" passive (or potential if Grp 2)
+21 "\((adj|adv|aux|n-adv)" adj., negative
+22 "\((adj|adv|aux|n-adv)" adj., negative, past
+23 "\((adj|adv|aux|n-adv)" adj., past
+24 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" plain verb
+25 "\((adj|adv|aux|n-adv|v(?!ulg|idg|ie))" polite, te-form
#
-$ this line flags the start of them
+# And these are the conjugations/inflections, and their dictionary forms.
#
た る 5
て る 4
ましょう る 18
れば る 2
よう る 3
-#て る 4 # 2021-06-19 disabled here, appears in line 44
+#て る 4 # 2021-06-19 disabled here, already present above
た る 5
られ る 20
させ る 8