From: Urban Wallasch Date: Fri, 25 Jun 2021 10:53:25 +0000 (+0200) Subject: * Refactoring; comments. X-Git-Tag: v0.1.0~34 X-Git-Url: https://git.packet-gain.de/?a=commitdiff_plain;h=3a7f32031b3177fb049af40f731cee1741a8d470;p=jiten-pai.git * Refactoring; comments. --- diff --git a/jiten-pai.py b/jiten-pai.py index f74d9ec..28b8fa1 100755 --- a/jiten-pai.py +++ b/jiten-pai.py @@ -49,15 +49,20 @@ def die(rc=0): def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) -def is_kanji(s): - return True if re.match("^[\u4e00-\u9FFF]$", s) else False +# Note: we only test for common CJK ideographs +_u_CJK_Uni = r'\u4e00-\u9FFF' +_u_CJK_Kana = r'\u3040-\u30ff' -def has_jap(s): - if re.search("[\u3040-\u30ff]", s): # kana - return True - if re.search("[\u4e00-\u9FFF]", s): # kanji - return True - return False +_re_kanji = re.compile('^[' + _u_CJK_Uni + ']$') +_re_jap = re.compile('[' + _u_CJK_Uni + _u_CJK_Kana + ']') + +# test, if a single character /might/ be a kanji +def _is_kanji(s): + return _re_kanji.match(s) + +# test, if a string contains any common Japanese characters +def _has_jap(s): + return _re_jap.search(s) class ScanMode(enum.Enum): JAP = 1 @@ -95,6 +100,7 @@ cfg = { } def _get_cfile_path(fname, mode=os.R_OK): + # try to find a suitable configuration file / prefix cdirs = [] if os.environ.get('APPDATA'): cdirs.append(os.environ.get('APPDATA')) @@ -158,6 +164,7 @@ _vconj_deinf = [] _vconj_loaded = False def _get_dfile_path(fname, mode=os.R_OK): + # try to locate a data file in some common prefixes: cdirs = [] if os.environ.get('APPDATA'): cdirs.append(os.environ.get('APPDATA')) @@ -438,7 +445,7 @@ class zQTextEdit(QTextEdit): char = tcur.selectedText() self.setTextCursor(old_tcur) self.verticalScrollBar().setValue(scr) - if is_kanji(char): + if _is_kanji(char): self.kanji = char self._override_cursor() else: @@ -1285,12 +1292,11 @@ class jpMainWindow(QMainWindow): # perform lookup res, ok = dict_lookup(dic, s_term, mode, limit) for r in list(res): - # reject anything not a verb, or adjective, or ... + # reject anything not tagged as verb, or adjective, or ... if not re_consider.search(r[2]): continue - # keep the rest with appended inflection info - r.append(inf) - result.append(r) + # keep the rest with added inflection info + result.append(r + [inf]) limit -= 1 if limit <= 0 or not ok: break @@ -1325,7 +1331,7 @@ class jpMainWindow(QMainWindow): # search self.result_group.setTitle('Search results: ...') QApplication.processEvents() - mode = ScanMode.JAP if has_jap(term) else ScanMode.ENG + mode = ScanMode.JAP if _has_jap(term) else ScanMode.ENG if self.genopt_dict.isChecked(): dics = [[self.genopt_dictsel.currentText(), self.genopt_dictsel.itemData(self.genopt_dictsel.currentIndex())]] else: diff --git a/kanjidic.py b/kanjidic.py index 9f11fba..a5e4fcc 100755 --- a/kanjidic.py +++ b/kanjidic.py @@ -44,9 +44,16 @@ def die(rc=0): def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) -def is_kanji(s): - return True if re.match("^[\u4e00-\u9FFF]$", s) else False +# Note: we only test for common CJK ideographs +_u_CJK_Uni = r'\u4e00-\u9FFF' +_re_kanji = re.compile('^[' + _u_CJK_Uni + ']$') + +# test, if a single character /might/ be a kanji +def _is_kanji(s): + return _re_kanji.match(s) + +# try to locate a data file in some common prefixes: def _get_dfile_path(fname, mode=os.R_OK): cdirs = [] if os.environ.get('APPDATA'): @@ -129,7 +136,7 @@ def _rad_load(): radical = m.group(1) stroke = int(m.group(2)) _radk[radical] = [stroke, ''] - _srad[stroke] += m.group(1) + _srad[stroke] += radical except Exception as e: eprint('_rad_load:', radk_name, str(e)) res = False @@ -340,7 +347,7 @@ class zQTextEdit(QTextEdit): char = tcur.selectedText() self.setTextCursor(old_tcur) self.verticalScrollBar().setValue(scr) - if is_kanji(char): + if _is_kanji(char): self.kanji = char self._override_cursor() else: