* Refactoring; comments.
authorUrban Wallasch <urban.wallasch@freenet.de>
Fri, 25 Jun 2021 10:53:25 +0000 (12:53 +0200)
committerUrban Wallasch <urban.wallasch@freenet.de>
Fri, 25 Jun 2021 11:57:51 +0000 (13:57 +0200)
jiten-pai.py
kanjidic.py

index f74d9ecc0c457b5689aa114beb6182a878d4cd75..28b8fa1cab9a7aca60587b225cc190fc3cd15dd0 100755 (executable)
@@ -49,15 +49,20 @@ def die(rc=0):
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
-def is_kanji(s):
-    return True if re.match("^[\u4e00-\u9FFF]$", s) else False
+# Note: we only test for common CJK ideographs
+_u_CJK_Uni = r'\u4e00-\u9FFF'
+_u_CJK_Kana = r'\u3040-\u30ff'
 
-def has_jap(s):
-    if re.search("[\u3040-\u30ff]", s):  # kana
-        return True
-    if re.search("[\u4e00-\u9FFF]", s): # kanji
-        return True
-    return False
+_re_kanji = re.compile('^[' + _u_CJK_Uni + ']$')
+_re_jap = re.compile('[' + _u_CJK_Uni + _u_CJK_Kana + ']')
+
+# test, if a single character /might/ be a kanji
+def _is_kanji(s):
+    return _re_kanji.match(s)
+
+# test, if a string contains any common Japanese characters
+def _has_jap(s):
+    return _re_jap.search(s)
 
 class ScanMode(enum.Enum):
     JAP = 1
@@ -95,6 +100,7 @@ cfg = {
 }
 
 def _get_cfile_path(fname, mode=os.R_OK):
+    # try to find a suitable configuration file / prefix
     cdirs = []
     if os.environ.get('APPDATA'):
         cdirs.append(os.environ.get('APPDATA'))
@@ -158,6 +164,7 @@ _vconj_deinf = []
 _vconj_loaded = False
 
 def _get_dfile_path(fname, mode=os.R_OK):
+    # try to locate a data file in some common prefixes:
     cdirs = []
     if os.environ.get('APPDATA'):
         cdirs.append(os.environ.get('APPDATA'))
@@ -438,7 +445,7 @@ class zQTextEdit(QTextEdit):
             char = tcur.selectedText()
         self.setTextCursor(old_tcur)
         self.verticalScrollBar().setValue(scr)
-        if is_kanji(char):
+        if _is_kanji(char):
             self.kanji = char
             self._override_cursor()
         else:
@@ -1285,12 +1292,11 @@ class jpMainWindow(QMainWindow):
             # perform lookup
             res, ok = dict_lookup(dic, s_term, mode, limit)
             for r in list(res):
-                # reject anything not a verb, or adjective, or ...
+                # reject anything not tagged as verb, or adjective, or ...
                 if not re_consider.search(r[2]):
                     continue
-                # keep the rest with appended inflection info
-                r.append(inf)
-                result.append(r)
+                # keep the rest with added inflection info
+                result.append(r + [inf])
                 limit -= 1
             if limit <= 0 or not ok:
                 break
@@ -1325,7 +1331,7 @@ class jpMainWindow(QMainWindow):
         # search
         self.result_group.setTitle('Search results: ...')
         QApplication.processEvents()
-        mode = ScanMode.JAP if has_jap(term) else ScanMode.ENG
+        mode = ScanMode.JAP if _has_jap(term) else ScanMode.ENG
         if self.genopt_dict.isChecked():
             dics = [[self.genopt_dictsel.currentText(), self.genopt_dictsel.itemData(self.genopt_dictsel.currentIndex())]]
         else:
index 9f11fba0f0e51bc896bd86a1e72fa07ab65f68a7..a5e4fcc5f0fcecbce85be5594c90fde21f48f374 100755 (executable)
@@ -44,9 +44,16 @@ def die(rc=0):
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
-def is_kanji(s):
-    return True if re.match("^[\u4e00-\u9FFF]$", s) else False
+# Note: we only test for common CJK ideographs
+_u_CJK_Uni = r'\u4e00-\u9FFF'
 
+_re_kanji = re.compile('^[' + _u_CJK_Uni + ']$')
+
+# test, if a single character /might/ be a kanji
+def _is_kanji(s):
+    return _re_kanji.match(s)
+
+# try to locate a data file in some common prefixes:
 def _get_dfile_path(fname, mode=os.R_OK):
     cdirs = []
     if os.environ.get('APPDATA'):
@@ -129,7 +136,7 @@ def _rad_load():
                     radical = m.group(1)
                     stroke = int(m.group(2))
                     _radk[radical] = [stroke, '']
-                    _srad[stroke] += m.group(1)
+                    _srad[stroke] += radical
     except Exception as e:
         eprint('_rad_load:', radk_name, str(e))
         res = False
@@ -340,7 +347,7 @@ class zQTextEdit(QTextEdit):
         char = tcur.selectedText()
         self.setTextCursor(old_tcur)
         self.verticalScrollBar().setValue(scr)
-        if is_kanji(char):
+        if _is_kanji(char):
             self.kanji = char
             self._override_cursor()
         else: