* proof of concept edict parser
author Urban Wallasch <urban.wallasch@freenet.de>
Sat, 12 Jun 2021 18:13:44 +0000 (20:13 +0200)
committer Urban Wallasch <urban.wallasch@freenet.de>
Sat, 12 Jun 2021 18:13:44 +0000 (20:13 +0200)
.gitignore [new file with mode: 0644]
ROADMAP.txt [new file with mode: 0644]
demo.sh [new file with mode: 0755]
jiten-pai.py [new file with mode: 0755]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ROADMAP.txt b/ROADMAP.txt
new file mode 100644 (file)
index 0000000..4325535
--- /dev/null
+++ b/ROADMAP.txt
@@ -0,0 +1,30 @@
+Jiten-pai Road Map
+==================
+
+
+Phase I
+-------
+[ ] command line dict parser, proof of concept
+[ ] regex support in search
+[ ] ...
+
+
+Phase II
+--------
+[ ] GUI (Qt?)
+[ ] romaji support?
+[ ] ...
+
+
+Phase III
+---------
+[ ] dict download & conversion (as separate tool!)
+[ ] ...
+
+
+Phase IV
+--------
+[ ] Kanji-Dic
+[ ] ...
+
+
diff --git a/demo.sh b/demo.sh
new file mode 100755 (executable)
index 0000000..0160f7e
--- /dev/null
+++ b/demo.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Demo: run jiten-pai.py once for each sample search pattern.
+dictionary=/usr/share/gjiten/dics/edict
+
+patterns=(
+    '^こころ$'
+    'heart.*spirit'
+    'ビール'
+)
+
+for pattern in "${patterns[@]}"; do
+    printf "search term: '%s'\n" "$pattern"
+    ./jiten-pai.py -d "$dictionary" "$pattern"
+    printf '%s\n' "---------------------------"
+done
diff --git a/jiten-pai.py b/jiten-pai.py
new file mode 100755 (executable)
index 0000000..e2f40e8
--- /dev/null
+++ b/jiten-pai.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+"""
+jiten-pai
+Proof-of-concept command line regex search for edict dictionary files.
+"""
+
+import sys
+
+_PYTHON_VERSION = sys.version_info[:2]  # (major, minor); a float would read 3.10 as 3.1
+if _PYTHON_VERSION < (3, 6):
+    raise Exception ('Need Python version 3.6 or later, got version ' + str(sys.version))
+
+import platform
+import io
+import os
+import signal
+import re
+import argparse
+
+
+cfg = {
+    'dict': '',  # dictionary file path; set from -d/--dict by parse_cmdline()
+    'term': '',  # search regex; set from the positional 'term' argument
+}
+
+
+def parse_cmdline():
+    """Parse sys.argv and copy every non-empty option into the global cfg."""
+    ap = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter,
+        description='jiten-pai',
+        epilog='',
+    )
+    ap.add_argument('term', default='^$', help='search term; regex supported')
+    ap.add_argument('-d', '--dict', default=cfg['dict'], help='dictionary file')
+    opts = vars(ap.parse_args())
+    # only overwrite a cfg entry when the parsed value is non-empty
+    # (an empty value keeps whatever cfg already holds)
+    cfg.update({key: opts[key] for key in ('dict', 'term') if opts[key]})
+
+
+def dict_lookup(dict_fname, term):
+    """Print each edict entry of dict_fname with a field matching regex term."""
+    with open(dict_fname) as dict_file:
+        # edict example line:
+        # 〆日 [しめび] /(n) time limit/closing day/settlement day (payment)/deadline/
+        re_split = re.compile(r'^(.*) *\[(.*)\] */(.*)/$')
+        re_term = re.compile(term)
+        for line in dict_file:
+            entry = re_split.match(line.strip())
+            if entry is None:
+                continue  # line lacks the "kanji [kana] /translation/" shape
+            kanji, kana, trans = entry.groups()
+            kanji = kanji.rstrip()  # greedy (.*) also captured the blank before '['
+            # for now promiscuously try to match anything anywhere
+            if any(re_term.search(field) for field in (kanji, kana, trans)):
+                print("%s (%s) %s" % (kanji, kana, trans))
+
+
+############################################################
+# main function: fill cfg from the command line, do one lookup, exit with status 0
+
+def main():
+    parse_cmdline()
+    dict_lookup(cfg['dict'], cfg['term'])
+    sys.exit(0)
+
+# run application only when executed as a script, not when imported
+if __name__== "__main__":
+    main()
+
+# EOF