#!/usr/bin/env python3

"""
jiten-pai

Proof-of-concept command line search tool for EDICT dictionary files.

Usage:
    jiten-pai.py -d DICTFILE TERM

TERM is a regular expression.  Every dictionary entry whose headword,
reading or translation matches TERM anywhere is printed to stdout.
"""

# Provenance (taken from the commit this file was extracted from):
#   From: Urban Wallasch
#   Date: Sat, 12 Jun 2021 18:13:44 +0000 (+0200)
#   Subject: * proof of concept edict parser
#   X-Git-Url: https://git.packet-gain.de/?a=commitdiff_plain;h=9f9c8ac6a453389f022bfa6c19da240d4859269d;p=jiten-pai.git
#   Commit: 9f9c8ac6a453389f022bfa6c19da240d4859269d

import sys

# Compare version tuples.  Building a float from "major.minor" turns
# "3.10" into 3.1 and would wrongly reject Python 3.10 and later.
# NOTE: keep this file free of syntax newer than Python 3.5 (e.g. f-strings),
# otherwise old interpreters die with a SyntaxError before this check runs.
_PYTHON_VERSION = tuple(sys.version_info[:2])
if _PYTHON_VERSION < (3, 6):
    raise Exception('Need Python version 3.6 or later, got version ' + str(sys.version))

import platform
import io
import os
import signal
import re
import argparse


# Runtime configuration, filled in by parse_cmdline().
cfg = {
    'dict': '',     # path of the dictionary file to search
    'term': '',     # search term (regular expression)
}


def parse_cmdline():
    """Parse the command line and store the result in ``cfg``.

    ``cfg['term']`` receives the positional search term and ``cfg['dict']``
    the value of ``-d/--dict``.  Empty values leave the corresponding
    ``cfg`` entry untouched.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description='jiten-pai',
        epilog=''
    )
    # 'term' is a required positional argument, so argparse would never
    # apply a default to it; none is given.
    parser.add_argument('term', help='search term; regex supported')
    parser.add_argument('-d', '--dict', default=cfg['dict'], help='dictionary file')
    args = parser.parse_args()
    if args.dict:
        cfg['dict'] = args.dict
    if args.term:
        cfg['term'] = args.term


# edict example lines:
#   〆日 [しめび] /(n) time limit/closing day/settlement day (payment)/deadline/
#   ビール /(n) beer/
# Group 1: headword (matched lazily so it carries no trailing blank),
# group 2: reading (None when the entry has no [..] part),
# group 3: translations, without the enclosing slashes.
_RE_ENTRY = re.compile(r'^(.+?)\s*(?:\[([^\]]*)\]\s*)?/(.*)/$')


def _parse_entry(line):
    """Split one dictionary line into (headword, reading, translation).

    Returns None when the line does not look like a dictionary entry.
    ``reading`` is None for entries that have no ``[reading]`` part.
    """
    match = _RE_ENTRY.match(line.strip())
    if match is None:
        return None
    return match.groups()


def dict_lookup(dict_fname, term, encoding='utf-8'):
    """Print all entries of a dictionary file that match a search term.

    Args:
        dict_fname: path of the EDICT formatted dictionary file.
        term: regular expression; an entry is a hit when the expression
            is found anywhere in its headword, reading or translation.
        encoding: text encoding of the dictionary file.  Defaults to
            UTF-8; pass e.g. 'euc-jp' for dictionaries in that encoding.

    Raises:
        re.error: ``term`` is not a valid regular expression.
        OSError: the dictionary file cannot be opened or read.
        UnicodeError: the file content does not match ``encoding``.

    Hits are written to stdout as ``headword (reading) translation``, or
    ``headword translation`` for entries without a reading.  Lines that
    are not dictionary entries are skipped.
    """
    # Compile first so a bad pattern is reported without touching the file.
    re_term = re.compile(term)
    with open(dict_fname, encoding=encoding) as dict_file:
        for line in dict_file:
            entry = _parse_entry(line)
            if entry is None:
                continue
            # for now promiscuously try to match anything anywhere
            if not any(re_term.search(field) is not None
                       for field in entry if field is not None):
                continue
            headword, reading, trans = entry
            if reading is None:
                print("%s %s" % (headword, trans))
            else:
                print("%s (%s) %s" % (headword, reading, trans))


############################################################
# main function

def main():
    """Run a dictionary search as configured on the command line.

    Exits with status 0 on success.  An invalid search term or an
    unreadable dictionary is reported on stderr with exit status 1
    instead of a traceback.
    """
    parse_cmdline()
    try:
        dict_lookup(cfg['dict'], cfg['term'])
    except re.error as err:
        sys.exit("jiten-pai: invalid search term '%s': %s" % (cfg['term'], err))
    except UnicodeError as err:
        sys.exit("jiten-pai: cannot decode '%s': %s" % (cfg['dict'], err))
    except OSError as err:
        sys.exit("jiten-pai: %s" % err)
    sys.exit(0)

# run application
if __name__ == "__main__":
    main()


############################################################
# Companion files added by the same commit, kept here verbatim for
# reference (.gitignore was added empty).
#
# --- ROADMAP.txt ---
# Jiten-pai Road Map
# ==================
#
#
# Phase I
# -------
# [ ] command line dict parser, proof of concept
# [ ] regex support in search
# [ ] ...
#
#
# Phase II
# --------
# [ ] GUI (Qt?)
# [ ] romaji support?
# [ ] ...
#
#
# Phase III
# ---------
# [ ] dict download & conversion (as separate tool!)
# [ ] ...
#
#
# Phase IV
# --------
# [ ] Kanji-Dic
# [ ] ...
#
# --- demo.sh ---
# #!/bin/bash
#
# dict=/usr/share/gjiten/dics/edict
#
# terms=(
#     '^こころ$'
#     'heart.*spirit'
#     'ビール'
# )
#
# for t in "${terms[@]}"; do
#     echo "search term: '$t'"
#     ./jiten-pai.py -d "$dict" "$t"
#     echo "---------------------------"
# done

# EOF