From 7d9a86d55aa96cddc22270fbd3cd45e2815df95b Mon Sep 17 00:00:00 2001 From: Urban Wallasch Date: Fri, 2 Jul 2021 13:15:37 +0200 Subject: [PATCH] * Added support for kanjidic_comb_utf8 plain text version of extended kanjidic. * Always make sure the correct set of kradfile[2]/radkfile[2] is loaded. * Disable full text search for XML version of kanjidic2. --- README.md | 7 +++++++ kanjidic.py | 43 ++++++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 135db35..8ff1882 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,16 @@ available by the EDRDG: * download file, unpack and convert to UTF-8 *(see above)* * install via Edit->Preferences +* [KANJIDIC_COMB](http://ftp.edrdg.org/pub/Nihongo/kanjidic_comb_utf8.gz) *(alternative)* + * Kanji dictionary; plain text format; UTF-8 encoding + * contains additional kanji from JIS X 0212/0213 supplementary sets + * download file, unpack *(see above)* + * install via Edit->Preferences + * [KANJIDIC2](http://ftp.edrdg.org/pub/Nihongo/kanjidic2.xml.gz) *(alternative)* * Kanji dictionary; XML format; UTF-8 encoding * contains additional kanji from JIS X 0212/0213 supplementary sets + * **caveat:** does not support full text search * download file, unpack * install via Edit->Preferences diff --git a/kanjidic.py b/kanjidic.py index ec7a432..070943e 100755 --- a/kanjidic.py +++ b/kanjidic.py @@ -117,9 +117,10 @@ _srad = [''] * 20 # format: [ stroke_cnt -> 'radical_list' ] _radk = dict() # format: { 'radical': [stroke_cnt, 'kanji_list'], ... } _krad = dict() # format: { 'kanji': 'radical_list', ... } -def _rad_load(): +def _rad_load(version): res = True - for radk_name in _KANJIDIC_RADK: + for v in range(version): + radk_name = _KANJIDIC_RADK[v] if not os.access(radk_name, os.R_OK): radk_name = _get_dfile_path(os.path.join(_KANJIDIC_DIR, radk_name), mode=os.R_OK) try: @@ -142,7 +143,8 @@ def _rad_load(): except Exception as e: eprint('_rad_load:', radk_name, str(e)) res = False - for krad_name in _KANJIDIC_KRAD: + for v in range(version): + krad_name = _KANJIDIC_KRAD[v] if not os.access(krad_name, os.R_OK): krad_name = _get_dfile_path(os.path.join(_KANJIDIC_DIR, krad_name), mode=os.R_OK) try: @@ -199,10 +201,13 @@ def _kanjidic1_load(dict_fname): ] re_braces = re.compile(r'\{.*\}.*$') re_tags = re.compile(r'[BCFGJHNVDPSUIQMEKLOWYXZ]\S+') + krad_set = 1 try: with open(dict_fname) as dict_file: for line in dict_file: if line[0] in '# ': + if 'KANJD212' in line: + krad_set = 2 continue info = { 'strokes': '', @@ -217,9 +222,10 @@ def _kanjidic1_load(dict_fname): # skip kanji and JIS code line = line[6:] # save meaning - m = re_braces.search(line).group(0) - info['meaning'] = m.replace('{', '').replace('}', ';').strip() - line = re_braces.sub('', line) + m = re_braces.search(line) + if m: + info['meaning'] = m.group(0).replace('{', '').replace('}', ';').strip() + line = re_braces.sub('', line) # get tags tlist = [] while True: @@ -240,8 +246,8 @@ def _kanjidic1_load(dict_fname): _kanjidic[kanji] = info except Exception as e: eprint('_kanjidic1_load:', dict_fname, str(e)) - return False - return True + return False, 0, 0 + return True, 1, krad_set # load kanjidic2.xml # See: http://www.edrdg.org/wiki/index.php/KANJIDIC_Project#Content_.26_Format @@ -299,8 +305,8 @@ def _kanjidic2_load(dict_fname): _kanjidic[kanji] = info except Exception as e: eprint('_kanjidic2_load:', dict_fname, str(e)) - return False - return True + return False, 0, 0 + return True, 2, 2 def _kanjidic_load(dict_fname): tag_line = '' @@ -309,13 +315,10 @@ def _kanjidic_load(dict_fname): tag_line = f.readline() if ' 1: + # disable full text search for the XML version + self.text_search_check.hide() + self.text_search_box.hide() + self.text_search_clearbtn.hide() + if not _rad_load(krad_set): self.show_error('Error loading radkfile/kradfile!') self.dic_ok = False # evaluate command line arguments -- 2.30.2