* Added full-width Latin characters to the set of Japanese characters.
author Urban Wallasch <urban.wallasch@freenet.de>
Sat, 26 Jun 2021 11:42:27 +0000 (13:42 +0200)
committer Urban Wallasch <urban.wallasch@freenet.de>
Sat, 26 Jun 2021 11:42:27 +0000 (13:42 +0200)
jiten-pai.py

index da4cb6c1d1ceb23cd1d6370ef4d1c2dd7db0cc87..7753139552e85e9b3afa7dc0abfb5aca48d23062 100755 (executable)
@@ -52,9 +52,10 @@ def eprint(*args, **kwargs):
 # Note: we only test for common CJK ideographs
 _u_CJK_Uni = r'\u4e00-\u9FFF'
 _u_CJK_Kana = r'\u3040-\u30ff'
+_u_CJK_FullHalf = r'\uFF00-\uFFEF'
 
 _re_kanji = re.compile('^[' + _u_CJK_Uni + ']$')
-_re_jap = re.compile('[' + _u_CJK_Uni + _u_CJK_Kana + ']')
+_re_jap = re.compile('[' + _u_CJK_Uni + _u_CJK_Kana + _u_CJK_FullHalf + ']')
 
 # test, if a single character /might/ be a kanji
 def _is_kanji(s):