Procházet zdrojové kódy

v3.6: 正则支持部分掉字匹配

Tinger před 2 roky
rodič
revize
557f61e0d4
Změněn 1 soubor: 19 přidání a 18 odebrání
  1. 19 18
      blues/idc.py

+ 19 - 18
blues/idc.py

@@ -9,22 +9,26 @@ idc = Blueprint("idc", __name__, url_prefix="/idc")
 
 _MIN_SIZE = 46
 _EXCLUDE_CHAR = "中国CHINA *#★☆"
-__face_ptn = r"姓名(?P<name>.+)" \
-             r"性别(?P<gender>男|女)民族(?P<nation>.+)" \
-             r"出生(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日" \
-             r"住址(?P<addr>.+)" \
-             r"公民身份号码(?P<idn>\d{17}\d|x|X)"
-__icon_ptn = r"中华人民共和国" \
-             r"居民身份证" \
-             r"签发机关(?P<agent>.+)" \
-             r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
-             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
+__face_ptn = re.compile(
+    r"[姓名]{0,2}(?P<name>.+?)[姓名]{0,2}"
+    r"[性别]{1,2}(?P<gender>[男女])民族(?P<nation>.+?)"
+    r"[出生]{1,2}(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日"
+    r"[住址]{1,2}(?P<addr>.+?)"
+    r"[公民身份号码]{1,6}(?P<idn>\d{17}[\dx])$",
+    re.I
+)
+__icon_ptn = re.compile(
+    r"[中华人民共和国居身份证]{0,12}?"
+    r"[签发机关]{1,4}(?P<agent>.+?)"
+    r"[有效期限]{1,4}(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
+    r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
+)
 
 
 def get_face_info(data_str: "str") -> "tuple[dict, bool]":
     res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
 
-    if match := re.match(__face_ptn, data_str):
+    if match := __face_ptn.match(data_str):
         res["name"] = match.group("name")
         res["gender"] = match.group("gender")
         res["nation"] = match.group("nation")
@@ -43,7 +47,7 @@ def get_face_info(data_str: "str") -> "tuple[dict, bool]":
 def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
     res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
 
-    if match := re.match(__icon_ptn, data_str):
+    if match := __icon_ptn.match(data_str):
         res["agent"] = match.group("agent")
         res["from"] = {
             "year": match.group("from_year"),
@@ -90,14 +94,11 @@ class IdcView(views.MethodView):
         for i, ocr_res in enumerate(recognizes):
             rec_str = "".join([it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0])])
             if which == "face":
-                if rec_str.startswith("姓名"):
-                    idx = i
-                    info, sta = get_face_info(rec_str)
+                info, sta = get_face_info(rec_str)
             else:
-                if rec_str.startswith("中华"):
-                    idx = i
-                    info, sta = get_icon_info(rec_str)
+                info, sta = get_icon_info(rec_str)
             if sta:
+                idx = i
                 break
             elif len(rec_str) >= _MIN_SIZE:
                 msg = "识别失败,建议选择深色背景"