Browse Source

ocr idc V1.1

Tinger 2 năm trước
mục cha
commit
b781c03489
3 tập tin đã thay đổi với 81 bổ sung và 51 xóa
  1. 1 1
      Readme.md
  2. 72 49
      blues/idc.py
  3. 8 1
      utils/util.py

+ 1 - 1
Readme.md

@@ -2,7 +2,7 @@
 
 ---
 
-**$host: http://139.9.167.178:5050**
+**$host: https://img.ifarmcloud.com/ocr**
 
 **接口说明:[内网](http://192.168.1.6:10393/shareDoc?issue=df3c14ae8582a40ee191762f8fda504b) 、
 [外网](https://console-docs.apipost.cn/preview/d46d3cce127f3856/39166dc588c72981)**

+ 72 - 49
blues/idc.py

@@ -5,50 +5,72 @@ from utils.conf import MAX_CONTENT_LENGTH
 
 idc = Blueprint("idc", __name__, url_prefix="/idc")
 
-__name_ptn = r"姓 *名 *(?P<name>.+) *"
-__gender_nation_ptn = r"性 *别 *(?P<gender>男|女) *民 *族 *(?P<nation>.+) *"
-__birth_ymd_ptn = r"出 *生 *(?P<year>\d{4}) *年 *(?P<month>\d{2}) *月 *(?P<day>\d{2}) *日 *"
-__addr_start_ptn = r"住 *址 *(?P<addr>.+) *"
-__idn_ptn = r"公 *民 *身 *份 *号 *码 *(?P<idn>\d{18}) *"
-__agent_ptn = r"签 *发 *机 *关 *(?P<agent>.*) *"
-__valid_date_ptn = r"有 *效 *期 *限 *(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
-                   r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2}) *"
-
-
-def get_face_info(data: "list[str]"):
-    res = {"name": "", "gender": "", "nation": "", "addr": "", "idn": "", "birth": {"year": "", "month": "", "day": ""}}
-    for item in data:
-        if name := re.match(__name_ptn, item):
-            res["name"] = name.group("name")
-        elif gender_nation := re.match(__gender_nation_ptn, item):
-            res["gender"] = gender_nation.group("gender")
-            res["nation"] = gender_nation.group("nation")
-        elif birth_ymd := re.match(__birth_ymd_ptn, item):
-            res["birth"]["year"] = birth_ymd.group("year")
-            res["birth"]["month"] = birth_ymd.group("month")
-            res["birth"]["day"] = birth_ymd.group("day")
-        elif addr := re.match(__addr_start_ptn, item):
-            res["addr"] = addr.group("addr")
-        elif idn := re.match(__idn_ptn, item):
-            res["idn"] = idn.group("idn")
-        else:
-            res["addr"] += item
-    return res
+# Characters used to recognise OCR items that should be ignored: any item
+# whose characters all occur in this string is dropped before matching.
+__CN = "中国CHINA"
+# Portrait side: the space-stripped OCR items, joined into one string, must
+# match from start to end. The ID number is 17 digits plus one final
+# character that is a digit or x/X (a character class, so the alternation
+# cannot escape the 17-digit prefix).
+__face_ptn = r"^姓名(?P<name>.+)性别(?P<gender>男|女)民族(?P<nation>.+)" \
+             r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日" \
+             r"住址(?P<addr>.+)公民身份号码(?P<idn>\d{17}[\dxX])$"
+# National-emblem side, matched the same way.
+# NOTE(review): both validity dates must be numeric (yyyy.mm.dd); an end date
+# printed as text will not match — confirm whether that is intended.
+__icon_ptn = r"^中华人民共和国居民身份证签发机关(?P<agent>.+)" \
+             r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
+             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
+
+
+# The image must look landscape-oriented (wider than tall) when viewed on a PC.
+def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
+    """Extract the portrait-side fields from OCR text items.
+
+    Spaces are removed from every item, items made up only of characters
+    from ``__CN`` are dropped, and the remaining items are joined and matched
+    against ``__face_ptn``. When the first item does not start with "姓名"
+    the items are tried once more in reverse order.
+
+    :param data: recognized text items, in the order the OCR returned them.
+    :return: ``(info, message, ok)``. ``info`` always carries the keys name,
+        gender, nation, birth (year/month/day), addr and idn, all empty
+        strings unless parsing succeeded; ``message`` is the user-facing
+        failure reason ("" on success); ``ok`` is True only on success.
+    """
+    res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
+
+    if len(data) < 5:  # at least 5 recognition results are required
+        return res, "请使用正确的身份证人像面照片", False
+    deal = [item.replace(" ", "") for item in data if not str_include(__CN, item)]
+    if not deal:  # the filter removed everything; deal[0] below would raise IndexError
+        return res, "请使用正确的身份证人像面照片", False
+    if not deal[0].startswith("姓名"):  # not upright: retry in reverse order
+        deal.reverse()
+    if not deal[0].startswith("姓名"):
+        return res, "请确保照片为:横长竖宽,正面朝上", False
+
+    str_all = "".join(deal)
+    # NOTE(review): this writes the full recognized ID text to stdout;
+    # confirm it is wanted outside of debugging.
+    print(str_all)
+    if match := re.match(__face_ptn, str_all):
+        res["name"] = match.group("name")
+        res["gender"] = match.group("gender")
+        res["nation"] = match.group("nation")
+        res["birth"] = {
+            "year": match.group("year"),
+            "month": match.group("month"),
+            "day": match.group("day")
+        }
+        res["addr"] = match.group("addr")
+        res["idn"] = match.group("idn")
+        return res, "", True
+
+    return res, "识别失败,请重新选择", False
 
 
 def get_icon_info(data: "list[str]"):
+    """Extract the national-emblem-side fields from OCR text items.
+
+    Spaces are removed from every item, items made up only of characters
+    from ``__CN`` are dropped, and the remaining items are joined and matched
+    against ``__icon_ptn``. When the first item does not start with "中华"
+    the items are tried once more in reverse order.
+
+    :param data: recognized text items, in the order the OCR returned them.
+    :return: ``(info, message, ok)``. ``info`` always carries the keys agent,
+        from and to (each year/month/day), all empty strings unless parsing
+        succeeded; ``message`` is the user-facing failure reason ("" on
+        success); ``ok`` is True only on success.
+    """
     res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
-    for item in data:
-        if agent := re.match(__agent_ptn, item):
-            res["agent"] = agent.group("agent")
-        elif valid_date := re.match(__valid_date_ptn, item):
-            res["from"]["year"] = valid_date.group("from_year")
-            res["from"]["month"] = valid_date.group("from_month")
-            res["from"]["day"] = valid_date.group("from_day")
-            res["to"]["year"] = valid_date.group("to_year")
-            res["to"]["month"] = valid_date.group("to_month")
-            res["to"]["day"] = valid_date.group("to_day")
-    return res
+
+    if len(data) < 4:  # at least 4 recognition results are required
+        return res, "请使用正确的身份证国徽面照片", False
+    deal = [item.replace(" ", "") for item in data if not str_include(__CN, item)]
+    if not deal:  # the filter removed everything; deal[0] below would raise IndexError
+        return res, "请使用正确的身份证国徽面照片", False
+    if not deal[0].startswith("中华"):  # not upright: retry in reverse order
+        deal.reverse()
+    if not deal[0].startswith("中华"):
+        return res, "请确保照片为:横长竖宽,正面朝上", False
+
+    str_all = "".join(deal)
+    # NOTE(review): this writes the full recognized text to stdout;
+    # confirm it is wanted outside of debugging.
+    print(str_all)
+    if match := re.match(__icon_ptn, str_all):
+        res["agent"] = match.group("agent")
+        res["from"] = {
+            "year": match.group("from_year"),
+            "month": match.group("from_month"),
+            "day": match.group("from_day"),
+        }
+        res["to"] = {
+            "year": match.group("to_year"),
+            "month": match.group("to_month"),
+            "day": match.group("to_day"),
+        }
+        return res, "", True
+    return res, "识别失败,请重新选择", False
 
 
 class IdcView(views.MethodView):
@@ -81,14 +103,14 @@ class IdcView(views.MethodView):
         ocr_res, _ = recognize(content)
         words = [it[1][0] for it in ocr_res]
         if which == "face":
-            info = get_face_info(words)
-            if json_all(info):
+            info, msg, sta = get_face_info(words)
+            if sta:
                 return Response(data=info)
-            return Response("识别失败,请重新选择", info)
-        info = get_icon_info(words)
-        if json_all(info):
+            return Response(msg, info)
+        info, msg, sta = get_icon_info(words)
+        if sta:
             return Response(data=info)
-        return Response("识别失败,请重新选择", info)
+        return Response(msg, info)
 
 
 class IdcHtmlView(views.MethodView):
@@ -120,10 +142,11 @@ class IdcHtmlView(views.MethodView):
         words = [it[1][0] for it in ocr_res]
         draw_img(img_shape, [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res], rec_path)
         if which == "face":
-            info = get_face_info(words)
+            info, msg, sta = get_face_info(words)
         else:
-            info = get_icon_info(words)
-        info["SUCCESS"] = str(json_all(info)).upper()
+            info, msg, sta = get_icon_info(words)
+        info["SUCCESS"] = str(sta).upper()
+        info["MESSAGE"] = msg
         return render_template("k-v_result.html", raw=raw_path, rec=rec_path, data=info)
 
 

+ 8 - 1
utils/util.py

@@ -8,7 +8,7 @@ from paddleocr.tools.infer.utility import draw_box_txt_fine
 
 __all__ = [
     "Args", "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext", "recognize", "draw_img",
-    "json_all"
+    "json_all", "str_include"
 ]
 
 __StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
@@ -132,3 +132,10 @@ def json_all(data: "dict or list") -> "bool":
                 return False
         return True
     raise TypeError(f"except node type are: [list, dict], but got a {type(data)} instead.")
+
+
+def str_include(str_long: "str", str_short: "str") -> "bool":
+    """Tell whether every character of ``str_short`` occurs in ``str_long``.
+
+    Membership is tested character by character, so order and repetition
+    are ignored; an empty ``str_short`` yields ``True``.
+    """
+    return all(ch in str_long for ch in str_short)