2 năm trước cách đây · d0eafd9106
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@ venv
 
				 .idea
			
 
				 static/images/*
			
 
				 
			
 
				-test.py
			
 
				+test
			
--- a/blues/idc.py
+++ b/blues/idc.py
@@ -3,25 +3,28 @@ from time import time
 
				 from utils.util import *
			
 
				 from utils.conf import MAX_CONTENT_LENGTH
			
 
				 from flask import Blueprint, views, render_template, request
			
 
				+from utils.logger import Logger
			
 
				 
			
 
				 idc = Blueprint("idc", __name__, url_prefix="/idc")
			
 
				 
			
 
				+_MIN_SIZE = 46
			
 
				 __exclude = "中国CHINA *#★☆"
			
 
				-__face_ptn = r"^姓名(?P<name>.+)性别(?P<gender>男|女)民族(?P<nation>.+)" \
			
 
				+__face_ptn = r"姓名(?P<name>.+)" \
			
 
				+             r"性别(?P<gender>男|女)民族(?P<nation>.+)" \
			
 
				              r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日" \
			
 
				-             r"住址(?P<addr>.+)公民身份号码(?P<idn>\d{17}\d|x|X)$"
			
 
				-__icon_ptn = r"^中华人民共和国居民身份证签发机关(?P<agent>.+)" \
			
 
				+             r"住址(?P<addr>.+)" \
			
 
				+             r"公民身份号码(?P<idn>\d{17}\d|x|X)"
			
 
				+__icon_ptn = r"中华人民共和国" \
			
 
				+             r"居民身份证" \
			
 
				+             r"签发机关(?P<agent>.+)" \
			
 
				              r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
			
 
				-             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
			
 
				+             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
			
 
				 
			
 
				 
			
 
				-def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
			
 
				+def get_face_info(data_str: "str") -> "tuple[dict, bool]":
			
 
				     res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
			
 
				-    if len(data) < 5:  # 最少 5 个识别结果
			
 
				-        return res, "请使用正确的身份证人像面照片", False
			
 
				 
			
 
				-    str_all = "".join([item for item in data if not str_include(__exclude, item)])
			
 
				-    if match := re.match(__face_ptn, str_all):
			
 
				+    if match := re.match(__face_ptn, data_str):
			
 
				         res["name"] = match.group("name")
			
 
				         res["gender"] = match.group("gender")
			
 
				         res["nation"] = match.group("nation")
			
@@ -32,18 +35,15 @@ def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
 
				         }
			
 
				         res["addr"] = match.group("addr")
			
 
				         res["idn"] = match.group("idn")
			
 
				-        return res, str_all, True
			
 
				+        return res, True
			
 
				 
			
 
				-    return res, "识别失败，请重新选择", False
			
 
				+    return res, False
			
 
				 
			
 
				 
			
 
				-def get_icon_info(data: "list[str]"):
			
 
				+def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
			
 
				     res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
			
 
				-    if len(data) < 4:  # 最少 4 个识别结果
			
 
				-        return res, "请使用正确的身份证国徽面照片", False
			
 
				 
			
 
				-    str_all = "".join([item for item in data if not str_include(__exclude, item)])
			
 
				-    if match := re.match(__icon_ptn, str_all):
			
 
				+    if match := re.match(__icon_ptn, data_str):
			
 
				         res["agent"] = match.group("agent")
			
 
				         res["from"] = {
			
 
				             "year": match.group("from_year"),
			
@@ -55,8 +55,8 @@ def get_icon_info(data: "list[str]"):
 
				             "month": match.group("to_month"),
			
 
				             "day": match.group("to_day"),
			
 
				         }
			
 
				-        return res, str_all, True
			
 
				-    return res, "识别失败，请重新选择", False
			
 
				+        return res, True
			
 
				+    return res, False
			
 
				 
			
 
				 
			
 
				 class IdcView(views.MethodView):
			
@@ -83,28 +83,34 @@ class IdcView(views.MethodView):
 
				             return Response("文件过大，请重新选择")
			
 
				 
			
 
				         img = read_img(content)
			
 
				-        images = rot_img(img)
			
 
				+        cropped = crop_img(img)  # 边缘裁剪，对深色背景的效果很好
			
 
				+        images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]  # 旋转后仅取横长竖宽
			
 
				         rec = Engine.rec_multi(images)
			
 
				-        info, msg, sta, idx = {}, "识别失败，请重新选择", False, 0
			
 
				-        for idx, ocr_res in enumerate(rec):
			
 
				-            words = [it[0] for it in ocr_res]
			
 
				+        info, err_rec, sta, idx = {}, [], False, 0
			
 
				+        for i, ocr_res in enumerate(rec):
			
 
				+            rec_str = "".join([it[0] for it in ocr_res])
			
 
				             if which == "face":
			
 
				-                if not words or not words[0].startswith("姓名"):
			
 
				-                    continue
			
 
				-                info, msg, sta = get_face_info(words)
			
 
				+                if rec_str.startswith("姓名"):
			
 
				+                    idx = i
			
 
				+                    info, sta = get_face_info(rec_str)
			
 
				             else:
			
 
				-                if not words or not words[0].startswith("中华"):
			
 
				-                    continue
			
 
				-                info, msg, sta = get_icon_info(words)
			
 
				+                if rec_str.startswith("中华"):
			
 
				+                    idx = i
			
 
				+                    info, sta = get_icon_info(rec_str)
			
 
				             if sta:
			
 
				                 break
			
 
				+            elif len(rec_str) >= _MIN_SIZE:
			
 
				+                Logger.error(rec_str)
			
 
				+                err_rec.append(rec_str)
			
 
				 
			
 
				         info["duration"] = time() - start
			
 
				         if sta:
			
 
				             raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
			
 
				             save_img(raw_path, images[idx])
			
 
				             return Response(data=info)
			
 
				-        return Response(msg, info)
			
 
				+        else:
			
 
				+            msg = "识别失败，建议使用深色背景\n识别结果：\n" + "\n".join(err_rec)
			
 
				+            return Response(msg, info)
			
 
				 
			
 
				 
			
 
				 class IdcHtmlView(views.MethodView):
			
@@ -127,27 +133,34 @@ class IdcHtmlView(views.MethodView):
 
				             return Response("文件过大，请重新选择")
			
 
				 
			
 
				         img = read_img(content)
			
 
				-        images = rot_img(img)
			
 
				+        cropped = crop_img(img)
			
 
				+        images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]
			
 
				         rec = Engine.rec_multi(images)
			
 
				-        info, msg, sta, idx = {}, "识别失败，请重新选择", False, 0
			
 
				-        for idx, ocr_res in enumerate(rec):
			
 
				-            words = [it[0].replace(" ", "") for it in ocr_res]
			
 
				+        info, err_rec, sta, idx = {}, [], False, 0
			
 
				+        for i, ocr_res in enumerate(rec):
			
 
				+            rec_str = "".join([it[0] for it in ocr_res])
			
 
				             if which == "face":
			
 
				-                if not words or not words[0].startswith("姓名"):
			
 
				-                    continue
			
 
				-                info, msg, sta = get_face_info(words)
			
 
				+                if rec_str.startswith("姓名"):
			
 
				+                    idx = i
			
 
				+                    info, sta = get_face_info(rec_str)
			
 
				             else:
			
 
				-                if not words or not words[0].startswith("中华"):
			
 
				-                    continue
			
 
				-                info, msg, sta = get_icon_info(words)
			
 
				+                if rec_str.startswith("中华"):
			
 
				+                    idx = i
			
 
				+                    info, sta = get_icon_info(rec_str)
			
 
				             if sta:
			
 
				                 break
			
 
				+            elif len(rec_str) >= _MIN_SIZE:
			
 
				+                Logger.error(rec_str)
			
 
				+                err_rec.append(rec_str)
			
 
				 
			
 
				         file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
			
 
				         save_img(file_path, images[idx])
			
 
				 
			
 
				         info["SUCCESS"] = str(sta).upper()
			
 
				-        info["MESSAGE"] = msg
			
 
				+        if sta:
			
 
				+            info["MESSAGE"] = "识别成功"
			
 
				+        else:
			
 
				+            info["MESSAGE"] = "识别失败，建议使用深色背景<br>识别结果：<br>" + "<br>".join(err_rec)
			
 
				         info["DURATION"] = time() - start  # noqa
			
 
				         return render_template("k-v_result.html", raw=file_path, data=info)
			
 
				 
			
--- a/templates/k-v_result.html
+++ b/templates/k-v_result.html
@@ -75,7 +75,7 @@
 
				         {% for key, value in data.items() %}
			
 
				             <tr>
			
 
				                 <td class="center">{{ key }}</td>
			
 
				-                <td>{{ value }}</td>
			
 
				+                <td>{{ value|safe }}</td>
			
 
				             </tr>
			
 
				         {% endfor %}
			
 
				         </tbody>
			
--- a/utils/util.py
+++ b/utils/util.py
@@ -8,7 +8,7 @@ from time import localtime, strftime
 
				 
			
 
				 __all__ = [
			
 
				     "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext",
			
 
				-    "json_all", "str_include", "read_img", "rot_img", "save_img", "Engine"
			
 
				+    "str_include", "read_img", "crop_img", "rot_img", "save_img", "Engine"
			
 
				 ]
			
 
				 
			
 
				 __StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
			
@@ -39,24 +39,6 @@ def is_image_ext(ext: "str") -> bool:
 
				     return ext in __AcceptExtNames
			
 
				 
			
 
				 
			
 
				-def json_all(data: "Union[list, dict]") -> "bool":
			
 
				-    if isinstance(data, list):
			
 
				-        for item in data:
			
 
				-            if isinstance(item, str) and not item:
			
 
				-                return False
			
 
				-            elif isinstance(item, (list, dict)) and not json_all(item):
			
 
				-                return False
			
 
				-        return True
			
 
				-    elif isinstance(data, dict):
			
 
				-        for value in data.values():
			
 
				-            if isinstance(value, str) and not value:
			
 
				-                return False
			
 
				-            elif isinstance(value, (list, dict)) and not json_all(value):
			
 
				-                return False
			
 
				-        return True
			
 
				-    raise TypeError(f"except node type are: [list, dict], but got a {type(data)} instead.")
			
 
				-
			
 
				-
			
 
				 def str_include(str_long: "str", str_short: "str") -> "bool":
			
 
				     for it in str_short:
			
 
				         if it not in str_long:
			
@@ -68,6 +50,18 @@ def read_img(content: "str") -> "np.ndarray":
 
				     return cv2.imdecode(np.frombuffer(content, np.uint8), 1)  # noqa
			
 
				 
			
 
				 
			
 
				+def crop_img(image: "np.ndarray") -> "np.ndarray":
			
 
				+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # noqa 将图像转换为灰度图像
			
 
				+    _, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)  # noqa 换为二值图像 => save: [150,255]
			
 
				+    contours, _ = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # noqa 查找轮廓
			
 
				+    max_contour = max(contours, key=cv2.contourArea)  # noqa 找到最大的轮廓
			
 
				+    rect = cv2.minAreaRect(max_contour)  # noqa 计算最小外接矩形
			
 
				+    box = cv2.boxPoints(rect)  # noqa 获取矩形的四个角点
			
 
				+    box = np.intp(box)
			
 
				+    # 裁剪图像
			
 
				+    return image[min(box[:, 1]):max(box[:, 1]), min(box[:, 0]):max(box[:, 0])]
			
 
				+
			
 
				+
			
 
				 def rot_img(img: "np.ndarray") -> "list[np.ndarray]":
			
 
				     return [img, np.rot90(img), np.rot90(img, 2), np.rot90(img, 3)]