Browse Source

v3.5: edge detect and image crop for dark background.

Tinger 2 years ago
parent
commit
d0eafd9106
4 changed files with 68 additions and 61 deletions
  1. 1 1
      .gitignore
  2. 53 40
      blues/idc.py
  3. 1 1
      templates/k-v_result.html
  4. 13 19
      utils/util.py

+ 1 - 1
.gitignore

@@ -3,4 +3,4 @@ venv
 .idea
 static/images/*
 
-test.py
+test

+ 53 - 40
blues/idc.py

@@ -3,25 +3,28 @@ from time import time
 from utils.util import *
 from utils.conf import MAX_CONTENT_LENGTH
 from flask import Blueprint, views, render_template, request
+from utils.logger import Logger
 
 idc = Blueprint("idc", __name__, url_prefix="/idc")
 
+_MIN_SIZE = 46
 __exclude = "中国CHINA *#★☆"
-__face_ptn = r"^姓名(?P<name>.+)性别(?P<gender>男|女)民族(?P<nation>.+)" \
+__face_ptn = r"姓名(?P<name>.+)" \
+             r"性别(?P<gender>男|女)民族(?P<nation>.+)" \
              r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日" \
-             r"住址(?P<addr>.+)公民身份号码(?P<idn>\d{17}\d|x|X)$"
-__icon_ptn = r"^中华人民共和国居民身份证签发机关(?P<agent>.+)" \
+             r"住址(?P<addr>.+)" \
+             r"公民身份号码(?P<idn>\d{17}\d|x|X)"
+__icon_ptn = r"中华人民共和国" \
+             r"居民身份证" \
+             r"签发机关(?P<agent>.+)" \
              r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
-             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
+             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
 
 
-def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
+def get_face_info(data_str: "str") -> "tuple[dict, bool]":
     res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
-    if len(data) < 5:  # 最少 5 个识别结果
-        return res, "请使用正确的身份证人像面照片", False
 
-    str_all = "".join([item for item in data if not str_include(__exclude, item)])
-    if match := re.match(__face_ptn, str_all):
+    if match := re.match(__face_ptn, data_str):
         res["name"] = match.group("name")
         res["gender"] = match.group("gender")
         res["nation"] = match.group("nation")
@@ -32,18 +35,15 @@ def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
         }
         res["addr"] = match.group("addr")
         res["idn"] = match.group("idn")
-        return res, str_all, True
+        return res, True
 
-    return res, "识别失败,请重新选择", False
+    return res, False
 
 
-def get_icon_info(data: "list[str]"):
+def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
     res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
-    if len(data) < 4:  # 最少 4 个识别结果
-        return res, "请使用正确的身份证国徽面照片", False
 
-    str_all = "".join([item for item in data if not str_include(__exclude, item)])
-    if match := re.match(__icon_ptn, str_all):
+    if match := re.match(__icon_ptn, data_str):
         res["agent"] = match.group("agent")
         res["from"] = {
             "year": match.group("from_year"),
@@ -55,8 +55,8 @@ def get_icon_info(data: "list[str]"):
             "month": match.group("to_month"),
             "day": match.group("to_day"),
         }
-        return res, str_all, True
-    return res, "识别失败,请重新选择", False
+        return res, True
+    return res, False
 
 
 class IdcView(views.MethodView):
@@ -83,28 +83,34 @@ class IdcView(views.MethodView):
             return Response("文件过大,请重新选择")
 
         img = read_img(content)
-        images = rot_img(img)
+        cropped = crop_img(img)  # 边缘裁剪,对深色背景的效果很好
+        images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]  # 旋转后仅取横长竖宽
         rec = Engine.rec_multi(images)
-        info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
-        for idx, ocr_res in enumerate(rec):
-            words = [it[0] for it in ocr_res]
+        info, err_rec, sta, idx = {}, [], False, 0
+        for i, ocr_res in enumerate(rec):
+            rec_str = "".join([it[0] for it in ocr_res])
             if which == "face":
-                if not words or not words[0].startswith("姓名"):
-                    continue
-                info, msg, sta = get_face_info(words)
+                if rec_str.startswith("姓名"):
+                    idx = i
+                    info, sta = get_face_info(rec_str)
             else:
-                if not words or not words[0].startswith("中华"):
-                    continue
-                info, msg, sta = get_icon_info(words)
+                if rec_str.startswith("中华"):
+                    idx = i
+                    info, sta = get_icon_info(rec_str)
             if sta:
                 break
+            elif len(rec_str) >= _MIN_SIZE:
+                Logger.error(rec_str)
+                err_rec.append(rec_str)
 
         info["duration"] = time() - start
         if sta:
             raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
             save_img(raw_path, images[idx])
             return Response(data=info)
-        return Response(msg, info)
+        else:
+            msg = "识别失败,建议使用深色背景\n识别结果:\n" + "\n".join(err_rec)
+            return Response(msg, info)
 
 
 class IdcHtmlView(views.MethodView):
@@ -127,27 +133,34 @@ class IdcHtmlView(views.MethodView):
             return Response("文件过大,请重新选择")
 
         img = read_img(content)
-        images = rot_img(img)
+        cropped = crop_img(img)
+        images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]
         rec = Engine.rec_multi(images)
-        info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
-        for idx, ocr_res in enumerate(rec):
-            words = [it[0].replace(" ", "") for it in ocr_res]
+        info, err_rec, sta, idx = {}, [], False, 0
+        for i, ocr_res in enumerate(rec):
+            rec_str = "".join([it[0] for it in ocr_res])
             if which == "face":
-                if not words or not words[0].startswith("姓名"):
-                    continue
-                info, msg, sta = get_face_info(words)
+                if rec_str.startswith("姓名"):
+                    idx = i
+                    info, sta = get_face_info(rec_str)
             else:
-                if not words or not words[0].startswith("中华"):
-                    continue
-                info, msg, sta = get_icon_info(words)
+                if rec_str.startswith("中华"):
+                    idx = i
+                    info, sta = get_icon_info(rec_str)
             if sta:
                 break
+            elif len(rec_str) >= _MIN_SIZE:
+                Logger.error(rec_str)
+                err_rec.append(rec_str)
 
         file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
         save_img(file_path, images[idx])
 
         info["SUCCESS"] = str(sta).upper()
-        info["MESSAGE"] = msg
+        if sta:
+            info["MESSAGE"] = "识别成功"
+        else:
+            info["MESSAGE"] = "识别失败,建议使用深色背景<br>识别结果:<br>" + "<br>".join(err_rec)
         info["DURATION"] = time() - start  # noqa
         return render_template("k-v_result.html", raw=file_path, data=info)
 

+ 1 - 1
templates/k-v_result.html

@@ -75,7 +75,7 @@
         {% for key, value in data.items() %}
             <tr>
                 <td class="center">{{ key }}</td>
-                <td>{{ value }}</td>
+                <td>{{ value|safe }}</td>
             </tr>
         {% endfor %}
         </tbody>

+ 13 - 19
utils/util.py

@@ -8,7 +8,7 @@ from time import localtime, strftime
 
 __all__ = [
     "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext",
-    "json_all", "str_include", "read_img", "rot_img", "save_img", "Engine"
+    "str_include", "read_img", "crop_img", "rot_img", "save_img", "Engine"
 ]
 
 __StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
@@ -39,24 +39,6 @@ def is_image_ext(ext: "str") -> bool:
     return ext in __AcceptExtNames
 
 
-def json_all(data: "Union[list, dict]") -> "bool":
-    if isinstance(data, list):
-        for item in data:
-            if isinstance(item, str) and not item:
-                return False
-            elif isinstance(item, (list, dict)) and not json_all(item):
-                return False
-        return True
-    elif isinstance(data, dict):
-        for value in data.values():
-            if isinstance(value, str) and not value:
-                return False
-            elif isinstance(value, (list, dict)) and not json_all(value):
-                return False
-        return True
-    raise TypeError(f"except node type are: [list, dict], but got a {type(data)} instead.")
-
-
 def str_include(str_long: "str", str_short: "str") -> "bool":
     for it in str_short:
         if it not in str_long:
@@ -68,6 +50,18 @@ def read_img(content: "str") -> "np.ndarray":
     return cv2.imdecode(np.frombuffer(content, np.uint8), 1)  # noqa
 
 
+def crop_img(image: "np.ndarray") -> "np.ndarray":
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # noqa 将图像转换为灰度图像
+    _, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)  # noqa 换为二值图像 => save: [150,255]
+    contours, _ = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # noqa 查找轮廓
+    max_contour = max(contours, key=cv2.contourArea)  # noqa 找到最大的轮廓
+    rect = cv2.minAreaRect(max_contour)  # noqa 计算最小外接矩形
+    box = cv2.boxPoints(rect)  # noqa 获取矩形的四个角点
+    box = np.intp(box)
+    # 裁剪图像
+    return image[min(box[:, 1]):max(box[:, 1]), min(box[:, 0]):max(box[:, 0])]
+
+
 def rot_img(img: "np.ndarray") -> "list[np.ndarray]":
     return [img, np.rot90(img), np.rot90(img, 2), np.rot90(img, 3)]