|
@@ -3,25 +3,28 @@ from time import time
|
|
|
from utils.util import *
|
|
|
from utils.conf import MAX_CONTENT_LENGTH
|
|
|
from flask import Blueprint, views, render_template, request
|
|
|
+from utils.logger import Logger
|
|
|
|
|
|
idc = Blueprint("idc", __name__, url_prefix="/idc")
|
|
|
|
|
|
+_MIN_SIZE = 46
|
|
|
__exclude = "中国CHINA *#★☆"
|
|
|
-__face_ptn = r"^姓名(?P<name>.+)性别(?P<gender>男|女)民族(?P<nation>.+)" \
|
|
|
+__face_ptn = r"姓名(?P<name>.+)" \
|
|
|
+ r"性别(?P<gender>男|女)民族(?P<nation>.+)" \
|
|
|
r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日" \
|
|
|
- r"住址(?P<addr>.+)公民身份号码(?P<idn>\d{17}\d|x|X)$"
|
|
|
-__icon_ptn = r"^中华人民共和国居民身份证签发机关(?P<agent>.+)" \
|
|
|
+ r"住址(?P<addr>.+)" \
|
|
|
+ r"公民身份号码(?P<idn>\d{17}[\dxX])"
|
|
|
+__icon_ptn = r"中华人民共和国" \
|
|
|
+ r"居民身份证" \
|
|
|
+ r"签发机关(?P<agent>.+)" \
|
|
|
r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
|
|
|
- r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
|
|
|
+ r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
|
|
|
|
|
|
|
|
|
-def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
|
|
|
+def get_face_info(data_str: "str") -> "tuple[dict, bool]":
|
|
|
res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
|
|
|
- if len(data) < 5: # 最少 5 个识别结果
|
|
|
- return res, "请使用正确的身份证人像面照片", False
|
|
|
|
|
|
- str_all = "".join([item for item in data if not str_include(__exclude, item)])
|
|
|
- if match := re.match(__face_ptn, str_all):
|
|
|
+ if match := re.match(__face_ptn, data_str):
|
|
|
res["name"] = match.group("name")
|
|
|
res["gender"] = match.group("gender")
|
|
|
res["nation"] = match.group("nation")
|
|
@@ -32,18 +35,15 @@ def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
|
|
|
}
|
|
|
res["addr"] = match.group("addr")
|
|
|
res["idn"] = match.group("idn")
|
|
|
- return res, str_all, True
|
|
|
+ return res, True
|
|
|
|
|
|
- return res, "识别失败,请重新选择", False
|
|
|
+ return res, False
|
|
|
|
|
|
|
|
|
-def get_icon_info(data: "list[str]"):
|
|
|
+def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
|
|
|
res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
|
|
|
- if len(data) < 4: # 最少 4 个识别结果
|
|
|
- return res, "请使用正确的身份证国徽面照片", False
|
|
|
|
|
|
- str_all = "".join([item for item in data if not str_include(__exclude, item)])
|
|
|
- if match := re.match(__icon_ptn, str_all):
|
|
|
+ if match := re.match(__icon_ptn, data_str):
|
|
|
res["agent"] = match.group("agent")
|
|
|
res["from"] = {
|
|
|
"year": match.group("from_year"),
|
|
@@ -55,8 +55,8 @@ def get_icon_info(data: "list[str]"):
|
|
|
"month": match.group("to_month"),
|
|
|
"day": match.group("to_day"),
|
|
|
}
|
|
|
- return res, str_all, True
|
|
|
- return res, "识别失败,请重新选择", False
|
|
|
+ return res, True
|
|
|
+ return res, False
|
|
|
|
|
|
|
|
|
class IdcView(views.MethodView):
|
|
@@ -83,28 +83,34 @@ class IdcView(views.MethodView):
|
|
|
return Response("文件过大,请重新选择")
|
|
|
|
|
|
img = read_img(content)
|
|
|
- images = rot_img(img)
|
|
|
+ cropped = crop_img(img) # 边缘裁剪,对深色背景的效果很好
|
|
|
+ images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]] # 旋转后仅取横长竖宽
|
|
|
rec = Engine.rec_multi(images)
|
|
|
- info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
|
|
|
- for idx, ocr_res in enumerate(rec):
|
|
|
- words = [it[0] for it in ocr_res]
|
|
|
+ info, err_rec, sta, idx = {}, [], False, 0
|
|
|
+ for i, ocr_res in enumerate(rec):
|
|
|
+ rec_str = "".join([it[0] for it in ocr_res])
|
|
|
if which == "face":
|
|
|
- if not words or not words[0].startswith("姓名"):
|
|
|
- continue
|
|
|
- info, msg, sta = get_face_info(words)
|
|
|
+ if rec_str.startswith("姓名"):
|
|
|
+ idx = i
|
|
|
+ info, sta = get_face_info(rec_str)
|
|
|
else:
|
|
|
- if not words or not words[0].startswith("中华"):
|
|
|
- continue
|
|
|
- info, msg, sta = get_icon_info(words)
|
|
|
+ if rec_str.startswith("中华"):
|
|
|
+ idx = i
|
|
|
+ info, sta = get_icon_info(rec_str)
|
|
|
if sta:
|
|
|
break
|
|
|
+ elif len(rec_str) >= _MIN_SIZE:
|
|
|
+ Logger.error(rec_str)
|
|
|
+ err_rec.append(rec_str)
|
|
|
|
|
|
info["duration"] = time() - start
|
|
|
if sta:
|
|
|
raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
|
|
|
save_img(raw_path, images[idx])
|
|
|
return Response(data=info)
|
|
|
- return Response(msg, info)
|
|
|
+ else:
|
|
|
+ msg = "识别失败,建议使用深色背景\n识别结果:\n" + "\n".join(err_rec)
|
|
|
+ return Response(msg, info)
|
|
|
|
|
|
|
|
|
class IdcHtmlView(views.MethodView):
|
|
@@ -127,27 +133,34 @@ class IdcHtmlView(views.MethodView):
|
|
|
return Response("文件过大,请重新选择")
|
|
|
|
|
|
img = read_img(content)
|
|
|
- images = rot_img(img)
|
|
|
+ cropped = crop_img(img)
|
|
|
+ images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]
|
|
|
rec = Engine.rec_multi(images)
|
|
|
- info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
|
|
|
- for idx, ocr_res in enumerate(rec):
|
|
|
- words = [it[0].replace(" ", "") for it in ocr_res]
|
|
|
+ info, err_rec, sta, idx = {}, [], False, 0
|
|
|
+ for i, ocr_res in enumerate(rec):
|
|
|
+ rec_str = "".join([it[0] for it in ocr_res])
|
|
|
if which == "face":
|
|
|
- if not words or not words[0].startswith("姓名"):
|
|
|
- continue
|
|
|
- info, msg, sta = get_face_info(words)
|
|
|
+ if rec_str.startswith("姓名"):
|
|
|
+ idx = i
|
|
|
+ info, sta = get_face_info(rec_str)
|
|
|
else:
|
|
|
- if not words or not words[0].startswith("中华"):
|
|
|
- continue
|
|
|
- info, msg, sta = get_icon_info(words)
|
|
|
+ if rec_str.startswith("中华"):
|
|
|
+ idx = i
|
|
|
+ info, sta = get_icon_info(rec_str)
|
|
|
if sta:
|
|
|
break
|
|
|
+ elif len(rec_str) >= _MIN_SIZE:
|
|
|
+ Logger.error(rec_str)
|
|
|
+ err_rec.append(rec_str)
|
|
|
|
|
|
file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
|
|
|
save_img(file_path, images[idx])
|
|
|
|
|
|
info["SUCCESS"] = str(sta).upper()
|
|
|
- info["MESSAGE"] = msg
|
|
|
+ if sta:
|
|
|
+ info["MESSAGE"] = "识别成功"
|
|
|
+ else:
|
|
|
+ info["MESSAGE"] = "识别失败,建议使用深色背景<br>识别结果:<br>" + "<br>".join(err_rec)
|
|
|
info["DURATION"] = time() - start # noqa
|
|
|
return render_template("k-v_result.html", raw=file_path, data=info)
|
|
|
|