"""Flask blueprint exposing ID-card OCR endpoints under ``/idc``.

Two views are registered:

* ``/idc/``      -- ``IdcView``: GET renders the upload page, POST returns a
  ``Response`` object with the parsed fields.
* ``/idc/html/`` -- ``IdcHtmlView``: POST renders the parsed fields through
  the ``k-v_result.html`` template.

``Response``, ``Engine`` and the image/string helpers used below are not
defined in this module; they are presumably provided by the star import
from ``utils.util`` -- confirm against that module.
"""
import re
from time import time
from utils.util import *
from utils.conf import MAX_FILE_SIZE
from flask import Blueprint, views, render_template, request
from utils.logger import Logger

idc = Blueprint("idc", __name__, url_prefix="/idc")

# A failed recognition whose text is at least this long is logged and the
# user is advised to retry with a dark background.
_MIN_SIZE = 46
# Passed to ``str_include`` to decide which OCR fragments are dropped before
# the remaining fragments are joined into one string.
_EXCLUDE_CHAR = "中国CHINA *#★☆"

# Front ("face") side of the card. The group names are exactly the ones read
# back by ``get_face_info``.
__face_ptn = re.compile(
    # The label may be recognised before or after the name itself, because
    # the name is printed in a larger font.
    r"[姓名]{0,2}(?P<name>.+?)[姓名]{0,2}"
    r"[性别]{1,2}(?P<gender>[男女])民族(?P<nation>.+?)"
    # In testing, the birth line frequently failed to be recognised, so the
    # whole line is optional.
    r"([出生]{1,2}(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日)?"
    r"[住址]{1,2}(?P<addr>.+?)"
    r"[公民身份号码]{1,6}(?P<idn>\d{17}[\dx])$",
    re.I
)

# Back ("icon") side of the card. The group names are exactly the ones read
# back by ``get_icon_info``.
__icon_ptn = re.compile(
    r"[中华人民共和国居身份证]{0,12}?"
    r"[签发机关]{1,4}(?P<agent>.+?)"
    r"[有效期限]{1,4}(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
    r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
)


def get_face_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of the front side of an ID card.

    Args:
        data_str: OCR text fragments concatenated into a single string.

    Returns:
        ``(info, ok)``. ``info`` always has the keys ``name``, ``gender``,
        ``nation``, ``birth`` (a dict with ``year``/``month``/``day``),
        ``addr`` and ``idn``. When the text does not match, every value is
        an empty string and ``ok`` is ``False``.
    """
    res = {
        "name": "",
        "gender": "",
        "nation": "",
        "birth": {"year": "", "month": "", "day": ""},
        "addr": "",
        "idn": "",
    }
    match = __face_ptn.match(data_str)
    if match is None:
        return res, False

    idn = match.group("idn")
    res["idn"] = idn
    res["name"] = match.group("name")
    res["gender"] = match.group("gender")
    res["nation"] = match.group("nation")
    # The birth line is optional in the pattern; when it was not recognised
    # the date is taken from fixed positions of the ID number instead.
    res["birth"] = {
        "year": match.group("year") or idn[6:10],
        "month": match.group("month") or idn[10:12],
        "day": match.group("day") or idn[12:14],
    }
    res["addr"] = match.group("addr")
    return res, True


def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of the back side of an ID card.

    Args:
        data_str: OCR text fragments concatenated into a single string.

    Returns:
        ``(info, ok)``. ``info`` always has the keys ``agent``, ``from`` and
        ``to`` (the latter two are dicts with ``year``/``month``/``day``).
        When the text does not match, every value is an empty string and
        ``ok`` is ``False``.
    """
    res = {
        "agent": "",
        "from": {"year": "", "month": "", "day": ""},
        "to": {"year": "", "month": "", "day": ""},
    }
    match = __icon_ptn.match(data_str)
    if match is None:
        return res, False

    res["agent"] = match.group("agent")
    for side in ("from", "to"):
        res[side] = {
            "year": match.group(f"{side}_year"),
            "month": match.group(f"{side}_month"),
            "day": match.group(f"{side}_day"),
        }
    return res, True


def _read_upload():
    """Validate the ``which`` form field and the uploaded ``picture`` file.

    Returns:
        ``(which, ext, content, error)``. ``error`` is a ``Response`` that
        the view must return as-is when the request is invalid, otherwise
        ``None``. The other three values are only meaningful when ``error``
        is ``None``.
    """
    which = request.form.get("which")
    if which is not None:
        which = which.lower()
    if which not in ("face", "icon"):
        return None, None, None, Response(f"not recognized arg : '{which}'")

    pic = request.files.get("picture")
    if pic is None:
        return None, None, None, Response("empty body")

    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return None, None, None, Response("文件类型错误")

    content = pic.read()
    if len(content) > MAX_FILE_SIZE:
        return None, None, None, Response("文件过大,请重新选择")
    return which, ext, content, None


def _recognize(content):
    """Run preprocessing and OCR; return ``(images, recognizes)``."""
    # Image preprocessing; works well on dark backgrounds.
    processed = preprocess_img(read_img(content))
    # presumably rotated variants of the same picture -- TODO confirm
    images = rot_img_2(processed)
    return images, Engine.rec_multi(images)


def _join_text(ocr_res):
    """Concatenate the text of one OCR result, skipping excluded fragments."""
    return "".join(
        [it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0])]
    )


def _extract(which, rec_str):
    """Dispatch to the parser for the requested card side."""
    if which == "face":
        return get_face_info(rec_str)
    return get_icon_info(rec_str)


class IdcView(views.MethodView):
    """Upload page (GET) and recognition endpoint returning a ``Response``."""

    @staticmethod
    def get():
        """Render the upload form."""
        return render_template("idc_index.html")

    @staticmethod
    def post():
        """Recognise the uploaded picture and return the parsed fields.

        On success the recognised image is saved under ``static/images/``
        and ``Response(data=info)`` is returned; otherwise a ``Response``
        carrying a failure message and the (empty) info dict is returned.
        ``info["duration"]`` holds the elapsed seconds in both cases.
        """
        start = time()
        which, ext, content, error = _read_upload()
        if error is not None:
            return error

        images, recognizes = _recognize(content)
        info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
        for i, ocr_res in enumerate(recognizes):
            rec_str = _join_text(ocr_res)
            info, sta = _extract(which, rec_str)
            if sta:
                idx = i
                break
            if len(rec_str) >= _MIN_SIZE:
                # Enough text was read but it did not parse: log it and
                # switch to the more specific hint.
                msg = "识别失败,建议选择深色背景"
                Logger.error(rec_str)

        info["duration"] = time() - start
        if sta:
            raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
            save_img(raw_path, images[idx])
            return Response(data=info)
        return Response(msg, info)


class IdcHtmlView(views.MethodView):
    """Recognition endpoint that renders the result as an HTML page."""

    @staticmethod
    def post():
        """Recognise the uploaded picture and render ``k-v_result.html``.

        Unlike ``IdcView.post`` an image is saved on failure as well, and
        the texts that failed to parse are appended to the message shown to
        the user. Invalid requests still receive a plain ``Response``.
        """
        start = time()
        which, ext, content, error = _read_upload()
        if error is not None:
            return error

        images, recognizes = _recognize(content)
        info, err_rec, sta, idx = {}, [], False, 0
        msg = "识别失败,请重新选择"
        # Text starting with this prefix marks the candidate to save when
        # nothing parses at all.
        prefix = "姓名" if which == "face" else "中华"
        for i, ocr_res in enumerate(recognizes):
            rec_str = _join_text(ocr_res)
            if rec_str.startswith(prefix):
                idx = i
            info, sta = _extract(which, rec_str)
            if sta:
                # Save the candidate that actually parsed. The face pattern
                # accepts text that does not start with the label, so the
                # prefix check alone could leave idx on another image.
                idx = i
                msg = "识别成功"
                break
            if len(rec_str) >= _MIN_SIZE:
                msg = "识别失败,建议选择深色背景"
                Logger.error(rec_str)
                err_rec.append(rec_str)

        file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(file_path, images[idx])
        info["SUCCESS"] = str(sta).upper()
        info["MESSAGE"] = msg
        if err_rec:
            # NOTE(review): the separators are newlines, as in the source
            # text; confirm whether the template expects "<br>" instead.
            info["MESSAGE"] += "\n识别结果:\n" + "\n".join(err_rec)
        info["DURATION"] = time() - start  # noqa
        return render_template("k-v_result.html", raw=file_path, data=info)


idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))