"""Flask blueprint exposing ID-card OCR endpoints under ``/idc``.

Two views are registered:

* ``/idc/``      -- ``GET`` renders the upload page, ``POST`` returns the
  recognition result wrapped in a ``Response``.
* ``/idc/html/`` -- ``POST`` renders the recognition result as an HTML page.
"""
import re
from time import time
# NOTE: the star import is kept ahead of the explicit imports (as in the
# original file) so that explicitly imported names win on any clash.  It
# supplies Response, Engine and the image/file helpers used below.
from utils.util import *  # noqa: F401,F403
from utils.conf import MAX_CONTENT_LENGTH
from flask import Blueprint, views, render_template, request
from utils.logger import Logger

idc = Blueprint("idc", __name__, url_prefix="/idc")

# OCR strings at least this long that still fail to parse are logged and
# collected as "failed recognitions".
_MIN_SIZE = 46
__exclude = "中国CHINA *#★☆"

# Portrait side of the card.  Group names mirror the keys read by
# get_face_info().  The last character of the ID number may be a digit or
# the letter x/X, hence the character class: the previous ``\d{17}\d|x|X``
# alternated over the whole group and rejected every ID ending in X.
__face_ptn = (
    r"姓名(?P<name>.+)"
    r"性别(?P<gender>男|女)民族(?P<nation>.+)"
    r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日"
    r"住址(?P<addr>.+)"
    r"公民身份号码(?P<idn>\d{17}[\dxX])"
)

# National-emblem side of the card.  Group names mirror the keys read by
# get_icon_info().
__icon_ptn = (
    r"中华人民共和国"
    r"居民身份证"
    r"签发机关(?P<agent>.+)"
    r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
    r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
)


def get_face_info(data_str: str) -> "tuple[dict, bool]":
    """Parse the concatenated OCR text of the portrait side of an ID card.

    :param data_str: OCR text; it must match ``__face_ptn`` from its start.
    :return: ``(info, ok)``.  ``info`` always has the keys ``name``,
        ``gender``, ``nation``, ``birth`` (``year``/``month``/``day``),
        ``addr`` and ``idn``; every value is an empty string when ``ok`` is
        ``False``.
    """
    res = {
        "name": "",
        "gender": "",
        "nation": "",
        "birth": {"year": "", "month": "", "day": ""},
        "addr": "",
        "idn": "",
    }
    match = re.match(__face_ptn, data_str)
    if match is None:
        return res, False

    for key in ("name", "gender", "nation", "addr", "idn"):
        res[key] = match.group(key)
    res["birth"] = {part: match.group(part) for part in ("year", "month", "day")}
    return res, True


def get_icon_info(data_str: str) -> "tuple[dict, bool]":
    """Parse the concatenated OCR text of the emblem side of an ID card.

    :param data_str: OCR text; it must match ``__icon_ptn`` from its start.
    :return: ``(info, ok)``.  ``info`` always has the keys ``agent``,
        ``from`` and ``to`` (each date split into ``year``/``month``/``day``);
        every value is an empty string when ``ok`` is ``False``.
    """
    res = {
        "agent": "",
        "from": {"year": "", "month": "", "day": ""},
        "to": {"year": "", "month": "", "day": ""},
    }
    match = re.match(__icon_ptn, data_str)
    if match is None:
        return res, False

    res["agent"] = match.group("agent")
    for side in ("from", "to"):
        res[side] = {
            part: match.group(f"{side}_{part}") for part in ("year", "month", "day")
        }
    return res, True


def _read_upload():
    """Validate the current request's form fields and uploaded picture.

    :return: ``((which, ext, content), None)`` on success, or
        ``(None, error_response)`` when the ``which`` field, the file, its
        extension or its size is rejected.
    """
    which = request.form.get("which")
    if which is not None:
        which = which.lower()
    if which not in ("face", "icon"):
        return None, Response(f"not recognized arg : '{which}'")

    pic = request.files.get("picture")
    if pic is None:
        return None, Response("empty body")

    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return None, Response("文件类型错误")

    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return None, Response("文件过大,请重新选择")

    return (which, ext, content), None


def _recognize(which, content):
    """Run OCR on the uploaded bytes and parse the requested card side.

    :param which: ``"face"`` or ``"icon"``.
    :param content: raw bytes of the uploaded picture.
    :return: ``(images, info, err_rec, sta, idx)`` -- the candidate images,
        the parsed info dict, the OCR strings that were long enough but did
        not parse, the success flag, and the index of the last image whose
        text started with the expected prefix (``0`` if none did).
    """
    img = read_img(content)
    # Edge cropping; works well on dark backgrounds.
    cropped = crop_img(img)
    # After rotation keep only the landscape (wider than tall) candidates.
    images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]
    rec = Engine.rec_multi(images)

    if which == "face":
        prefix, parse = "姓名", get_face_info
    else:
        prefix, parse = "中华", get_icon_info

    info, err_rec, sta, idx = {}, [], False, 0
    for i, ocr_res in enumerate(rec):
        rec_str = "".join(it[0] for it in ocr_res)
        if rec_str.startswith(prefix):
            idx = i
            info, sta = parse(rec_str)
        if sta:
            break
        if len(rec_str) >= _MIN_SIZE:
            Logger.error(rec_str)
            err_rec.append(rec_str)
    return images, info, err_rec, sta, idx


class IdcView(views.MethodView):
    """Upload page (``GET``) and recognition endpoint (``POST``)."""

    @staticmethod
    def get():
        """Render the upload page."""
        return render_template("idc_index.html")

    @staticmethod
    def post():
        """Recognise the uploaded ID-card picture and return a ``Response``.

        Form fields: ``which`` (``face`` or ``icon``, case-insensitive) and
        the file ``picture``.  On success the recognised image is saved under
        ``static/images/`` and the parsed info (plus ``duration`` in seconds)
        is returned as ``data``.
        """
        start = time()
        upload, error = _read_upload()
        if error is not None:
            return error
        which, ext, content = upload

        images, info, _err_rec, sta, idx = _recognize(which, content)
        info["duration"] = time() - start
        if not sta:
            return Response("识别失败,建议使用深色背景", info)

        raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(raw_path, images[idx])
        return Response(data=info)


class IdcHtmlView(views.MethodView):
    """Recognition endpoint that renders its result as an HTML page."""

    @staticmethod
    def post():
        """Recognise the uploaded ID-card picture and render the result page.

        Takes the same form fields as ``IdcView.post``.  The candidate image
        is saved whether or not recognition succeeded, and the template
        receives its path as ``raw`` together with the info dict extended by
        ``SUCCESS``, ``MESSAGE`` and ``DURATION``.
        """
        start = time()
        upload, error = _read_upload()
        if error is not None:
            return error
        which, ext, content = upload

        images, info, err_rec, sta, idx = _recognize(which, content)

        # Guard against an empty candidate list (no landscape image survived
        # the filter): indexing it used to raise IndexError.
        file_path = ""
        if images:
            file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
            save_img(file_path, images[idx])

        info["SUCCESS"] = str(sta).upper()
        if sta:
            info["MESSAGE"] = "识别成功"
        else:
            # NOTE(review): the separators were raw line breaks inside the
            # literal (a syntax error); they may originally have been HTML
            # line-break tags -- confirm against k-v_result.html.
            info["MESSAGE"] = "识别失败,建议使用深色背景\n识别结果:\n" + "\n".join(err_rec)
        info["DURATION"] = time() - start  # noqa
        return render_template("k-v_result.html", raw=file_path, data=info)


idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))