Tinger
/
All


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
							import re
from time import time
from utils.util import *
from utils.conf import MAX_FILE_SIZE
from flask import Blueprint, views, render_template, request
from utils.logger import Logger

# Blueprint that mounts the ID-card recognition views under the "/idc" URL prefix.
idc = Blueprint("idc", __name__, url_prefix="/idc")

# Minimum length of a joined OCR string for which a failed parse is logged and
# the user-facing hint is switched to "try a dark background" (see the post handlers).
_MIN_SIZE = 46
# Characters handed to `str_include` when OCR fragments are filtered before joining.
# Presumably fragments containing any of them are dropped -- confirm against utils.util.
_EXCLUDE_CHAR = "中国CHINA *#★☆"
# Front ("face") side of the card: name, gender, nation, optional birth date,
# address and the 18-character ID number. Compiled case-insensitively, so a
# lowercase trailing "x" in the ID number is accepted as well.
__face_ptn = re.compile(
    r"[姓名]{0,2}(?P<name>.+?)[姓名]{0,2}"                            # handles the name appearing before the `姓名` label (the name is printed in a larger font)
    r"[性别]{1,2}(?P<gender>[男女])民族(?P<nation>.+?)"
    r"([出生]{1,2}(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日)?"  # optional: in testing this line had a fairly high chance of not being recognized
    r"[住址]{1,2}(?P<addr>.+?)"
    r"[公民身份号码]{1,6}(?P<idn>\d{17}[\dx])$",
    re.I
)
# Back ("icon") side of the card: the issuing-authority field (`agent`) and the
# validity period as two dates written YYYY.MM.DD. Both dates must be numeric
# for the pattern to match.
__icon_ptn = re.compile(
    r"[中华人民共和国居身份证]{0,12}?"
    r"[签发机关]{1,4}(?P<agent>.+?)"
    r"[有效期限]{1,4}(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
    r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
)


def get_face_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of the card's front side.

    Args:
        data_str: OCR text; it is matched from its start against ``__face_ptn``.

    Returns:
        A ``(info, ok)`` pair. ``info`` always has the keys ``name``,
        ``gender``, ``nation``, ``birth`` (a dict with ``year``, ``month``,
        ``day``), ``addr`` and ``idn``. When ``ok`` is ``False`` every value
        is an empty string.
    """
    hit = __face_ptn.match(data_str)
    if hit is None:
        blank = {
            "name": "",
            "gender": "",
            "nation": "",
            "birth": {"year": "", "month": "", "day": ""},
            "addr": "",
            "idn": "",
        }
        return blank, False

    fields = hit.groupdict()
    idn = fields["idn"]
    # The birth line is optional in the pattern; when it was not captured the
    # date is sliced out of the ID number instead.
    # NOTE(review): the captured month/day come from `\d+` and may be a single
    # digit, while the ID-number fallback is always two characters.
    birth = {
        "year": fields["year"] or idn[6:10],
        "month": fields["month"] or idn[10:12],
        "day": fields["day"] or idn[12:14],
    }
    info = {
        "name": fields["name"],
        "gender": fields["gender"],
        "nation": fields["nation"],
        "birth": birth,
        "addr": fields["addr"],
        "idn": idn,
    }
    return info, True


def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of the card's back side.

    Args:
        data_str: OCR text; it is matched from its start against ``__icon_ptn``.

    Returns:
        A ``(info, ok)`` pair. ``info`` always has the keys ``agent``,
        ``from`` and ``to``; the latter two are dicts with ``year``,
        ``month`` and ``day``. When ``ok`` is ``False`` every value is an
        empty string.
    """
    hit = __icon_ptn.match(data_str)
    if hit is None:
        blank_date = ("year", "month", "day")
        blank = {
            "agent": "",
            "from": {part: "" for part in blank_date},
            "to": {part: "" for part in blank_date},
        }
        return blank, False

    fields = hit.groupdict()
    info = {
        "agent": fields["agent"],
        # Start of the validity period.
        "from": {
            "year": fields["from_year"],
            "month": fields["from_month"],
            "day": fields["from_day"],
        },
        # End of the validity period.
        "to": {
            "year": fields["to_year"],
            "month": fields["to_month"],
            "day": fields["to_day"],
        },
    }
    return info, True


class IdcView(views.MethodView):
    """Views for the blueprint root: upload page (GET) and recognition (POST)."""

    @staticmethod
    def get():
        """Render the ``idc_index.html`` template."""
        return render_template("idc_index.html")

    @staticmethod
    def post():
        """Recognize one side of an uploaded ID-card image.

        Reads the form field ``which`` (``face`` or ``icon``, case-insensitive)
        and the uploaded file ``picture``. Each validation failure returns a
        ``Response`` carrying only a message. Otherwise every candidate image
        produced by ``rot_img_2`` is run through OCR and parsed until one
        parses successfully; that image is then saved under ``static/images/``
        and the parsed fields are returned. ``info["duration"]`` holds the
        elapsed wall-clock seconds in both the success and the failure result.
        """
        began = time()

        which = request.form.get("which")
        which = which if which is None else which.lower()
        if which not in ("face", "icon"):
            return Response(f"not recognized arg <which>: '{which}'")

        upload = request.files.get("picture")
        if upload is None:
            return Response("empty body")

        ext = get_ext_name(upload.filename)
        if not is_image_ext(ext):
            return Response("文件类型错误")

        payload = upload.read()
        if len(payload) > MAX_FILE_SIZE:
            return Response("文件过大，请重新选择")

        # Image preprocessing; works well for dark backgrounds.
        images = rot_img_2(preprocess_img(read_img(payload)))
        recognizes = Engine.rec_multi(images)
        # The requested side is fixed for the whole request, so pick the parser once.
        parse = get_face_info if which == "face" else get_icon_info

        info, msg, sta, idx = {}, "识别失败，请重新选择", False, 0
        for pos, ocr_res in enumerate(recognizes):
            kept = (it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0]))
            rec_str = "".join(kept)
            info, sta = parse(rec_str)
            if sta:
                idx = pos
                break
            if len(rec_str) >= _MIN_SIZE:
                # Enough text was read but it did not parse: log it and give a
                # more specific hint.
                msg = "识别失败，建议选择深色背景"
                Logger.error(rec_str)

        info["duration"] = time() - began
        if not sta:
            return Response(msg, info)

        raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(raw_path, images[idx])
        return Response(data=info)


class IdcHtmlView(views.MethodView):
    """HTML variant of the recognition endpoint; renders ``k-v_result.html``."""

    @staticmethod
    def post():
        """Recognize one side of an uploaded ID-card image and render the result.

        Reads the form field ``which`` (``face`` or ``icon``, case-insensitive)
        and the uploaded file ``picture``. Each validation failure returns a
        ``Response`` carrying only a message. An OCR string is only parsed
        when it starts with the expected label for the requested side. The
        image at the last index that passed that check (index 0 if none did)
        is always saved under ``static/images/``. The template receives the
        saved path as ``raw`` and the parsed fields plus ``SUCCESS``,
        ``MESSAGE`` and ``DURATION`` as ``data``.
        """
        began = time()

        which = request.form.get("which")
        which = which if which is None else which.lower()
        if which not in ("face", "icon"):
            return Response(f"not recognized arg <which>: '{which}'")

        upload = request.files.get("picture")
        if upload is None:
            return Response("empty body")

        ext = get_ext_name(upload.filename)
        if not is_image_ext(ext):
            return Response("文件类型错误")

        payload = upload.read()
        if len(payload) > MAX_FILE_SIZE:
            return Response("文件过大，请重新选择")

        images = rot_img_2(preprocess_img(read_img(payload)))
        recognizes = Engine.rec_multi(images)

        # Expected leading label and parser for the requested side.
        if which == "face":
            lead, parse = "姓名", get_face_info
        else:
            lead, parse = "中华", get_icon_info

        info, failures, sta, idx = {}, [], False, 0
        msg = "识别失败，请重新选择"
        for pos, ocr_res in enumerate(recognizes):
            kept = (it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0]))
            rec_str = "".join(kept)
            if rec_str.startswith(lead):
                idx = pos
                info, sta = parse(rec_str)
            if sta:
                msg = "识别成功"
                break
            if len(rec_str) >= _MIN_SIZE:
                # Enough text was read but it did not yield a result: log it
                # and keep it for display in the rendered message.
                msg = "识别失败，建议选择深色背景"
                Logger.error(rec_str)
                failures.append(rec_str)

        file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(file_path, images[idx])

        message = msg
        if failures:
            message += "<br>识别结果：<br>" + "<br>".join(failures)

        info["SUCCESS"] = str(sta).upper()
        info["MESSAGE"] = message
        info["DURATION"] = time() - began  # noqa
        return render_template("k-v_result.html", raw=file_path, data=info)


# Register the class-based views on the blueprint: "/" -> IdcView and
# "/html/" -> IdcHtmlView (both relative to the blueprint's URL prefix).
idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))