Tinger
/
OCR


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
							import re
from time import time
from utils.util import *
from utils.conf import MAX_CONTENT_LENGTH
from flask import Blueprint, views, render_template, request

# Blueprint that groups the ID-card recognition routes; every rule added to it
# is served under the "/idc" URL prefix.
idc = Blueprint("idc", __name__, url_prefix="/idc")

# Boilerplate characters handed to ``str_include`` when OCR fragments are
# filtered, before the remaining fragments are joined and matched below.
__exclude = "中国CHINA *#★☆"

# Portrait side: the joined OCR text must read
#   姓名<name>性别<gender>民族<nation>出生<Y>年<M>月<D>日住址<addr>公民身份号码<idn>
# * month/day accept one or two digits so a date without zero padding still
#   matches; the 年/月/日 delimiters keep the groups unambiguous.
# * idn is 17 digits followed by a check character that is a digit or x/X.
#   The class ``[\dxX]`` is required here: written as ``\d{17}\d|x|X`` the
#   alternation splits the whole group, so a number ending in X never matched
#   while a lone "X" did.
__face_ptn = r"^姓名(?P<name>.+)性别(?P<gender>男|女)民族(?P<nation>.+)" \
             r"出生(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日" \
             r"住址(?P<addr>.+)公民身份号码(?P<idn>\d{17}[\dxX])$"

# Emblem side: issuing authority followed by the validity period written as
# two dotted dates (YYYY.MM.DD) separated by any run of non-digit characters.
# NOTE(review): an expiry printed as text instead of a date (e.g. 长期) cannot
# match because the end date requires digits - confirm whether that is needed.
__icon_ptn = r"^中华人民共和国居民身份证签发机关(?P<agent>.+)" \
             r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
             r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"


def get_face_info(data: "list[str]") -> "tuple[dict, str, bool]":
    """Extract the portrait-side fields from a list of OCR text fragments.

    Args:
        data: recognised text fragments, in reading order.

    Returns:
        ``(info, message, ok)``. ``info`` always carries the keys ``name``,
        ``gender``, ``nation``, ``birth`` (``year``/``month``/``day``),
        ``addr`` and ``idn``; they stay empty strings unless parsing succeeds.
        On success ``message`` is the joined text that was matched and ``ok``
        is ``True``; otherwise ``message`` is a user-facing error text.
    """
    info = {
        "name": "",
        "gender": "",
        "nation": "",
        "birth": {"year": "", "month": "", "day": ""},
        "addr": "",
        "idn": "",
    }

    # At least 5 recognition results are required.
    if len(data) < 5:
        return info, "请使用正确的身份证人像面照片", False

    # Drop the fragments flagged by str_include (project helper), glue the
    # rest together and match the whole string against the face pattern.
    joined = "".join(piece for piece in data if not str_include(__exclude, piece))
    found = re.match(__face_ptn, joined)
    if found is None:
        return info, "识别失败，请重新选择", False

    for field in ("name", "gender", "nation", "addr", "idn"):
        info[field] = found.group(field)
    info["birth"] = {part: found.group(part) for part in ("year", "month", "day")}
    return info, joined, True


def get_icon_info(data: "list[str]"):
    """Extract the emblem-side fields from a list of OCR text fragments.

    Args:
        data: recognised text fragments, in reading order.

    Returns:
        ``(info, message, ok)``. ``info`` always carries ``agent`` plus the
        ``from`` and ``to`` dates (each with ``year``/``month``/``day``); the
        values stay empty strings unless parsing succeeds. On success
        ``message`` is the joined text that was matched and ``ok`` is
        ``True``; otherwise ``message`` is a user-facing error text.
    """
    info = {
        "agent": "",
        "from": {"year": "", "month": "", "day": ""},
        "to": {"year": "", "month": "", "day": ""},
    }

    # At least 4 recognition results are required.
    if len(data) < 4:
        return info, "请使用正确的身份证国徽面照片", False

    # Drop the fragments flagged by str_include (project helper), glue the
    # rest together and match the whole string against the emblem pattern.
    joined = "".join(piece for piece in data if not str_include(__exclude, piece))
    found = re.match(__icon_ptn, joined)
    if found is None:
        return info, "识别失败，请重新选择", False

    info["agent"] = found.group("agent")
    # The pattern names its date groups from_year, from_month, ... to_day.
    for side in ("from", "to"):
        info[side] = {part: found.group(f"{side}_{part}") for part in ("year", "month", "day")}
    return info, joined, True


def _parse_upload():
    """Validate the posted form and read the uploaded picture.

    Reads ``which`` from the form and ``picture`` from the uploaded files of
    the current request.

    Returns:
        ``(which, ext, content, None)`` when the request is acceptable, where
        ``which`` is ``"face"`` or ``"icon"``, ``ext`` is what
        ``get_ext_name`` returned for the uploaded filename and ``content``
        is the result of reading the upload. Otherwise
        ``(None, None, None, error)`` where ``error`` is the ``Response`` the
        view should return as-is.
    """
    which = request.form.get("which")
    if which is not None:
        which = which.lower()
    if which not in ("face", "icon"):
        return None, None, None, Response(f"not recognized arg <which>: '{which}'")

    pic = request.files.get("picture")
    if pic is None:
        return None, None, None, Response("empty body")

    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return None, None, None, Response("文件类型错误")

    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return None, None, None, Response("文件过大，请重新选择")

    return which, ext, content, None


def _recognize(which, content):
    """Run OCR over the candidate images and parse the first usable result.

    Args:
        which: ``"face"`` for the portrait side, anything else is treated as
            the emblem side.
        content: raw upload content, handed to ``read_img``.

    Returns:
        ``(info, msg, sta, images, idx)``: the parsed fields, the message and
        success flag from the last parser call (or the defaults when no
        parser ran), the images produced by ``rot_img`` and the index of the
        last OCR result that was looked at (0 when there were none).
    """
    images = rot_img(read_img(content))
    rec = Engine.ocr_multi(images, cls=True, use_space=False)

    # Each side is recognised by the text its first fragment starts with.
    if which == "face":
        prefix, parse = "姓名", get_face_info
    else:
        prefix, parse = "中华", get_icon_info

    info, msg, sta, idx = {}, "识别失败，请重新选择", False, 0
    for idx, ocr_res in enumerate(rec):
        words = [it[0].replace(" ", "") for it in ocr_res]
        # Skip results that are empty or do not start with the expected text.
        if not words or not words[0].startswith(prefix):
            continue
        info, msg, sta = parse(words)
        if sta:
            break
    return info, msg, sta, images, idx


class IdcView(views.MethodView):
    """Upload page (GET) and recognition endpoint answering with ``Response`` (POST)."""

    @staticmethod
    def get():
        """Render the upload page."""
        return render_template("idc_index.html")

    @staticmethod
    def post():
        """Recognise the uploaded card side and return the parsed fields.

        Invalid requests are answered with an error ``Response``. On success
        the matching image is saved under ``static/images/`` and the fields
        (plus ``duration`` in seconds) are returned; on failure the failure
        message is returned together with whatever ``info`` was collected.
        """
        start = time()
        which, ext, content, error = _parse_upload()
        if error is not None:
            return error

        info, msg, sta, images, idx = _recognize(which, content)
        info["duration"] = time() - start
        if sta:
            # Only a successfully parsed image is kept on disk.
            raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
            save_img(raw_path, images[idx])
            return Response(data=info)
        return Response(msg, info)


class IdcHtmlView(views.MethodView):
    """Recognition endpoint that renders the result as an HTML page (POST only)."""

    @staticmethod
    def post():
        """Recognise the uploaded card side and render a key/value result page.

        Invalid requests are answered with an error ``Response``. Otherwise
        the image at the last examined index is saved whether or not parsing
        succeeded, and its path is passed to the template along with the
        fields, ``SUCCESS``, ``MESSAGE`` and ``DURATION``.
        """
        start = time()
        which, ext, content, error = _parse_upload()
        if error is not None:
            return error

        info, msg, sta, images, idx = _recognize(which, content)

        file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(file_path, images[idx])

        info["SUCCESS"] = str(sta).upper()
        info["MESSAGE"] = msg
        info["DURATION"] = time() - start  # noqa
        return render_template("k-v_result.html", raw=file_path, data=info)


# Register the class-based views on the blueprint: "/" is handled by IdcView
# and "/html/" by IdcHtmlView (both paths are relative to the blueprint prefix).
idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))