123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- import re
- from time import time
- from utils.util import *
- from utils.conf import MAX_FILE_SIZE
- from flask import Blueprint, views, render_template, request
- from utils.logger import Logger
# Blueprint that carries the ID-card recognition routes; everything is
# mounted under the "/idc" URL prefix.
idc = Blueprint("idc", __name__, url_prefix="/idc")

# Length threshold used by the views: when an OCR text fails to parse but is
# at least this long, it is logged and the user is advised to use a dark
# background.
_MIN_SIZE = 46

# Handed to str_include() together with every OCR fragment; fragments for
# which str_include() returns a truthy value are dropped before parsing.
# NOTE(review): str_include() is defined elsewhere - confirm its exact
# matching semantics.
_EXCLUDE_CHAR = "中国CHINA *#★☆"

# Pattern for the joined OCR text of the card's front side.
__face_ptn = re.compile(
    # The name may come before the `姓名` label because the name is printed
    # in a larger font.
    r"[姓名]{0,2}(?P<name>.+?)[姓名]{0,2}"
    r"[性别]{1,2}(?P<gender>[男女])民族(?P<nation>.+?)"
    # Optional group: in testing this line quite often failed to be
    # recognized.
    r"([出生]{1,2}(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日)?"
    r"[住址]{1,2}(?P<addr>.+?)"
    r"[公民身份号码]{1,6}(?P<idn>\d{17}[\dx])$",
    flags=re.IGNORECASE,
)

# Pattern for the joined OCR text of the card's back side.
# NOTE(review): the text must end with a dotted numeric date, so a validity
# period whose end is not written as YYYY.MM.DD will not match - confirm
# whether such cards need to be supported.
__icon_ptn = re.compile(
    r"[中华人民共和国居身份证]{0,12}?"
    r"[签发机关]{1,4}(?P<agent>.+?)"
    r"[有效期限]{1,4}(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
    r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
)
def get_face_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of an ID card's front side.

    Args:
        data_str: OCR text, matched from its start against the module-level
            front-side pattern.

    Returns:
        A ``(info, ok)`` tuple. ``info`` always has the keys ``name``,
        ``gender``, ``nation``, ``birth`` (a dict with ``year``, ``month``
        and ``day``), ``addr`` and ``idn``. When the text does not match,
        every value is an empty string and ``ok`` is ``False``.
    """
    found = __face_ptn.match(data_str)
    if found is None:
        blank = {
            "name": "",
            "gender": "",
            "nation": "",
            "birth": {"year": "", "month": "", "day": ""},
            "addr": "",
            "idn": "",
        }
        return blank, False

    fields = found.groupdict()
    id_number = fields["idn"]
    # The birth-date group is optional in the pattern; when it was not
    # captured, the date is sliced out of the ID number instead.
    birth = {
        "year": fields["year"] or id_number[6:10],
        "month": fields["month"] or id_number[10:12],
        "day": fields["day"] or id_number[12:14],
    }
    parsed = {
        "name": fields["name"],
        "gender": fields["gender"],
        "nation": fields["nation"],
        "birth": birth,
        "addr": fields["addr"],
        "idn": id_number,
    }
    return parsed, True
def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
    """Parse the joined OCR text of an ID card's back side.

    Args:
        data_str: OCR text, matched from its start against the module-level
            back-side pattern.

    Returns:
        A ``(info, ok)`` tuple. ``info`` always has the keys ``agent``,
        ``from`` and ``to``; the latter two are dicts with ``year``,
        ``month`` and ``day``. When the text does not match, every value is
        an empty string and ``ok`` is ``False``.
    """
    found = __icon_ptn.match(data_str)
    if found is None:
        empty_date = {"year": "", "month": "", "day": ""}
        return {"agent": "", "from": dict(empty_date), "to": dict(empty_date)}, False

    parts = found.groupdict()
    # The pattern names its date groups "<from|to>_<year|month|day>".
    validity = {
        bound: {unit: parts[f"{bound}_{unit}"] for unit in ("year", "month", "day")}
        for bound in ("from", "to")
    }
    return {"agent": parts["agent"], **validity}, True
class IdcView(views.MethodView):
    """Upload page (GET) and recognition endpoint (POST) for ID cards."""

    @staticmethod
    def get():
        """Render the ``idc_index.html`` template."""
        return render_template("idc_index.html")

    @staticmethod
    def post():
        """Recognize an uploaded ID-card picture and return the parsed fields.

        Reads the form field ``which`` (``face`` or ``icon``,
        case-insensitive) and the uploaded file ``picture``. Invalid input
        is answered with a ``Response`` carrying only a message. Otherwise
        each candidate image is run through OCR until one parses; on success
        that image is saved under ``static/images/`` and the parsed data is
        returned, on failure the failure message and the last parse result
        are returned. ``duration`` (seconds) is added to the data in both
        cases.
        """
        began = time()

        which = request.form.get("which")
        which = which.lower() if which is not None else which
        if which not in ("face", "icon"):
            return Response(f"not recognized arg <which>: '{which}'")

        upload = request.files.get("picture")
        if upload is None:
            return Response("empty body")

        ext = get_ext_name(upload.filename)
        if not is_image_ext(ext):
            return Response("文件类型错误")

        payload = upload.read()
        if len(payload) > MAX_FILE_SIZE:
            return Response("文件过大,请重新选择")

        # Image preprocessing; it works well for dark backgrounds.
        candidates = rot_img_2(preprocess_img(read_img(payload)))
        ocr_batches = Engine.rec_multi(candidates)

        # The side is fixed for the whole request, so pick the parser once.
        parse = get_face_info if which == "face" else get_icon_info

        info, msg, ok, hit = {}, "识别失败,请重新选择", False, 0
        for pos, ocr_items in enumerate(ocr_batches):
            # The first element of every OCR item is used as its text.
            text = "".join(
                item[0] for item in ocr_items if not str_include(_EXCLUDE_CHAR, item[0])
            )
            info, ok = parse(text)
            if ok:
                hit = pos
                break
            if len(text) >= _MIN_SIZE:
                # Enough text was read, yet it did not parse.
                msg = "识别失败,建议选择深色背景"
                Logger.error(text)

        info["duration"] = time() - began
        if not ok:
            return Response(msg, info)

        # Keep the candidate image that was recognized successfully.
        raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(raw_path, candidates[hit])
        return Response(data=info)
class IdcHtmlView(views.MethodView):
    """Recognition endpoint that answers with a rendered HTML result page."""

    @staticmethod
    def post():
        """Recognize an uploaded ID-card picture and render the result.

        Reads the form field ``which`` (``face`` or ``icon``,
        case-insensitive) and the uploaded file ``picture``. Invalid input
        is answered with a ``Response`` carrying only a message. Otherwise
        each candidate image is run through OCR until one parses. One
        candidate image is always saved under ``static/images/`` and the
        ``k-v_result.html`` template is rendered with its path (``raw``) and
        the parsed data (``data``), extended with ``SUCCESS``, ``MESSAGE``
        and ``DURATION`` (seconds).
        """
        start = time()

        which = request.form.get("which")
        if which is not None:
            which = which.lower()
        if which not in ("face", "icon"):
            return Response(f"not recognized arg <which>: '{which}'")

        pic = request.files.get("picture")
        if pic is None:
            return Response("empty body")

        ext = get_ext_name(pic.filename)
        if not is_image_ext(ext):
            return Response("文件类型错误")

        content = pic.read()
        if len(content) > MAX_FILE_SIZE:
            return Response("文件过大,请重新选择")

        images = rot_img_2(preprocess_img(read_img(content)))
        recognizes = Engine.rec_multi(images)

        # Parser and the leading text that marks a plausible candidate for
        # the requested side.
        if which == "face":
            parse, marker = get_face_info, "姓名"
        else:
            parse, marker = get_icon_info, "中华"

        info, err_rec, sta, idx = {}, [], False, 0
        msg = "识别失败,请重新选择"
        for i, ocr_res in enumerate(recognizes):
            # The first element of every OCR item is used as its text.
            rec_str = "".join(
                it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0])
            )
            if rec_str.startswith(marker):
                # Best guess for the image to show if nothing parses.
                idx = i
            info, sta = parse(rec_str)
            if sta:
                # A text can parse without starting with the marker (the
                # front-side pattern accepts the name before the label), so
                # always show the image that was actually recognized.
                idx = i
                msg = "识别成功"
                break
            if len(rec_str) >= _MIN_SIZE:
                # Enough text was read, yet it did not parse.
                msg = "识别失败,建议选择深色背景"
                Logger.error(rec_str)
                err_rec.append(rec_str)

        file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
        save_img(file_path, images[idx])

        info["SUCCESS"] = str(sta).upper()
        info["MESSAGE"] = msg
        if err_rec:
            # Append the texts that were read but could not be parsed.
            info["MESSAGE"] += "<br>识别结果:<br>" + "<br>".join(err_rec)
        info["DURATION"] = time() - start  # noqa
        return render_template("k-v_result.html", raw=file_path, data=info)
# Upload page and recognition endpoint at the blueprint root.
idc.add_url_rule(
    rule="/",
    view_func=IdcView.as_view("idc"),
)
# Variant that renders the recognition result as an HTML page.
idc.add_url_rule(
    rule="/html/",
    view_func=IdcHtmlView.as_view("idc-html"),
)
|