idc.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. import re
  2. from time import time
  3. from utils.util import *
  4. from utils.conf import MAX_FILE_SIZE
  5. from flask import Blueprint, views, render_template, request
  6. from utils.logger import Logger
# Blueprint that groups the ID-card recognition views under the "/idc" URL prefix.
idc = Blueprint("idc", __name__, url_prefix="/idc")
  8. _MIN_SIZE = 46
  9. _EXCLUDE_CHAR = "中国CHINA *#★☆"
  10. __face_ptn = re.compile(
  11. r"[姓名]{0,2}(?P<name>.+?)[姓名]{0,2}" # 解决name在`姓名`之前,name字体较大
  12. r"[性别]{1,2}(?P<gender>[男女])民族(?P<nation>.+?)"
  13. r"([出生]{1,2}(?P<year>\d{4})年(?P<month>\d+)月(?P<day>\d+)日)?" # 测试中出现该行有较大概率未被识别
  14. r"[住址]{1,2}(?P<addr>.+?)"
  15. r"[公民身份号码]{1,6}(?P<idn>\d{17}[\dx])$",
  16. re.I
  17. )
  18. __icon_ptn = re.compile(
  19. r"[中华人民共和国居身份证]{0,12}?"
  20. r"[签发机关]{1,4}(?P<agent>.+?)"
  21. r"[有效期限]{1,4}(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})"
  22. r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})$"
  23. )
  24. def get_face_info(data_str: "str") -> "tuple[dict, bool]":
  25. res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
  26. if match := __face_ptn.match(data_str):
  27. res["idn"] = match.group("idn")
  28. res["name"] = match.group("name")
  29. res["gender"] = match.group("gender")
  30. res["nation"] = match.group("nation")
  31. res["birth"] = {
  32. "year": match.group("year") or res["idn"][6:10],
  33. "month": match.group("month") or res["idn"][10:12],
  34. "day": match.group("day") or res["idn"][12:14]
  35. }
  36. res["addr"] = match.group("addr")
  37. return res, True
  38. return res, False
  39. def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
  40. res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
  41. if match := __icon_ptn.match(data_str):
  42. res["agent"] = match.group("agent")
  43. res["from"] = {
  44. "year": match.group("from_year"),
  45. "month": match.group("from_month"),
  46. "day": match.group("from_day"),
  47. }
  48. res["to"] = {
  49. "year": match.group("to_year"),
  50. "month": match.group("to_month"),
  51. "day": match.group("to_day"),
  52. }
  53. return res, True
  54. return res, False
  55. class IdcView(views.MethodView):
  56. @staticmethod
  57. def get():
  58. return render_template("idc_index.html")
  59. @staticmethod
  60. def post():
  61. start = time()
  62. which = request.form.get("which")
  63. if which is not None:
  64. which = which.lower()
  65. if which not in ["face", "icon"]:
  66. return Response(f"not recognized arg <which>: '{which}'")
  67. pic = request.files.get("picture")
  68. if pic is None:
  69. return Response("empty body")
  70. ext = get_ext_name(pic.filename)
  71. if not is_image_ext(ext):
  72. return Response("文件类型错误")
  73. content = pic.read()
  74. if len(content) > MAX_FILE_SIZE:
  75. return Response("文件过大,请重新选择")
  76. processed = preprocess_img(read_img(content)) # 图像预处理,对深色背景的效果很好
  77. images = rot_img_2(processed)
  78. recognizes = Engine.rec_multi(images)
  79. info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
  80. for i, ocr_res in enumerate(recognizes):
  81. rec_str = "".join([it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0])])
  82. if which == "face":
  83. info, sta = get_face_info(rec_str)
  84. else:
  85. info, sta = get_icon_info(rec_str)
  86. if sta:
  87. idx = i
  88. break
  89. elif len(rec_str) >= _MIN_SIZE:
  90. msg = "识别失败,建议选择深色背景"
  91. Logger.error(rec_str)
  92. info["duration"] = time() - start
  93. if sta:
  94. raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  95. save_img(raw_path, images[idx])
  96. return Response(data=info)
  97. return Response(msg, info)
  98. class IdcHtmlView(views.MethodView):
  99. @staticmethod
  100. def post():
  101. start = time()
  102. which = request.form.get("which")
  103. if which is not None:
  104. which = which.lower()
  105. if which not in ["face", "icon"]:
  106. return Response(f"not recognized arg <which>: '{which}'")
  107. pic = request.files.get("picture")
  108. if pic is None:
  109. return Response("empty body")
  110. ext = get_ext_name(pic.filename)
  111. if not is_image_ext(ext):
  112. return Response("文件类型错误")
  113. content = pic.read()
  114. if len(content) > MAX_FILE_SIZE:
  115. return Response("文件过大,请重新选择")
  116. cropped = preprocess_img(read_img(content))
  117. images = rot_img_2(cropped)
  118. recognizes = Engine.rec_multi(images)
  119. info, err_rec, sta, idx = {}, [], False, 0
  120. msg = "识别失败,请重新选择"
  121. for i, ocr_res in enumerate(recognizes):
  122. rec_str = "".join([it[0] for it in ocr_res if not str_include(_EXCLUDE_CHAR, it[0])])
  123. if which == "face":
  124. if rec_str.startswith("姓名"):
  125. idx = i
  126. info, sta = get_face_info(rec_str)
  127. else:
  128. if rec_str.startswith("中华"):
  129. idx = i
  130. info, sta = get_icon_info(rec_str)
  131. if sta:
  132. msg = "识别成功"
  133. break
  134. elif len(rec_str) >= _MIN_SIZE:
  135. msg = "识别失败,建议选择深色背景"
  136. Logger.error(rec_str)
  137. err_rec.append(rec_str)
  138. file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  139. save_img(file_path, images[idx])
  140. info["SUCCESS"] = str(sta).upper()
  141. info["MESSAGE"] = msg
  142. if len(err_rec):
  143. info["MESSAGE"] += "<br>识别结果:<br>" + "<br>".join(err_rec)
  144. info["DURATION"] = time() - start # noqa
  145. return render_template("k-v_result.html", raw=file_path, data=info)
# Register the views on the blueprint: "/" uses IdcView (endpoint "idc"),
# "/html/" uses IdcHtmlView (endpoint "idc-html").
idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))