idc.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. import re
  2. from time import time
  3. from utils.util import *
  4. from utils.conf import MAX_CONTENT_LENGTH
  5. from flask import Blueprint, views, render_template, request
  6. from utils.logger import Logger
  7. idc = Blueprint("idc", __name__, url_prefix="/idc")
  8. _MIN_SIZE = 46
  9. __exclude = "中国CHINA *#★☆"
  10. __face_ptn = r"姓名(?P<name>.+)" \
  11. r"性别(?P<gender>男|女)民族(?P<nation>.+)" \
  12. r"出生(?P<year>\d{4})年(?P<month>\d\d)月(?P<day>\d\d)日" \
  13. r"住址(?P<addr>.+)" \
  14. r"公民身份号码(?P<idn>\d{17}\d|x|X)"
  15. __icon_ptn = r"中华人民共和国" \
  16. r"居民身份证" \
  17. r"签发机关(?P<agent>.+)" \
  18. r"有效期限(?P<from_year>\d{4})\.(?P<from_month>\d{2})\.(?P<from_day>\d{2})" \
  19. r"[^\d]+(?P<to_year>\d{4})\.(?P<to_month>\d{2})\.(?P<to_day>\d{2})"
  20. def get_face_info(data_str: "str") -> "tuple[dict, bool]":
  21. res = {"name": "", "gender": "", "nation": "", "birth": {"year": "", "month": "", "day": ""}, "addr": "", "idn": ""}
  22. if match := re.match(__face_ptn, data_str):
  23. res["name"] = match.group("name")
  24. res["gender"] = match.group("gender")
  25. res["nation"] = match.group("nation")
  26. res["birth"] = {
  27. "year": match.group("year"),
  28. "month": match.group("month"),
  29. "day": match.group("day")
  30. }
  31. res["addr"] = match.group("addr")
  32. res["idn"] = match.group("idn")
  33. return res, True
  34. return res, False
  35. def get_icon_info(data_str: "str") -> "tuple[dict, bool]":
  36. res = {"agent": "", "from": {"year": "", "month": "", "day": ""}, "to": {"year": "", "month": "", "day": ""}}
  37. if match := re.match(__icon_ptn, data_str):
  38. res["agent"] = match.group("agent")
  39. res["from"] = {
  40. "year": match.group("from_year"),
  41. "month": match.group("from_month"),
  42. "day": match.group("from_day"),
  43. }
  44. res["to"] = {
  45. "year": match.group("to_year"),
  46. "month": match.group("to_month"),
  47. "day": match.group("to_day"),
  48. }
  49. return res, True
  50. return res, False
  51. class IdcView(views.MethodView):
  52. @staticmethod
  53. def get():
  54. return render_template("idc_index.html")
  55. @staticmethod
  56. def post():
  57. start = time()
  58. which = request.form.get("which")
  59. if which is not None:
  60. which = which.lower()
  61. if which not in ["face", "icon"]:
  62. return Response(f"not recognized arg <which>: '{which}'")
  63. pic = request.files.get("picture")
  64. if pic is None:
  65. return Response("empty body")
  66. ext = get_ext_name(pic.filename)
  67. if not is_image_ext(ext):
  68. return Response("文件类型错误")
  69. content = pic.read()
  70. if len(content) > MAX_CONTENT_LENGTH:
  71. return Response("文件过大,请重新选择")
  72. img = read_img(content)
  73. cropped = crop_img(img) # 边缘裁剪,对深色背景的效果很好
  74. images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]] # 旋转后仅取横长竖宽
  75. rec = Engine.rec_multi(images)
  76. info, err_rec, sta, idx = {}, [], False, 0
  77. for i, ocr_res in enumerate(rec):
  78. rec_str = "".join([it[0] for it in ocr_res])
  79. if which == "face":
  80. if rec_str.startswith("姓名"):
  81. idx = i
  82. info, sta = get_face_info(rec_str)
  83. else:
  84. if rec_str.startswith("中华"):
  85. idx = i
  86. info, sta = get_icon_info(rec_str)
  87. if sta:
  88. break
  89. elif len(rec_str) >= _MIN_SIZE:
  90. Logger.error(rec_str)
  91. err_rec.append(rec_str)
  92. info["duration"] = time() - start
  93. if sta:
  94. raw_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  95. save_img(raw_path, images[idx])
  96. return Response(data=info)
  97. else:
  98. msg = "识别失败,建议使用深色背景"
  99. return Response(msg, info)
  100. class IdcHtmlView(views.MethodView):
  101. @staticmethod
  102. def post():
  103. start = time()
  104. which = request.form.get("which")
  105. if which is not None:
  106. which = which.lower()
  107. if which not in ["face", "icon"]:
  108. return Response(f"not recognized arg <which>: '{which}'")
  109. pic = request.files.get("picture")
  110. if pic is None:
  111. return Response("empty body")
  112. ext = get_ext_name(pic.filename)
  113. if not is_image_ext(ext):
  114. return Response("文件类型错误")
  115. content = pic.read()
  116. if len(content) > MAX_CONTENT_LENGTH:
  117. return Response("文件过大,请重新选择")
  118. img = read_img(content)
  119. cropped = crop_img(img)
  120. images = [item for item in rot_img(cropped) if item.shape[0] < item.shape[1]]
  121. rec = Engine.rec_multi(images)
  122. info, err_rec, sta, idx = {}, [], False, 0
  123. for i, ocr_res in enumerate(rec):
  124. rec_str = "".join([it[0] for it in ocr_res])
  125. if which == "face":
  126. if rec_str.startswith("姓名"):
  127. idx = i
  128. info, sta = get_face_info(rec_str)
  129. else:
  130. if rec_str.startswith("中华"):
  131. idx = i
  132. info, sta = get_icon_info(rec_str)
  133. if sta:
  134. break
  135. elif len(rec_str) >= _MIN_SIZE:
  136. Logger.error(rec_str)
  137. err_rec.append(rec_str)
  138. file_path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  139. save_img(file_path, images[idx])
  140. info["SUCCESS"] = str(sta).upper()
  141. if sta:
  142. info["MESSAGE"] = "识别成功"
  143. else:
  144. info["MESSAGE"] = "识别失败,建议使用深色背景<br>识别结果:<br>" + "<br>".join(err_rec)
  145. info["DURATION"] = time() - start # noqa
  146. return render_template("k-v_result.html", raw=file_path, data=info)
  147. idc.add_url_rule("/", view_func=IdcView.as_view("idc"))
  148. idc.add_url_rule("/html/", view_func=IdcHtmlView.as_view("idc-html"))