util.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. import cv2
  2. import numpy as np
  3. from flask import jsonify
  4. from paddleocr import PaddleOCR
  5. from random import randint, seed
  6. from time import localtime, strftime
  7. from paddleocr.tools.infer.utility import draw_box_txt_fine
# Public names exported by a star-import of this module.
__all__ = [
    "Args", "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext", "recognize", "draw_img",
    "json_all"
]
# Alphabet rand_str() samples from: 26 lower-case letters, 10 digits, 26 upper-case letters.
__StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
# Highest valid index into __StrBase; random.randint() includes its upper bound.
__StrBaseLen = len(__StrBase) - 1
# Lower-case extensions accepted by is_image_ext().
__AcceptExtNames = ["jpg", "jpeg", "bmp", "png", "rgb", "tif", "tiff", "gif", "pdf"]
# Single OCR engine shared by recognize(). It is constructed at import time with
# GPU disabled, MKL-DNN enabled, and model directories given relative to the
# working directory.
__OcrEngine = PaddleOCR(
    use_gpu=False,
    enable_mkldnn=True,
    det_model_dir="models/det/",
    rec_model_dir="models/rec/",
    cls_model_dir="models/cls/",
    use_angle_cls=True,
    use_space_char=True
)
  24. class Args:
  25. def __init__(self, **kwargs):
  26. self.__update(
  27. use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False,
  28. min_subgraph_size=15, precision="fp32", gpu_mem=500, image_dir=None, page_num=0,
  29. det_algorithm="DB", det_model_dir="models/det/", det_limit_side_len=960, det_limit_type="max",
  30. det_box_type="quad", det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5,
  31. max_batch_size=10, use_dilation=False, det_db_score_mode="fast", det_east_score_thresh=0.8,
  32. det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5,
  33. det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16,
  34. det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5,
  35. rec_algorithm="SVTR_LCNet", rec_model_dir="models/rec/", rec_image_inverse=True,
  36. rec_image_shape="3, 48, 320", rec_batch_num=6, max_text_length=25,
  37. rec_char_dict_path="venv/lib/site-packages/paddleocr/ppocr/utils/ppocr_keys_v1.txt",
  38. use_space_char=True, vis_font_path="static/simfang.ttf", drop_score=0.5,
  39. e2e_algorithm="PGNet", e2e_model_dir=None, e2e_limit_side_len=768, e2e_limit_type="max",
  40. e2e_pgnet_score_thresh=0.5, e2e_char_dict_path="./ppocr/utils/ic15_dict.txt",
  41. e2e_pgnet_valid_set="totaltext", e2e_pgnet_mode="fast", use_angle_cls=True,
  42. cls_model_dir="models/cls/", cls_image_shape="3, 48, 192", label_list=["0", "180"],
  43. cls_batch_num=6, cls_thresh=0.9, enable_mkldnn=True, cpu_threads=10, use_pdserving=False,
  44. sr_model_dir=None, sr_image_shape="3, 32, 128", sr_batch_num=1,
  45. draw_img_save_dir="static/rec_res/", save_crop_res=False, crop_res_save_dir="./output",
  46. use_mp=False, benchmark=False, save_log_path="./log_output/",
  47. show_log=True, use_onnx=False, output="./output", table_max_len=488, table_algorithm="TableAttn",
  48. table_model_dir=None, merge_no_span_structure=True, table_char_dict_path=None,
  49. layout_model_dir=None, layout_dict_path=None, layout_score_threshold=0.5,
  50. layout_nms_threshold=0.5, kie_algorithm="LayoutXLM", ser_model_dir=None, re_model_dir=None,
  51. use_visual_backbone=True, ser_dict_path="../train_data/XFUND/class_list_xfun.txt",
  52. ocr_order_method=None, mode="structure", image_orientation=False, layout=True, table=True,
  53. ocr=True, recovery=False, use_pdf2docx_api=False, lang="ch", det=True, rec=True, type="ocr",
  54. ocr_version="PP-OCRv3", structure_version="PP-StructureV2"
  55. )
  56. self.__update(**kwargs)
  57. def __update(self, **kwargs):
  58. for k, v in kwargs:
  59. self.__dict__[k] = v
  60. def __setattr__(self, key: "str", value):
  61. self.__dict__[key] = value
  62. def __getattribute__(self, key: "str"):
  63. assert key in self.__dict__.keys()
  64. return self.__dict__[key]
  65. def rand_str(size: "int" = 8) -> "str":
  66. return "".join([__StrBase[randint(0, __StrBaseLen)] for _ in range(size)])
  67. def current_time() -> "str":
  68. return strftime("%Y-%m-%d_%H-%M-%S", localtime())
  69. def get_ext_name(name: "str") -> "str":
  70. return name.split(".")[-1].lower()
  71. def is_image_ext(ext: "str") -> bool:
  72. return ext in __AcceptExtNames
  73. def Response(message: "str" = None, data=None):
  74. if message is None:
  75. return jsonify(success=True, message="操作成功", data=data)
  76. return jsonify(success=False, message=message, data=data)
  77. def recognize(content: "str") -> "tuple[list, tuple]":
  78. img = cv2.imdecode(np.fromstring(content, np.uint8), 1) # noqa
  79. return __OcrEngine.ocr(img)[0], img.shape
  80. def draw_img(shape: "tuple", data: "list[dict]", path: "str", drop: "float" = 0.5):
  81. img = np.ones(shape, dtype=np.uint8) * 255
  82. seed(0)
  83. for one in data:
  84. if one["rate"] < drop:
  85. continue
  86. color = (randint(0, 255), randint(0, 255), randint(0, 255))
  87. text = draw_box_txt_fine((shape[1], shape[0]), one["pos"], one["word"], font_path="static/simfang.ttf")
  88. pts = np.array(one["pos"], np.int32).reshape((-1, 1, 2))
  89. cv2.polylines(text, [pts], True, color, 1) # noqa
  90. img = cv2.bitwise_and(img, text) # noqa
  91. cv2.imwrite(path, np.array(img)) # noqa
  92. def json_all(data: "dict or list") -> "bool":
  93. if isinstance(data, list):
  94. for item in data:
  95. if isinstance(item, str) and not item:
  96. return False
  97. elif isinstance(item, (list, dict)) and not json_all(item):
  98. return False
  99. return True
  100. elif isinstance(data, dict):
  101. for value in data.values():
  102. if isinstance(value, str) and not value:
  103. return False
  104. elif isinstance(value, (list, dict)) and not json_all(value):
  105. return False
  106. return True
  107. raise TypeError(f"except node type are: [list, dict], but got a {type(data)} instead.")