util.py 4.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. from time import localtime, strftime
  2. from random import randint, seed
  3. import cv2
  4. from paddleocr.tools.infer.utility import draw_box_txt_fine
  5. import numpy as np
  6. from flask import jsonify
  7. __all__ = ["Args", "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext", "draw_img"]
  8. StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
  9. StrBaseLen = len(StrBase) - 1
  10. AcceptExtNames = ["jpg", "jpeg", "bmp", "png", "rgb", "tif", "tiff", "gif", "pdf"]
  11. class Args:
  12. def __init__(self, **kwargs):
  13. self.__update(
  14. use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False,
  15. min_subgraph_size=15, precision="fp32", gpu_mem=500, image_dir=None, page_num=0,
  16. det_algorithm="DB", det_model_dir="models/det/", det_limit_side_len=960, det_limit_type="max",
  17. det_box_type="quad", det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5,
  18. max_batch_size=10, use_dilation=False, det_db_score_mode="fast", det_east_score_thresh=0.8,
  19. det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5,
  20. det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16,
  21. det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5,
  22. rec_algorithm="SVTR_LCNet", rec_model_dir="models/rec/", rec_image_inverse=True,
  23. rec_image_shape="3, 48, 320", rec_batch_num=6, max_text_length=25,
  24. rec_char_dict_path="E:/Project/Python/PaddleOCR/venv/lib/site-packages/paddleocr/ppocr/utils/ppocr_keys_v1.txt",
  25. use_space_char=True, vis_font_path="static/simfang.ttf", drop_score=0.5,
  26. e2e_algorithm="PGNet", e2e_model_dir=None, e2e_limit_side_len=768, e2e_limit_type="max",
  27. e2e_pgnet_score_thresh=0.5, e2e_char_dict_path="./ppocr/utils/ic15_dict.txt",
  28. e2e_pgnet_valid_set="totaltext", e2e_pgnet_mode="fast", use_angle_cls=True,
  29. cls_model_dir="models/cls/", cls_image_shape="3, 48, 192", label_list=["0", "180"],
  30. cls_batch_num=6, cls_thresh=0.9, enable_mkldnn=True, cpu_threads=10, use_pdserving=False,
  31. sr_model_dir=None, sr_image_shape="3, 32, 128", sr_batch_num=1,
  32. draw_img_save_dir="static/rec_res/", save_crop_res=False, crop_res_save_dir="./output",
  33. use_mp=False, benchmark=False, save_log_path="./log_output/",
  34. show_log=True, use_onnx=False, output="./output", table_max_len=488, table_algorithm="TableAttn",
  35. table_model_dir=None, merge_no_span_structure=True, table_char_dict_path=None,
  36. layout_model_dir=None, layout_dict_path=None, layout_score_threshold=0.5,
  37. layout_nms_threshold=0.5, kie_algorithm="LayoutXLM", ser_model_dir=None, re_model_dir=None,
  38. use_visual_backbone=True, ser_dict_path="../train_data/XFUND/class_list_xfun.txt",
  39. ocr_order_method=None, mode="structure", image_orientation=False, layout=True, table=True,
  40. ocr=True, recovery=False, use_pdf2docx_api=False, lang="ch", det=True, rec=True, type="ocr",
  41. ocr_version="PP-OCRv3", structure_version="PP-StructureV2"
  42. )
  43. self.__update(**kwargs)
  44. def __update(self, **kwargs):
  45. for k, v in kwargs:
  46. self.__dict__[k] = v
  47. def __setattr__(self, key: "str", value):
  48. self.__dict__[key] = value
  49. def __getattribute__(self, key: "str"):
  50. assert key in self.__dict__.keys()
  51. return self.__dict__[key]
  52. def rand_str(size: "int" = 8) -> "str":
  53. return "".join([StrBase[randint(0, StrBaseLen)] for _ in range(size)])
  54. def current_time() -> "str":
  55. return strftime("%Y-%m-%d_%H-%M-%S", localtime())
  56. def get_ext_name(name: "str") -> "str":
  57. return name.split(".")[-1].lower()
  58. def is_image_ext(ext: "str") -> bool:
  59. return ext in AcceptExtNames
  60. def Response(message: "str" = None, data=None):
  61. if message is None:
  62. return jsonify(success=True, message="操作成功", data=data)
  63. return jsonify(success=False, message=message, data=data)
  64. def draw_img(shape: "tuple", data: "list[dict]", drop: "float" = 0.5):
  65. img = np.ones(shape, dtype=np.uint8) * 255
  66. seed(0)
  67. for one in data:
  68. if one["rate"] < drop:
  69. continue
  70. color = (randint(0, 255), randint(0, 255), randint(0, 255))
  71. text = draw_box_txt_fine((shape[1], shape[0]), one["pos"], one["word"])
  72. pts = np.array(one["pos"], np.int32).reshape((-1, 1, 2))
  73. cv2.polylines(text, [pts], True, color, 1) # noqa
  74. img = cv2.bitwise_and(img, text) # noqa
  75. return np.array(img)