app.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. from flask import Flask, render_template, request
  2. from utils.util import *
  3. from utils.conf import MAX_CONTENT_LENGTH
  4. import numpy as np
  5. import cv2
  6. from paddleocr import PaddleOCR
  7. # from paddleocr import PaddleOCR
  8. # from utils import Args
  9. # from paddleocr.tools.infer.predict_system import TextSystem
  10. #
  11. app = Flask(__name__)
  12. app.config["JSON_AS_ASCII"] = False
  13. # 待优化为 TextSystem
  14. engine = PaddleOCR(
  15. use_gpu=False,
  16. enable_mkldnn=True,
  17. det_model_dir="models/det/",
  18. rec_model_dir="models/rec/",
  19. cls_model_dir="models/cls/",
  20. use_angle_cls=True,
  21. use_space_char=True
  22. )
  23. # args = Args(
  24. # use_gpu=False,
  25. # enable_mkldnn=True,
  26. # det_model_dir="models/det/",
  27. # rec_model_dir="models/rec/",
  28. # )
  29. # eng2 = TextSystem(args)
  30. # warmup
  31. @app.route("/")
  32. def index():
  33. return render_template("index.html")
  34. @app.route("/ocr-raw-api", methods=["POST"])
  35. def ocr_raw():
  36. # 文件处理
  37. pic = request.files.get("picture")
  38. if pic is None:
  39. return Response("empty body")
  40. ext = get_ext_name(pic.filename)
  41. if not is_image_ext(ext):
  42. return Response("文件类型错误")
  43. content = pic.read()
  44. if len(content) > MAX_CONTENT_LENGTH:
  45. return Response("文件过大,请压缩后尝试")
  46. path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  47. with open(path, "wb") as fp:
  48. fp.write(content)
  49. fp.close()
  50. # 内容识别
  51. array = cv2.imdecode(np.fromstring(content, np.uint8), 1) # noqa
  52. ocr_res = engine.ocr(array)[0]
  53. res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
  54. return Response(data=res)
  55. @app.route("/ocr-filter", methods=["POST"])
  56. def ocr_filter():
  57. # 文件处理
  58. pic = request.files.get("picture")
  59. if pic is None:
  60. return Response("empty body")
  61. ext = get_ext_name(pic.filename)
  62. if not is_image_ext(ext):
  63. return Response("文件类型错误")
  64. content = pic.read()
  65. if len(content) > MAX_CONTENT_LENGTH:
  66. return Response("文件过大,请压缩后尝试")
  67. path = f"static/images/{current_time()}_{rand_str()}.{ext}"
  68. with open(path, "wb") as fp:
  69. fp.write(content)
  70. fp.close()
  71. # 内容识别
  72. array = cv2.imdecode(np.fromstring(content, np.uint8), 1) # noqa
  73. ocr_res = engine.ocr(array)[0]
  74. # 过滤出想要的数据
  75. res = [it[1][0] for it in ocr_res]
  76. return Response(data=res)
  77. @app.route("/ocr-html", methods=["POST"])
  78. def ocr_html():
  79. # 文件处理
  80. pic = request.files.get("picture")
  81. if pic is None:
  82. return Response("empty body")
  83. ext = get_ext_name(pic.filename)
  84. if not is_image_ext(ext):
  85. return Response("文件类型错误")
  86. content = pic.read()
  87. if len(content) > MAX_CONTENT_LENGTH:
  88. return Response("文件过大,请压缩后尝试")
  89. cur, rnd = current_time(), rand_str()
  90. raw_path = f"static/images/{cur}_{rnd}.{ext}"
  91. rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
  92. with open(raw_path, "wb") as fp:
  93. fp.write(content)
  94. fp.close()
  95. # 内容识别
  96. array = cv2.imdecode(np.fromstring(content, np.uint8), 1) # noqa
  97. ocr_res = engine.ocr(array)[0]
  98. res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
  99. # 画图
  100. rec_img_data = draw_img(array.shape, res)
  101. cv2.imwrite(rec_path, rec_img_data) # noqa
  102. return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
  103. if __name__ == "__main__":
  104. app.run()
  105. """
  106. step:
  107. pip install PaddlePaddle-GPU==2.4.2 PaddleOCR
  108. pip install PaddleHub
  109. hub install chinese_ocr_db_crnn_server
  110. python E:/Project/Python/PaddleOCR/venv/Lib/site-packages/paddleocr/tools/infer/predict_system.py
  111. --image_dir="C:/Users/huimv/Pictures/Saved Pictures/"
  112. --det_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_det_infer/"
  113. --rec_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_rec_infer/"
  114. --cls_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_ppocr_mobile_v2.0_cls_infer/"
  115. --use_angle_cls=True --use_space_char=True --use_gpu=False
  116. python tools/infer/predict_system.py
  117. --image_dir="E:/Project/Python/PaddleOCR/static/pic2.png"
  118. --det_model_dir="E:/Project/Python/PaddleOCR/models/det/"
  119. --rec_model_dir="E:/Project/Python/PaddleOCR/models/rec/"
  120. deploy:
  121. python -m pip install PaddlePaddle==2.4.2 PaddleOCR -i https://pypi.tuna.tsinghua.edu.cn/simple
  122. """