app.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. from flask import Flask, render_template, request
  2. from utils.util import *
  3. from utils.conf import MAX_CONTENT_LENGTH
  4. import numpy as np
  5. import cv2
  6. from paddleocr import PaddleOCR
  7. # from paddleocr import PaddleOCR
  8. # from utils import Args
  9. # from paddleocr.tools.infer.predict_system import TextSystem
  10. #
  # Flask application object; the route handlers in this module register on it.
  app = Flask(__name__)

  # Shared PaddleOCR engine, built once at import time and reused by every
  # request handler.
  # TODO: replace with paddleocr.tools.infer.predict_system.TextSystem, e.g.
  #   args = Args(use_gpu=False, enable_mkldnn=True,
  #               det_model_dir="models/det/", rec_model_dir="models/rec/")
  #   eng2 = TextSystem(args)
  # TODO: warm the engine up before serving the first request.
  eng1 = PaddleOCR(
      # model locations (relative to the working directory)
      det_model_dir="models/det/",
      rec_model_dir="models/rec/",
      cls_model_dir="models/cls/",
      # runtime / inference switches
      use_gpu=False,
      enable_mkldnn=True,
      use_angle_cls=True,
      use_space_char=True,
  )
  @app.route("/")
  def index():
      """Serve the landing page by rendering the ``index.html`` template."""
      page = render_template("index.html")
      return page
  @app.route("/ocr-raw-api", methods=["POST"])
  def ocr_raw():
      """Run OCR on the uploaded ``picture`` file and return every detection.

      The upload is validated (present, image extension, non-empty, within
      ``MAX_CONTENT_LENGTH``), archived under ``static/images/`` and then fed
      to the shared ``eng1`` engine.

      Returns:
          A ``Response`` (project helper, presumably from ``utils.util`` --
          confirm) carrying either an error message or ``data``: a list of
          ``{"pos", "word", "rate"}`` dicts, one per OCR result entry.
          The list is empty when the engine reports no text.
      """
      # --- upload validation ---
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it up front instead
          # of letting cv2.imdecode raise on an empty buffer.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")

      # --- keep a copy of the upload on disk ---
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      with open(path, "wb") as fp:  # the context manager closes the file
          fp.write(content)

      # --- recognition ---
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
      if array is None:
          # imdecode signals undecodable data by returning None.
          return Response("文件类型错误")
      # The per-image result can be None when no text is detected.
      ocr_res = eng1.ocr(array)[0] or []
      res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
      return Response(data=res)
  @app.route("/ocr-filter", methods=["POST"])
  def ocr_filter():
      """Run OCR on the uploaded ``picture`` file and return only the text.

      The upload is validated (present, image extension, non-empty, within
      ``MAX_CONTENT_LENGTH``), archived under ``static/images/`` and then fed
      to the shared ``eng1`` engine.

      Returns:
          A ``Response`` (project helper, presumably from ``utils.util`` --
          confirm) carrying either an error message or ``data``: a list with
          the ``it[1][0]`` field (the recognised string) of each OCR result
          entry. The list is empty when the engine reports no text.
      """
      # --- upload validation ---
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it up front instead
          # of letting cv2.imdecode raise on an empty buffer.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")

      # --- keep a copy of the upload on disk ---
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      with open(path, "wb") as fp:  # the context manager closes the file
          fp.write(content)

      # --- recognition ---
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
      if array is None:
          # imdecode signals undecodable data by returning None.
          return Response("文件类型错误")
      # The per-image result can be None when no text is detected.
      ocr_res = eng1.ocr(array)[0] or []

      # Keep only the recognised strings.
      res = [it[1][0] for it in ocr_res]
      return Response(data=res)
  @app.route("/ocr-html", methods=["POST"])
  def ocr_html():
      """Run OCR on the uploaded ``picture`` file and render an HTML result page.

      The upload is validated (present, image extension, non-empty, within
      ``MAX_CONTENT_LENGTH``) and stored under ``static/images/``. After
      recognition, ``draw_img`` (project helper) produces an image from the
      input's shape and the OCR results; it is written next to the original
      with a ``-rec`` suffix.

      Returns:
          On validation failure, a ``Response`` (project helper, presumably
          from ``utils.util`` -- confirm) with an error message. Otherwise
          the rendered ``result.html`` template, given the two image paths
          (``raw``, ``rec``) and the list of ``{"pos", "word", "rate"}``
          dicts (``data``).
      """
      # --- upload validation ---
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it up front instead
          # of letting cv2.imdecode raise on an empty buffer.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")

      # Both output files share one timestamp/random stem so they pair up.
      cur, rnd = current_time(), rand_str()
      raw_path = f"static/images/{cur}_{rnd}.{ext}"
      rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
      with open(raw_path, "wb") as fp:  # the context manager closes the file
          fp.write(content)

      # --- recognition ---
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
      if array is None:
          # imdecode signals undecodable data by returning None; without this
          # guard the later array.shape access would raise AttributeError.
          return Response("文件类型错误")
      # The per-image result can be None when no text is detected.
      ocr_res = eng1.ocr(array)[0] or []
      res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]

      # --- draw the recognition result and save it beside the original ---
      rec_img_data = draw_img(array.shape, res)
      cv2.imwrite(rec_path, rec_img_data)
      return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
  # Script entry point: when this file is executed directly (not imported),
  # start Flask's built-in server with its default settings.
  if __name__ == "__main__":
      app.run()
  104. """
  105. step:
  106. pip install PaddlePaddle-GPU==2.4.2 PaddleOCR
  107. pip install PaddleHub
  108. hub install chinese_ocr_db_crnn_server
  109. python E:/Project/Python/PaddleOCR/venv/Lib/site-packages/paddleocr/tools/infer/predict_system.py
  110. --image_dir="C:/Users/huimv/Pictures/Saved Pictures/"
  111. --det_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_det_infer/"
  112. --rec_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_rec_infer/"
  113. --cls_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_ppocr_mobile_v2.0_cls_infer/"
  114. --use_angle_cls=True --use_space_char=True --use_gpu=False
  115. python tools/infer/predict_system.py
  116. --image_dir="E:/Project/Python/PaddleOCR/static/pic2.png"
  117. --det_model_dir="E:/Project/Python/PaddleOCR/models/det/"
  118. --rec_model_dir="E:/Project/Python/PaddleOCR/models/rec/"
  119. deploy:
  120. python -m pip install PaddlePaddle==2.4.2 PaddleOCR -i https://pypi.tuna.tsinghua.edu.cn/simple
  121. """