app.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. from flask import Flask, render_template, request
  2. from utils.util import *
  3. from utils.conf import MAX_CONTENT_LENGTH
  4. import logging
  5. import numpy as np
  6. import cv2
  7. from paddleocr import PaddleOCR
  # Flask application object; the JSON_AS_ASCII config flag is switched off.
  app = Flask(__name__)
  app.config.update(JSON_AS_ASCII=False)

  # Raise the "ppocr" logger threshold to WARNING before the engine is built.
  logging.getLogger("ppocr").setLevel(logging.WARNING)

  # Single PaddleOCR engine shared by every request handler in this module.
  # Option meanings are those of the PaddleOCR keyword names: GPU disabled,
  # MKL-DNN enabled, detection / recognition / classification models read from
  # the local ``models/`` sub-directories, angle classifier and space
  # character support switched on.
  engine = PaddleOCR(
      det_model_dir="models/det/",
      rec_model_dir="models/rec/",
      cls_model_dir="models/cls/",
      use_gpu=False,
      enable_mkldnn=True,
      use_angle_cls=True,
      use_space_char=True,
  )
  @app.route("/")
  def index():
      """Serve the landing page by rendering the ``index.html`` template."""
      page = render_template("index.html")
      return page
  @app.route("/ocr-raw-api", methods=["POST"])
  def ocr_raw():
      """Run OCR on the uploaded ``picture`` and return every recognised item.

      The upload is validated, a copy is written under ``static/images/`` and
      the decoded image is passed to the module-level ``engine``.

      Returns:
          ``Response`` (not imported explicitly in this file; presumably
          supplied by ``utils.util``) carrying an error message when the upload
          is missing, has a non-image extension, exceeds
          ``MAX_CONTENT_LENGTH`` or cannot be decoded as an image. Otherwise
          ``Response(data=...)`` with one dict per OCR item holding ``pos``,
          ``word`` and ``rate`` (presumably box coordinates, text and
          confidence, following PaddleOCR's result layout). The list is empty
          when nothing was recognised.
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      # The ``with`` block closes the file; no explicit close() is needed.
      with open(path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      # imdecode rejects an empty buffer and yields None for undecodable data.
      array = cv2.imdecode(buffer, 1) if buffer.size else None  # noqa
      if array is None:
          return Response("文件类型错误")
      pages = engine.ocr(array)
      # The first page may be missing or None when no text was detected.
      ocr_res = (pages[0] if pages else None) or []
      res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
      return Response(data=res)
  @app.route("/ocr-filter", methods=["POST"])
  def ocr_filter():
      """Run OCR on the uploaded ``picture`` and return only the text strings.

      The upload is validated, a copy is written under ``static/images/`` and
      the decoded image is passed to the module-level ``engine``.

      Returns:
          ``Response`` (not imported explicitly in this file; presumably
          supplied by ``utils.util``) carrying an error message when the upload
          is missing, has a non-image extension, exceeds
          ``MAX_CONTENT_LENGTH`` or cannot be decoded as an image. Otherwise
          ``Response(data=...)`` with a list of the ``it[1][0]`` field of each
          OCR item (presumably the recognised text). The list is empty when
          nothing was recognised.
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      # The ``with`` block closes the file; no explicit close() is needed.
      with open(path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      # imdecode rejects an empty buffer and yields None for undecodable data.
      array = cv2.imdecode(buffer, 1) if buffer.size else None  # noqa
      if array is None:
          return Response("文件类型错误")
      pages = engine.ocr(array)
      # The first page may be missing or None when no text was detected.
      ocr_res = (pages[0] if pages else None) or []

      # Keep only the wanted field of every item
      res = [it[1][0] for it in ocr_res]
      return Response(data=res)
  @app.route("/ocr-html", methods=["POST"])
  def ocr_html():
      """Run OCR on the uploaded ``picture`` and render the result as HTML.

      The upload is validated and written to ``static/images/``. The decoded
      image is passed to the module-level ``engine``, and a second image built
      by ``draw_img`` from the OCR items is written next to the original with a
      ``-rec`` suffix.

      Returns:
          ``Response`` (not imported explicitly in this file; presumably
          supplied by ``utils.util``) carrying an error message when the upload
          is missing, has a non-image extension, exceeds
          ``MAX_CONTENT_LENGTH`` or cannot be decoded as an image. Otherwise
          the rendered ``result.html`` template, given the two image paths and
          the list of OCR items (dicts with ``pos``, ``word`` and ``rate``).
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      # Both files share one timestamp/random stem so they can be paired.
      cur, rnd = current_time(), rand_str()
      raw_path = f"static/images/{cur}_{rnd}.{ext}"
      rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
      # The ``with`` block closes the file; no explicit close() is needed.
      with open(raw_path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      # imdecode rejects an empty buffer and yields None for undecodable data;
      # without this guard ``array.shape`` below would fail on None.
      array = cv2.imdecode(buffer, 1) if buffer.size else None  # noqa
      if array is None:
          return Response("文件类型错误")
      pages = engine.ocr(array)
      # The first page may be missing or None when no text was detected.
      ocr_res = (pages[0] if pages else None) or []
      res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]

      # Drawing
      rec_img_data = draw_img(array.shape, res)
      cv2.imwrite(rec_path, rec_img_data)  # noqa
      return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
  if __name__ == "__main__":
      # Executed as a script: start the Flask server with its default settings.
      app.run()