app.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. from flask import Flask, render_template, request
  2. from utils.util import *
  3. from utils.conf import MAX_CONTENT_LENGTH
  4. import logging
  5. import numpy as np
  6. import cv2
  7. from paddleocr import PaddleOCR
  # Flask application object; the JSON_AS_ASCII setting is switched off.
  app = Flask(__name__)
  app.config["JSON_AS_ASCII"] = False

  # Raise the "ppocr" logger threshold so only warnings and above get through.
  logging.getLogger("ppocr").setLevel(logging.WARNING)

  # One OCR engine instance, created at import time and shared by all the
  # request handlers in this module.
  engine = PaddleOCR(
      det_model_dir="models/det/",
      rec_model_dir="models/rec/",
      cls_model_dir="models/cls/",
      use_angle_cls=True,
      use_space_char=True,
      use_gpu=True,
  )
  @app.route("/")
  def index():
      """Serve the landing page rendered from the ``index.html`` template."""
      page = render_template("index.html")
      return page
  @app.route("/ocr-raw-api", methods=["POST"])
  def ocr_raw():
      """Run OCR on an uploaded picture and return every detection in full.

      Reads the ``picture`` file field of the request, validates it, stores a
      copy under ``static/images/`` and passes the decoded image to the
      module-level OCR ``engine``.

      Returns:
          ``Response(<message>)`` when the upload is missing, empty, has a
          non-image extension, is larger than ``MAX_CONTENT_LENGTH`` or
          cannot be decoded; otherwise ``Response(data=[...])`` where each
          item is a dict with the keys ``pos``, ``word`` and ``rate`` built
          from one OCR result entry.
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it before storing.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      # The with-block closes the file itself; no explicit close() is needed.
      with open(path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      array = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # noqa
      if array is None:
          # cv2.imdecode returns None when the bytes are not a readable image.
          return Response("文件类型错误")
      # NOTE(review): some PaddleOCR releases appear to return None for an
      # image without any detected text -- confirm for the pinned version.
      ocr_res = engine.ocr(array)[0] or []
      res = [
          {"pos": it[0], "word": it[1][0], "rate": it[1][1]}
          for it in ocr_res
      ]
      return Response(data=res)
  @app.route("/ocr-filter", methods=["POST"])
  def ocr_filter():
      """Run OCR on an uploaded picture and return only the recognised text.

      Reads the ``picture`` file field of the request, validates it, stores a
      copy under ``static/images/`` and passes the decoded image to the
      module-level OCR ``engine``.

      Returns:
          ``Response(<message>)`` when the upload is missing, empty, has a
          non-image extension, is larger than ``MAX_CONTENT_LENGTH`` or
          cannot be decoded; otherwise ``Response(data=[...])`` holding the
          ``it[1][0]`` element of every OCR result entry.
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it before storing.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      path = f"static/images/{current_time()}_{rand_str()}.{ext}"
      # The with-block closes the file itself; no explicit close() is needed.
      with open(path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      array = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # noqa
      if array is None:
          # cv2.imdecode returns None when the bytes are not a readable image.
          return Response("文件类型错误")
      # NOTE(review): some PaddleOCR releases appear to return None for an
      # image without any detected text -- confirm for the pinned version.
      ocr_res = engine.ocr(array)[0] or []

      # Keep only the wanted data: the text part of each entry.
      res = [it[1][0] for it in ocr_res]
      return Response(data=res)
  @app.route("/ocr-html", methods=["POST"])
  def ocr_html():
      """Run OCR on an uploaded picture and render the result as an HTML page.

      Reads the ``picture`` file field of the request, validates it and
      stores a copy under ``static/images/``. The decoded image goes to the
      module-level OCR ``engine``; ``draw_img`` then produces a second image
      from the results, which is written next to the original with a
      ``-rec`` suffix.

      Returns:
          ``Response(<message>)`` when the upload is missing, empty, has a
          non-image extension, is larger than ``MAX_CONTENT_LENGTH`` or
          cannot be decoded; otherwise the rendered ``result.html`` template,
          given both image paths and the list of ``pos``/``word``/``rate``
          dicts.
      """
      # File handling
      pic = request.files.get("picture")
      if pic is None:
          return Response("empty body")
      ext = get_ext_name(pic.filename)
      if not is_image_ext(ext):
          return Response("文件类型错误")
      content = pic.read()
      if not content:
          # A zero-byte upload cannot be decoded; reject it before storing.
          return Response("empty body")
      if len(content) > MAX_CONTENT_LENGTH:
          return Response("文件过大,请压缩后尝试")
      # Both files share one timestamp/random stem so they stay paired.
      cur, rnd = current_time(), rand_str()
      raw_path = f"static/images/{cur}_{rnd}.{ext}"
      rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
      # The with-block closes the file itself; no explicit close() is needed.
      with open(raw_path, "wb") as fp:
          fp.write(content)

      # Content recognition
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      buffer = np.frombuffer(content, np.uint8)
      array = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # noqa
      if array is None:
          # cv2.imdecode returns None when the bytes are not a readable image.
          return Response("文件类型错误")
      # NOTE(review): some PaddleOCR releases appear to return None for an
      # image without any detected text -- confirm for the pinned version.
      ocr_res = engine.ocr(array)[0] or []
      res = [
          {"pos": it[0], "word": it[1][0], "rate": it[1][1]}
          for it in ocr_res
      ]

      # Drawing: build the result image from the input's shape and the results.
      rec_img_data = draw_img(array.shape, res)
      cv2.imwrite(rec_path, rec_img_data)  # noqa
      return render_template(
          "result.html", raw=raw_path, rec=rec_path, data=res
      )
  # Script entry point: start the Flask server with its default settings
  # only when this file is executed directly, not when it is imported.
  if __name__ == "__main__":
      app.run()