123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- from flask import Flask, render_template, request
- from utils.util import *
- from utils.conf import MAX_CONTENT_LENGTH
- import logging
- import numpy as np
- import cv2
- from paddleocr import PaddleOCR
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
app.config.update(JSON_AS_ASCII=False)

# Only let WARNING-and-above records from the "ppocr" logger through.
logging.getLogger("ppocr").setLevel(logging.WARNING)

# Single OCR engine instance, built once at import time and shared by every
# request handler in this module.
engine = PaddleOCR(
    # Model directories (relative to the working directory).
    det_model_dir="models/det/",
    rec_model_dir="models/rec/",
    cls_model_dir="models/cls/",
    # Runtime switches.
    use_gpu=False,
    enable_mkldnn=True,
    use_angle_cls=True,
    use_space_char=True,
)
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/ocr-raw-api", methods=["POST"])
def ocr_raw():
    """Run OCR on an uploaded picture and return every detection in full.

    The upload is read from the ``picture`` file field of the request. Its
    extension is checked with ``is_image_ext`` and its size against
    ``MAX_CONTENT_LENGTH``; a copy of the raw bytes is then written under
    ``static/images/`` before recognition runs.

    Returns:
        ``Response(data=...)`` where the data is a list of dicts with the
        keys ``"pos"``, ``"word"`` and ``"rate"`` taken from each engine
        result, or ``Response(<message>)`` when the upload is missing, has a
        rejected extension, is too large, or cannot be decoded as an image.
    """
    # --- Upload handling ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(path, "wb") as fp:
        fp.write(content)

    # --- Recognition ---
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # An empty buffer is never handed to imdecode (it raises on empty input);
    # flag 1 requests a 3-channel colour image.
    array = (
        cv2.imdecode(np.frombuffer(content, np.uint8), 1)  # noqa
        if content
        else None
    )
    if array is None:
        # imdecode yields None when the bytes are not a decodable image.
        return Response("图片解码失败")
    # The per-image result can be None when no text is found; treat it as
    # an empty list of detections.
    ocr_res = engine.ocr(array)[0] or []
    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
    return Response(data=res)
@app.route("/ocr-filter", methods=["POST"])
def ocr_filter():
    """Run OCR on an uploaded picture and return only the recognised text.

    The upload is read from the ``picture`` file field of the request. Its
    extension is checked with ``is_image_ext`` and its size against
    ``MAX_CONTENT_LENGTH``; a copy of the raw bytes is then written under
    ``static/images/`` before recognition runs.

    Returns:
        ``Response(data=...)`` where the data is a list holding the text of
        each engine result (``it[1][0]``), or ``Response(<message>)`` when
        the upload is missing, has a rejected extension, is too large, or
        cannot be decoded as an image.
    """
    # --- Upload handling ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(path, "wb") as fp:
        fp.write(content)

    # --- Recognition ---
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # An empty buffer is never handed to imdecode (it raises on empty input);
    # flag 1 requests a 3-channel colour image.
    array = (
        cv2.imdecode(np.frombuffer(content, np.uint8), 1)  # noqa
        if content
        else None
    )
    if array is None:
        # imdecode yields None when the bytes are not a decodable image.
        return Response("图片解码失败")
    # The per-image result can be None when no text is found; treat it as
    # an empty list of detections.
    ocr_res = engine.ocr(array)[0] or []

    # Keep just the wanted field: the recognised string of each detection.
    res = [it[1][0] for it in ocr_res]
    return Response(data=res)
@app.route("/ocr-html", methods=["POST"])
def ocr_html():
    """Run OCR on an uploaded picture and render the result as an HTML page.

    The upload is read from the ``picture`` file field of the request. Its
    extension is checked with ``is_image_ext`` and its size against
    ``MAX_CONTENT_LENGTH``. Two files sharing one timestamp/random stem are
    written under ``static/images/``: the raw upload and, after recognition,
    a ``-rec`` image produced by ``draw_img`` from the detections.

    Returns:
        The rendered ``result.html`` template, given the two image paths
        (``raw``, ``rec``) and the detection list (``data``); or
        ``Response(<message>)`` when the upload is missing, has a rejected
        extension, is too large, or cannot be decoded as an image.
    """
    # --- Upload handling ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")
    # Compute the stem once so both output files share the same name prefix.
    cur, rnd = current_time(), rand_str()
    raw_path = f"static/images/{cur}_{rnd}.{ext}"
    rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(raw_path, "wb") as fp:
        fp.write(content)

    # --- Recognition ---
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # An empty buffer is never handed to imdecode (it raises on empty input);
    # flag 1 requests a 3-channel colour image.
    array = (
        cv2.imdecode(np.frombuffer(content, np.uint8), 1)  # noqa
        if content
        else None
    )
    if array is None:
        # imdecode yields None when the bytes are not a decodable image;
        # without this guard ``array.shape`` below would raise.
        return Response("图片解码失败")
    # The per-image result can be None when no text is found; treat it as
    # an empty list of detections.
    ocr_res = engine.ocr(array)[0] or []
    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]

    # --- Drawing ---
    rec_img_data = draw_img(array.shape, res)
    cv2.imwrite(rec_path, rec_img_data)  # noqa
    return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
# Script entry point: start the Flask server with its default settings when
# this file is executed directly (not when it is imported).
if __name__ == "__main__":
    app.run()
|