from flask import Flask, render_template, request
from utils.util import *
from utils.conf import MAX_CONTENT_LENGTH
import numpy as np
import cv2
from paddleocr import PaddleOCR

# from paddleocr import PaddleOCR
# from utils import Args
# from paddleocr.tools.infer.predict_system import TextSystem

# Flask application instance; the route decorators below require it.
app = Flask(__name__)

# To be optimized to use TextSystem.
eng1 = PaddleOCR(
    use_gpu=False,
    enable_mkldnn=True,
    det_model_dir="models/det/",
    rec_model_dir="models/rec/",
    cls_model_dir="models/cls/",
    use_angle_cls=True,
    use_space_char=True,
)

# args = Args(
#     use_gpu=False,
#     enable_mkldnn=True,
#     det_model_dir="models/det/",
#     rec_model_dir="models/rec/",
# )
# eng2 = TextSystem(args)

# warmup


def _read_upload():
    """Validate the file uploaded under the ``picture`` form field.

    Returns:
        A ``(content, ext, error)`` tuple. On success ``content`` holds the
        raw bytes, ``ext`` the extension reported by ``get_ext_name`` and
        ``error`` is ``None``. On failure ``content`` and ``ext`` are ``None``
        and ``error`` is the ``Response`` the endpoint should return.
    """
    pic = request.files.get("picture")
    if pic is None:
        return None, None, Response("empty body")

    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return None, None, Response("文件类型错误")

    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return None, None, Response("文件过大,请压缩后尝试")

    return content, ext, None


def _save_bytes(path, content):
    """Write ``content`` to ``path``; the ``with`` block closes the file."""
    with open(path, "wb") as fp:
        fp.write(content)


def _decode_image(content):
    """Decode image bytes into a 3-channel array, or ``None`` if undecodable.

    ``np.frombuffer`` replaces the deprecated binary mode of
    ``np.fromstring``; ``cv2.IMREAD_COLOR`` is the named form of flag ``1``.
    """
    buffer = np.frombuffer(content, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # noqa


def _run_ocr(array):
    """Run ``eng1`` on one image and return its list of detections.

    The engine result is indexed with ``[0]`` for the single input image.
    A missing or empty entry (no text found) is normalised to ``[]`` so the
    callers can always iterate the result.
    """
    result = eng1.ocr(array)
    if not result or not result[0]:
        return []
    return result[0]


def _to_records(ocr_res):
    """Convert raw detections to ``{"pos", "word", "rate"}`` dictionaries."""
    return [
        {"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res
    ]


@app.route("/")
def index():
    """Render the landing page."""
    return render_template("index.html")


@app.route("/ocr-raw-api", methods=["POST"])
def ocr_raw():
    """Recognise the uploaded picture and return position, text and score."""
    # File handling
    content, ext, error = _read_upload()
    if error is not None:
        return error
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    _save_bytes(path, content)

    # Content recognition
    array = _decode_image(content)
    if array is None:
        # The bytes carried an image extension but are not a decodable image.
        return Response("文件类型错误")
    res = _to_records(_run_ocr(array))
    return Response(data=res)


@app.route("/ocr-filter", methods=["POST"])
def ocr_filter():
    """Recognise the uploaded picture and return only the recognised text."""
    # File handling
    content, ext, error = _read_upload()
    if error is not None:
        return error
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    _save_bytes(path, content)

    # Content recognition
    array = _decode_image(content)
    if array is None:
        # The bytes carried an image extension but are not a decodable image.
        return Response("文件类型错误")
    ocr_res = _run_ocr(array)

    # Keep only the wanted data: the recognised strings.
    res = [it[1][0] for it in ocr_res]
    return Response(data=res)


@app.route("/ocr-html", methods=["POST"])
def ocr_html():
    """Recognise the uploaded picture and render an HTML result page.

    Saves the raw upload and a second image produced by ``draw_img`` next to
    it (``-rec`` suffix), then passes both paths and the records to
    ``result.html``.
    """
    # File handling
    content, ext, error = _read_upload()
    if error is not None:
        return error
    cur, rnd = current_time(), rand_str()
    raw_path = f"static/images/{cur}_{rnd}.{ext}"
    rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
    _save_bytes(raw_path, content)

    # Content recognition
    array = _decode_image(content)
    if array is None:
        # The bytes carried an image extension but are not a decodable image.
        return Response("文件类型错误")
    res = _to_records(_run_ocr(array))

    # Drawing
    rec_img_data = draw_img(array.shape, res)
    cv2.imwrite(rec_path, rec_img_data)  # noqa
    return render_template("result.html", raw=raw_path, rec=rec_path, data=res)


if __name__ == "__main__":
    app.run()

"""
step:
pip install PaddlePaddle-GPU==2.4.2 PaddleOCR
pip install PaddleHub
hub install chinese_ocr_db_crnn_server

python E:/Project/Python/PaddleOCR/venv/Lib/site-packages/paddleocr/tools/infer/predict_system.py --image_dir="C:/Users/huimv/Pictures/Saved Pictures/" --det_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_det_infer/" --rec_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_rec_infer/" --cls_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False

python tools/infer/predict_system.py --image_dir="E:/Project/Python/PaddleOCR/static/pic2.png" --det_model_dir="E:/Project/Python/PaddleOCR/models/det/" --rec_model_dir="E:/Project/Python/PaddleOCR/models/rec/"

deploy:
python -m pip install PaddlePaddle==2.4.2 PaddleOCR -i https://pypi.tuna.tsinghua.edu.cn/simple
"""