123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- from flask import Flask, render_template, request
- from utils.util import *
- from utils.conf import MAX_CONTENT_LENGTH
- import numpy as np
- import cv2
- from paddleocr import PaddleOCR
- # from paddleocr import PaddleOCR
- # from utils import Args
- # from paddleocr.tools.infer.predict_system import TextSystem
- #
app = Flask(__name__)

# TODO: migrate to paddleocr's TextSystem pipeline. Sketch of the intended
# replacement (kept from the original author's notes, not active code):
#     args = Args(
#         use_gpu=False,
#         enable_mkldnn=True,
#         det_model_dir="models/det/",
#         rec_model_dir="models/rec/",
#     )
#     eng2 = TextSystem(args)

# Single OCR engine created at import time and shared by every request
# handler below. It runs on CPU with MKL-DNN enabled and loads the
# detection / recognition / angle-classification models from ``models/``.
eng1 = PaddleOCR(
    det_model_dir="models/det/",
    rec_model_dir="models/rec/",
    cls_model_dir="models/cls/",
    use_angle_cls=True,
    use_space_char=True,
    use_gpu=False,
    enable_mkldnn=True,
)

# NOTE(review): a warm-up step was apparently planned here but was never
# implemented.
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
@app.route("/ocr-raw-api", methods=["POST"])
def ocr_raw():
    """Run OCR on an uploaded picture and return every detection in full.

    The picture is read from the ``picture`` field of the multipart form.
    It is checked for presence, extension and size, decoded, stored under
    ``static/images/`` and then passed to the shared ``eng1`` engine.

    Returns:
        ``Response(data=[...])`` where each item has the keys ``pos``,
        ``word`` and ``rate`` taken from one OCR result entry, or a
        ``Response`` carrying an error message when the upload is rejected.
        (``Response`` is not imported explicitly in this file; presumably it
        comes from the ``utils.util`` star import -- verify.)
    """
    # --- upload validation ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")

    # Decode before persisting so that junk uploads are never written to the
    # static directory. np.frombuffer replaces the deprecated binary mode of
    # np.fromstring; cv2.imdecode rejects an empty buffer, so skip it then.
    array = None
    if content:
        array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)  # noqa
    if array is None:
        # imdecode signals undecodable data by returning None.
        return Response("图片解码失败")

    # Keep a copy of the accepted upload; the ``with`` block closes the file.
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    with open(path, "wb") as fp:
        fp.write(content)

    # --- recognition ---
    pages = eng1.ocr(array)
    # The per-image entry can be None when no text was detected; treat that
    # (and an empty result list) as "no detections" instead of crashing.
    ocr_res = (pages[0] if pages else None) or []
    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
    return Response(data=res)
@app.route("/ocr-filter", methods=["POST"])
def ocr_filter():
    """Run OCR on an uploaded picture and return only the recognized text.

    The picture is read from the ``picture`` field of the multipart form.
    It is checked for presence, extension and size, decoded, stored under
    ``static/images/`` and then passed to the shared ``eng1`` engine.

    Returns:
        ``Response(data=[...])`` holding the text string of every OCR result
        entry, or a ``Response`` carrying an error message when the upload
        is rejected. (``Response`` is not imported explicitly in this file;
        presumably it comes from the ``utils.util`` star import -- verify.)
    """
    # --- upload validation ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")

    # Decode before persisting so that junk uploads are never written to the
    # static directory. np.frombuffer replaces the deprecated binary mode of
    # np.fromstring; cv2.imdecode rejects an empty buffer, so skip it then.
    array = None
    if content:
        array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)  # noqa
    if array is None:
        # imdecode signals undecodable data by returning None.
        return Response("图片解码失败")

    # Keep a copy of the accepted upload; the ``with`` block closes the file.
    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
    with open(path, "wb") as fp:
        fp.write(content)

    # --- recognition ---
    pages = eng1.ocr(array)
    # The per-image entry can be None when no text was detected; treat that
    # (and an empty result list) as "no detections" instead of crashing.
    ocr_res = (pages[0] if pages else None) or []

    # Keep only the recognized string of each entry.
    res = [it[1][0] for it in ocr_res]
    return Response(data=res)
@app.route("/ocr-html", methods=["POST"])
def ocr_html():
    """Run OCR on an uploaded picture and render the outcome as HTML.

    The picture is read from the ``picture`` field of the multipart form.
    It is checked for presence, extension and size, decoded and stored under
    ``static/images/``. The OCR results are drawn with ``draw_img`` and that
    rendering is written next to the upload with a ``-rec`` suffix.

    Returns:
        The ``result.html`` template rendered with ``raw`` (path of the
        stored upload), ``rec`` (path of the rendered result image) and
        ``data`` (list of dicts with the keys ``pos``, ``word``, ``rate``),
        or a ``Response`` carrying an error message when the upload is
        rejected. (``Response`` is not imported explicitly in this file;
        presumably it comes from the ``utils.util`` star import -- verify.)
    """
    # --- upload validation ---
    pic = request.files.get("picture")
    if pic is None:
        return Response("empty body")
    ext = get_ext_name(pic.filename)
    if not is_image_ext(ext):
        return Response("文件类型错误")
    content = pic.read()
    if len(content) > MAX_CONTENT_LENGTH:
        return Response("文件过大,请压缩后尝试")

    # Decode before persisting so that junk uploads are never written to the
    # static directory. np.frombuffer replaces the deprecated binary mode of
    # np.fromstring; cv2.imdecode rejects an empty buffer, so skip it then.
    array = None
    if content:
        array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)  # noqa
    if array is None:
        # imdecode signals undecodable data by returning None.
        return Response("图片解码失败")

    # Both output files share one timestamp/random stem so they stay paired.
    cur, rnd = current_time(), rand_str()
    raw_path = f"static/images/{cur}_{rnd}.{ext}"
    rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
    with open(raw_path, "wb") as fp:
        fp.write(content)

    # --- recognition ---
    pages = eng1.ocr(array)
    # The per-image entry can be None when no text was detected; treat that
    # (and an empty result list) as "no detections" instead of crashing.
    ocr_res = (pages[0] if pages else None) or []
    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]

    # --- rendering of the recognized text ---
    rec_img_data = draw_img(array.shape, res)
    cv2.imwrite(rec_path, rec_img_data)  # noqa
    return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
# Launch the app with its default settings only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    app.run()
- """
- step:
- pip install PaddlePaddle-GPU==2.4.2 PaddleOCR
- pip install PaddleHub
- hub install chinese_ocr_db_crnn_server
- python E:/Project/Python/PaddleOCR/venv/Lib/site-packages/paddleocr/tools/infer/predict_system.py
- --image_dir="C:/Users/huimv/Pictures/Saved Pictures/"
- --det_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_det_infer/"
- --rec_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_rec_infer/"
- --cls_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_ppocr_mobile_v2.0_cls_infer/"
- --use_angle_cls=True --use_space_char=True --use_gpu=False
- python tools/infer/predict_system.py
- --image_dir="E:/Project/Python/PaddleOCR/static/pic2.png"
- --det_model_dir="E:/Project/Python/PaddleOCR/models/det/"
- --rec_model_dir="E:/Project/Python/PaddleOCR/models/rec/"
- deploy:
- python -m pip install PaddlePaddle==2.4.2 PaddleOCR -i https://pypi.tuna.tsinghua.edu.cn/simple
- """
|