Tinger 2 年之前
當前提交
afb53c2ca2

+ 12 - 0
Dockerfile

@@ -0,0 +1,12 @@
+# Image for the PaddleOCR Flask service, served by gunicorn.
+# NOTE(review): several pinned packages in requires.txt (e.g. paddlepaddle,
+# opencv-python) may not ship wheels for Alpine/musl - confirm this base image builds.
+FROM python:3.9.2-alpine
+
+WORKDIR /srv
+
+# Only the requirements file is copied into the image; the application code is
+# expected at /srv at runtime (docker-compose.yml bind-mounts ./ to /srv/).
+COPY requires.txt .
+
+# PyPI mirror used for installation; override with --build-arg tuna=<index-url>.
+ARG tuna=https://pypi.tuna.tsinghua.edu.cn/simple
+
+RUN pip install --no-cache-dir --upgrade pip -i $tuna \
+  && pip install --no-cache-dir -r requires.txt -i $tuna
+
+# Starts the Flask object `app` from app.py using the settings in gunicorn.conf.
+CMD ["gunicorn", "app:app", "-c", "gunicorn.conf"]

+ 19 - 0
Readme.md

@@ -0,0 +1,19 @@
+## PaddleOCR API
+**$host: http://localhost:5000**
++ $host/  
+  + 请求方法:浏览器访问
+  + 说明:在线测试演示首页
+
++ $host/ocr-raw-api
+  + 请求方法:POST
+  + 参数列表:
+    + picture: 待识别的图片文件
+  + 返回数据:JSON
+  + 详见:[ApiPost 接口说明1](https://console-docs.apipost.cn/preview/29f73aa84047b12a/b294d4eccd3ec68e)
+
++ $host/ocr-filter
+  + 请求方法:POST
+  + 参数列表:
+    + picture: 待识别的图片文件
+  + 返回数据:JSON
+  + 详见:[ApiPost 接口说明2](https://console-docs.apipost.cn/preview/91517399fb42f789/47a77024dd17b335)

+ 145 - 0
app.py

@@ -0,0 +1,145 @@
+from flask import Flask, render_template, request
+from utils.util import *
+from utils.conf import MAX_CONTENT_LENGTH
+import numpy as np
+import cv2
+from paddleocr import PaddleOCR
+
+# from paddleocr import PaddleOCR
+# from utils import Args
+# from paddleocr.tools.infer.predict_system import TextSystem
+#
+app = Flask(__name__)
+# TODO: switch this engine to TextSystem
+# Module-level OCR engine shared by all routes: CPU inference with MKL-DNN
+# enabled, models loaded from the local models/ directory.
+eng1 = PaddleOCR(
+    use_gpu=False,
+    enable_mkldnn=True,
+    det_model_dir="models/det/",
+    rec_model_dir="models/rec/",
+    cls_model_dir="models/cls/",
+    use_angle_cls=True,
+    use_space_char=True
+)
+
+
+# args = Args(
+#     use_gpu=False,
+#     enable_mkldnn=True,
+#     det_model_dir="models/det/",
+#     rec_model_dir="models/rec/",
+# )
+# eng2 = TextSystem(args)
+# warmup
+
+
+@app.route("/")
+def index():
+    return render_template("index.html")
+
+
+@app.route("/ocr-raw-api", methods=["POST"])
+def ocr_raw():
+    # 文件处理
+    pic = request.files.get("picture")
+    if pic is None:
+        return Response("empty body")
+    ext = get_ext_name(pic.filename)
+    if not is_image_ext(ext):
+        return Response("文件类型错误")
+    content = pic.read()
+    if len(content) > MAX_CONTENT_LENGTH:
+        return Response("文件过大,请压缩后尝试")
+    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
+    with open(path, "wb") as fp:
+        fp.write(content)
+        fp.close()
+
+    # 内容识别
+    array = cv2.imdecode(np.fromstring(content, np.uint8), 1)  # noqa
+    ocr_res = eng1.ocr(array)[0]
+    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
+    return Response(data=res)
+
+
+@app.route("/ocr-filter", methods=["POST"])
+def ocr_filter():
+    # 文件处理
+    pic = request.files.get("picture")
+    if pic is None:
+        return Response("empty body")
+    ext = get_ext_name(pic.filename)
+    if not is_image_ext(ext):
+        return Response("文件类型错误")
+    content = pic.read()
+    if len(content) > MAX_CONTENT_LENGTH:
+        return Response("文件过大,请压缩后尝试")
+    path = f"static/images/{current_time()}_{rand_str()}.{ext}"
+    with open(path, "wb") as fp:
+        fp.write(content)
+        fp.close()
+
+    # 内容识别
+    array = cv2.imdecode(np.fromstring(content, np.uint8), 1)  # noqa
+    ocr_res = eng1.ocr(array)[0]
+
+    # 过滤出想要的数据
+    res = [it[1][0] for it in ocr_res]
+    return Response(data=res)
+
+
+@app.route("/ocr-html", methods=["POST"])
+def ocr_html():
+    # 文件处理
+    pic = request.files.get("picture")
+    if pic is None:
+        return Response("empty body")
+    ext = get_ext_name(pic.filename)
+    if not is_image_ext(ext):
+        return Response("文件类型错误")
+    content = pic.read()
+    if len(content) > MAX_CONTENT_LENGTH:
+        return Response("文件过大,请压缩后尝试")
+    cur, rnd = current_time(), rand_str()
+    raw_path = f"static/images/{cur}_{rnd}.{ext}"
+    rec_path = f"static/images/{cur}_{rnd}-rec.{ext}"
+    with open(raw_path, "wb") as fp:
+        fp.write(content)
+        fp.close()
+
+    # 内容识别
+    array = cv2.imdecode(np.fromstring(content, np.uint8), 1)  # noqa
+    ocr_res = eng1.ocr(array)[0]
+    res = [{"pos": it[0], "word": it[1][0], "rate": it[1][1]} for it in ocr_res]
+
+    # 画图
+    rec_img_data = draw_img(array.shape, res)
+    cv2.imwrite(rec_path, rec_img_data)  # noqa
+    return render_template("result.html", raw=raw_path, rec=rec_path, data=res)
+
+
+# Development entry point: starts Flask's built-in server when run directly.
+# The Dockerfile starts the app through gunicorn instead.
+if __name__ == "__main__":
+    app.run()
+
+"""
+step:
+    pip install PaddlePaddle-GPU==2.4.2 PaddleOCR
+    pip install PaddleHub
+    hub install chinese_ocr_db_crnn_server
+
+
+python E:/Project/Python/PaddleOCR/venv/Lib/site-packages/paddleocr/tools/infer/predict_system.py
+    --image_dir="C:/Users/huimv/Pictures/Saved Pictures/"
+    --det_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_det_infer/"
+    --rec_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_pp-ocrv3_rec_infer/"
+    --cls_model_dir="D:/BaiduNetdisk/module/aiengine/model/ocr/ch_ppocr_mobile_v2.0_cls_infer/"
+    --use_angle_cls=True --use_space_char=True --use_gpu=False
+
+python tools/infer/predict_system.py
+    --image_dir="E:/Project/Python/PaddleOCR/static/pic2.png"
+    --det_model_dir="E:/Project/Python/PaddleOCR/models/det/"
+    --rec_model_dir="E:/Project/Python/PaddleOCR/models/rec/"
+
+
+deploy:
+    python -m pip install PaddlePaddle==2.4.2 PaddleOCR -i https://pypi.tuna.tsinghua.edu.cn/simple
+"""

+ 10 - 0
docker-compose.yml

@@ -0,0 +1,10 @@
+version: "3"
+
+services:
+  Ocr:
+    build: .
+    image: ocr:latest
+    container_name: Ocr
+    restart: unless-stopped
+    # gunicorn.conf binds to port 80 inside the container; publish it on the
+    # host port named in the README (http://localhost:5000). Without a port
+    # mapping the service is not reachable from the host.
+    ports:
+      - "5000:80"
+    # The image only contains the installed requirements; the application code
+    # and models are provided by bind-mounting the project directory.
+    volumes:
+      - ./:/srv/

+ 3 - 0
gunicorn.conf

@@ -0,0 +1,3 @@
+# Listen on every interface, port 80 (inside the container).
+bind="0.0.0.0:80"
+# Use gevent workers (gevent is pinned in requires.txt).
+worker_class="gevent"
+# NOTE(review): app.py builds the PaddleOCR engine at import time, so each of
+# the 5 workers presumably loads its own copy of the models - confirm memory budget.
+workers=5

二進制
models/cls/inference.pdiparams


二進制
models/cls/inference.pdiparams.info


二進制
models/cls/inference.pdmodel


二進制
models/det/inference.pdiparams


二進制
models/det/inference.pdiparams.info


二進制
models/det/inference.pdmodel


二進制
models/rec/inference.pdiparams


二進制
models/rec/inference.pdiparams.info


二進制
models/rec/inference.pdmodel


+ 132 - 0
requires.txt

@@ -0,0 +1,132 @@
+aiofiles==23.1.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==4.2.2
+anyio==3.6.2
+astor==0.8.1
+async-timeout==4.0.2
+attrdict==2.0.1
+attrs==23.1.0
+Babel==2.12.1
+bce-python-sdk==0.8.83
+beautifulsoup4==4.12.2
+blinker==1.6.2
+Brotli==1.0.9
+cachetools==5.3.0
+certifi==2022.12.7
+cffi==1.15.1
+charset-normalizer==3.1.0
+click==8.1.3
+colorama==0.4.6
+contourpy==1.0.7
+cssselect==1.2.0
+cssutils==2.6.0
+cycler==0.11.0
+Cython==0.29.34
+decorator==5.1.1
+entrypoints==0.4
+et-xmlfile==1.1.0
+fastapi==0.95.1
+ffmpy==0.3.0
+filelock==3.12.0
+fire==0.5.0
+Flask==2.3.1
+flask-babel==3.1.0
+fonttools==4.39.3
+frozenlist==1.3.3
+fsspec==2023.4.0
+future==0.18.3
+gevent==22.10.2
+geventhttpclient==2.0.2
+gradio==3.28.0
+gradio_client==0.1.4
+greenlet==2.0.2
+grpcio==1.54.0
+gunicorn==20.1.0
+h11==0.14.0
+httpcore==0.17.0
+httpx==0.24.0
+huggingface-hub==0.14.1
+idna==3.4
+imageio==2.28.0
+imgaug==0.4.0
+importlib-metadata==6.6.0
+importlib-resources==5.12.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+jsonschema==4.17.3
+kiwisolver==1.4.4
+lazy_loader==0.2
+linkify-it-py==2.0.0
+lmdb==1.4.1
+lxml==4.9.2
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+matplotlib==3.7.1
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.4
+networkx==3.1
+numpy==1.24.3
+onnx==1.12.0
+opencv-contrib-python==4.6.0.66
+opencv-python==4.6.0.66
+openpyxl==3.1.2
+opt-einsum==3.3.0
+orjson==3.8.11
+packaging==23.1
+paddle-bfloat==0.1.7
+paddleocr==2.6.1.3
+paddlepaddle==2.4.2
+pandas==2.0.1
+pdf2docx==0.5.6
+Pillow==9.5.0
+premailer==3.10.0
+protobuf==3.20.0
+psutil==5.9.5
+pyclipper==1.3.0.post4
+pycparser==2.21
+pycryptodome==3.17
+pydantic==1.10.7
+pydub==0.25.1
+PyMuPDF==1.20.2
+pyparsing==3.0.9
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+python-docx==0.8.11
+python-multipart==0.0.6
+python-rapidjson==1.10
+pytz==2023.3
+PyWavelets==1.4.1
+PyYAML==6.0
+rapidfuzz==3.0.0
+rarfile==4.0
+requests==2.29.0
+scikit-image==0.20.0
+scipy==1.9.1
+semantic-version==2.10.0
+shapely==2.0.1
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.4.1
+starlette==0.26.1
+sympy==1.11.1
+termcolor==2.3.0
+tifffile==2023.4.12
+toolz==0.12.0
+tqdm==4.65.0
+tritonclient==2.33.0
+typing_extensions==4.5.0
+tzdata==2023.3
+uc-micro-py==1.0.1
+urllib3==1.26.15
+uvicorn==0.22.0
+visualdl==2.5.1
+websockets==11.0.2
+Werkzeug==2.3.1
+x2paddle==1.4.1
+yarl==1.9.2
+zipp==3.15.0
+zope.event==4.6
+zope.interface==6.0

二進制
static/simfang.ttf


+ 24 - 0
templates/index.html

@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<!-- Demo page: three upload forms, one per OCR endpoint. Every form posts the
+     chosen file as the multipart field "picture". -->
+<html lang="zh">
+<head>
+    <meta charset="UTF-8">
+    <title>upload index</title>
+</head>
+<body>
+<!-- Raw OCR result as JSON -->
+<h2>原始OCR:</h2>
+<form action="/ocr-raw-api" method="POST" enctype="multipart/form-data">
+    <input type="file" name="picture">
+    <input type="submit" value="上传">
+</form>
+<!-- Recognized texts only, as JSON -->
+<h2>过滤结果:</h2>
+<form action="/ocr-filter" method="POST" enctype="multipart/form-data">
+    <input type="file" name="picture">
+    <input type="submit" value="上传">
+</form>
+<!-- HTML page showing the original and the annotated image -->
+<h2>图片演示:</h2>
+<form action="/ocr-html" method="POST" enctype="multipart/form-data">
+    <input type="file" name="picture">
+    <input type="submit" value="上传">
+</form>
+</body>
+</html>

+ 101 - 0
templates/result.html

@@ -0,0 +1,101 @@
+<!DOCTYPE html>
+<!-- Result page for /ocr-html. Template variables: raw (path of the uploaded
+     image), rec (path of the annotated image), data (list of items with
+     word / rate / pos). -->
+<html lang="zh">
+<head>
+    <meta charset="UTF-8">
+    <title>ocr result</title>
+    <style>
+        html, body {
+            width: 100%;
+            padding: 0;
+            margin: 0;
+        }
+
+        .img-line {
+            width: 100%;
+            box-sizing: border-box;
+            padding: 0 100px;
+            display: flex;
+            justify-content: space-between;
+            margin-top: 50px;
+        }
+
+        .img-box {
+            margin: 0 20px;
+        }
+
+        img {
+            width: 500px;
+            height: auto;
+            box-sizing: border-box;
+            padding: 5px;
+            border: 1px solid #000;
+        }
+
+        .data-table {
+            width: 100%;
+            justify-content: center;
+            box-sizing: border-box;
+            margin: 50px 0;
+            padding: 0 20px;
+        }
+
+        table {
+            border: none;
+            background-color: aqua;
+        }
+
+        .col1 {
+            width: 60%;
+        }
+
+        .col2 {
+            width: 15%;
+        }
+
+        .col3 {
+            width: 25%;
+        }
+
+        td, th {
+            background-color: white;
+        }
+
+        .center {
+            text-align: center;
+        }
+    </style>
+</head>
+<body>
+<h1>识别结果展示页面</h1>
+<!-- Original and annotated image side by side; each links to the full-size file -->
+<div class="img-line">
+    <div class="img-box">
+        <h1>原图</h1>
+        <a target="_blank" href="/{{ raw }}"><img src="/{{ raw }}" alt="raw"></a>
+    </div>
+    <div class="img-box">
+        <h1>结果</h1>
+        <a target="_blank" href="/{{ rec }}"><img src="/{{ rec }}" alt="rec"></a>
+    </div>
+</div>
+<!-- One table row per detection: text, confidence, position -->
+<div class="data-table">
+    <table>
+        <thead>
+        <tr>
+            <th class="col1">内容</th>
+            <th class="col2">概率</th>
+            <th class="col3">位置</th>
+        </tr>
+        </thead>
+        <tbody>
+        {% for item in data %}
+            <tr>
+                <td>{{ item.word }}</td>
+                <td class="center">{{ item.rate }}</td>
+                <td>{{ item.pos }}</td>
+            </tr>
+        {% endfor %}
+        </tbody>
+    </table>
+</div>
+</body>
+</html>

+ 2 - 0
utils/__init__.py

@@ -0,0 +1,2 @@
+from .util import *
+from .ocr import *

+ 2 - 0
utils/conf.py

@@ -0,0 +1,2 @@
+# Maximum accepted upload size in bytes (5 MiB per image); checked by the routes in app.py.
+MAX_CONTENT_LENGTH = 5 * 1024 * 1024
+DEBUG = False

+ 16 - 0
utils/ocr.py

@@ -0,0 +1,16 @@
+from .util import Args
+import numpy as np
+from paddleocr.tools.infer.predict_system import TextSystem
+
+
+class Ocr:
+    def __init__(self):
+        self._args = Args(
+            use_gpu=False,
+            enable_mkldnn=True,
+            det_model_dir="models/det/",
+            rec_model_dir="models/rec/"
+        )
+        self._eng = TextSystem(self._args)
+        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+        [self._eng(img) for _ in range(10)]

+ 97 - 0
utils/util.py

@@ -0,0 +1,97 @@
+from time import localtime, strftime
+from random import randint, seed
+import cv2
+from paddleocr.tools.infer.utility import draw_box_txt_fine
+import numpy as np
+from flask import jsonify
+
+# Names exported by ``from utils.util import *``.
+__all__ = ["Args", "Response", "rand_str", "current_time", "get_ext_name", "is_image_ext", "draw_img"]
+
+# Alphabet used by rand_str(): lower-case letters, digits, upper-case letters.
+StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
+# Highest valid index into StrBase (randint() in rand_str is inclusive on both ends).
+StrBaseLen = len(StrBase) - 1
+# Lower-case file extensions accepted by is_image_ext().
+# NOTE(review): "pdf" and "gif" are listed, but the routes decode uploads with
+# cv2.imdecode - confirm these formats are actually decodable there.
+AcceptExtNames = ["jpg", "jpeg", "bmp", "png", "rgb", "tif", "tiff", "gif", "pdf"]
+
+
+class Args:
+    def __init__(self, **kwargs):
+        self.__update(
+            use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False,
+            min_subgraph_size=15, precision="fp32", gpu_mem=500, image_dir=None, page_num=0,
+            det_algorithm="DB", det_model_dir="models/det/", det_limit_side_len=960, det_limit_type="max",
+            det_box_type="quad", det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5,
+            max_batch_size=10, use_dilation=False, det_db_score_mode="fast", det_east_score_thresh=0.8,
+            det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5,
+            det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16,
+            det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5,
+            rec_algorithm="SVTR_LCNet", rec_model_dir="models/rec/", rec_image_inverse=True,
+            rec_image_shape="3, 48, 320", rec_batch_num=6, max_text_length=25,
+            rec_char_dict_path="E:/Project/Python/PaddleOCR/venv/lib/site-packages/paddleocr/ppocr/utils/ppocr_keys_v1.txt",
+            use_space_char=True, vis_font_path="static/simfang.ttf", drop_score=0.5,
+            e2e_algorithm="PGNet", e2e_model_dir=None, e2e_limit_side_len=768, e2e_limit_type="max",
+            e2e_pgnet_score_thresh=0.5, e2e_char_dict_path="./ppocr/utils/ic15_dict.txt",
+            e2e_pgnet_valid_set="totaltext", e2e_pgnet_mode="fast", use_angle_cls=True,
+            cls_model_dir="models/cls/", cls_image_shape="3, 48, 192", label_list=["0", "180"],
+            cls_batch_num=6, cls_thresh=0.9, enable_mkldnn=True, cpu_threads=10, use_pdserving=False,
+            sr_model_dir=None, sr_image_shape="3, 32, 128", sr_batch_num=1,
+            draw_img_save_dir="static/rec_res/", save_crop_res=False, crop_res_save_dir="./output",
+            use_mp=False, benchmark=False, save_log_path="./log_output/",
+            show_log=True, use_onnx=False, output="./output", table_max_len=488, table_algorithm="TableAttn",
+            table_model_dir=None, merge_no_span_structure=True, table_char_dict_path=None,
+            layout_model_dir=None, layout_dict_path=None, layout_score_threshold=0.5,
+            layout_nms_threshold=0.5, kie_algorithm="LayoutXLM", ser_model_dir=None, re_model_dir=None,
+            use_visual_backbone=True, ser_dict_path="../train_data/XFUND/class_list_xfun.txt",
+            ocr_order_method=None, mode="structure", image_orientation=False, layout=True, table=True,
+            ocr=True, recovery=False, use_pdf2docx_api=False, lang="ch", det=True, rec=True, type="ocr",
+            ocr_version="PP-OCRv3", structure_version="PP-StructureV2"
+        )
+
+        self.__update(**kwargs)
+
+    def __update(self, **kwargs):
+        for k, v in kwargs:
+            self.__dict__[k] = v
+
+    def __setattr__(self, key: "str", value):
+        self.__dict__[key] = value
+
+    def __getattribute__(self, key: "str"):
+        assert key in self.__dict__.keys()
+        return self.__dict__[key]
+
+
+def rand_str(size: "int" = 8) -> "str":
+    return "".join([StrBase[randint(0, StrBaseLen)] for _ in range(size)])
+
+
+def current_time() -> "str":
+    return strftime("%Y-%m-%d_%H-%M-%S", localtime())
+
+
+def get_ext_name(name: "str") -> "str":
+    return name.split(".")[-1].lower()
+
+
+def is_image_ext(ext: "str") -> bool:
+    return ext in AcceptExtNames
+
+
+def Response(message: "str" = None, data=None):
+    if message is None:
+        return jsonify(success=True, message="操作成功", data=data)
+    return jsonify(success=False, message=message, data=data)
+
+
+def draw_img(shape: "tuple", data: "list[dict]", drop: "float" = 0.5):
+    img = np.ones(shape, dtype=np.uint8) * 255
+    seed(0)
+
+    for one in data:
+        if one["rate"] < drop:
+            continue
+        color = (randint(0, 255), randint(0, 255), randint(0, 255))
+        text = draw_box_txt_fine((shape[1], shape[0]), one["pos"], one["word"])
+        pts = np.array(one["pos"], np.int32).reshape((-1, 1, 2))
+        cv2.polylines(text, [pts], True, color, 1)  # noqa
+        img = cv2.bitwise_and(img, text)  # noqa
+
+    return np.array(img)