Bladeren bron

v3.5: 2×5, multiple processes with a task queue for concurrency.

Tinger 2 jaren geleden
bovenliggende
commit
d2fe950bdf
6 gewijzigde bestanden met toevoegingen van 64 en 20 verwijderingen
  1. 1 1
      blues/com.py
  2. 3 3
      blues/idc.py
  3. 2 2
      hmOCR/__init__.py
  4. 1 3
      hmOCR/argument.py
  5. 55 9
      hmOCR/core.py
  6. 2 2
      utils/util.py

+ 1 - 1
blues/com.py

@@ -25,7 +25,7 @@ class ComView(views.MethodView):
         save_img(file_path, content)
 
         img = read_img(content)
-        ocr_res = Engine.ocr_one(img, cls=True)
+        ocr_res = Engine.rec_one(img)
         kind = request.form.get("type")
         if kind is not None:
             kind = kind.lower()

+ 3 - 3
blues/idc.py

@@ -84,10 +84,10 @@ class IdcView(views.MethodView):
 
         img = read_img(content)
         images = rot_img(img)
-        rec = Engine.ocr_multi(images, cls=True, use_space=False)
+        rec = Engine.rec_multi(images)
         info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
         for idx, ocr_res in enumerate(rec):
-            words = [it[0].replace(" ", "") for it in ocr_res]
+            words = [it[0] for it in ocr_res]
             if which == "face":
                 if not words or not words[0].startswith("姓名"):
                     continue
@@ -128,7 +128,7 @@ class IdcHtmlView(views.MethodView):
 
         img = read_img(content)
         images = rot_img(img)
-        rec = Engine.ocr_multi(images, cls=True, use_space=False)
+        rec = Engine.rec_multi(images)
         info, msg, sta, idx = {}, "识别失败,请重新选择", False, 0
         for idx, ocr_res in enumerate(rec):
             words = [it[0].replace(" ", "") for it in ocr_res]

+ 2 - 2
hmOCR/__init__.py

@@ -1,3 +1,3 @@
-from .core import HuiMvOcr
-from .argument import Args, ArgType
+from .core import *
 from .utility import *
+from .argument import *

+ 1 - 3
hmOCR/argument.py

@@ -21,9 +21,7 @@ class Args:
             rec_image_shape="3, 48, 320", rec_batch_num=8, max_text_length=25,
             rec_char_dict_path="hmOCR/static/key-set.txt", use_space_char=False,
             # OCR
-            drop_score=0.5,
-            # test
-            image_dir="static/test_image", warmup=True
+            drop_score=0.5, workers=5, interval=0.1,
         )
         self.__update(**kwargs)
 

+ 55 - 9
hmOCR/core.py

@@ -1,13 +1,18 @@
+import logging
 from .parts import *
 from .utility import *
+from time import sleep
+from numpy import ndarray
 from copy import deepcopy
 from .argument import ArgType
-from concurrent.futures import ThreadPoolExecutor
+from threading import Thread, Lock
 
-__all__ = ["HuiMvOcr"]
+__all__ = ["Engine", "HuiMvOCR"]
+logger = logging.getLogger("hm-ocr")
+logger.setLevel(logging.INFO)
 
 
-class HuiMvOcr:
+class Engine:
     __worker_count = 1
 
     def __init__(self, args: "ArgType"):
@@ -19,7 +24,7 @@ class HuiMvOcr:
         self.drop_score = args.drop_score
         self.crop_image_res_index = 0
 
-    def ocr_one(self, img, cls: "bool" = False, use_space: "bool" = True):
+    def __call__(self, img, cls: "bool" = True, use_space: "bool" = True):
         ori_im = img.copy()
         dt_boxes = self.det(img)
         if dt_boxes is None:
@@ -48,9 +53,50 @@ class HuiMvOcr:
 
         return filter_rec_res
 
-    def ocr_multi(self, img_list, cls: "bool" = False, use_space: "bool" = True):
-        pool = ThreadPoolExecutor(HuiMvOcr.__worker_count)
-        loop = range(len(img_list))
-        tasks = [pool.submit(self.ocr_one, img_list[i], cls, use_space) for i in loop]
 
-        return [tasks[i].result() for i in loop]
+class HuiMvOCR:
+    __lock = Lock()
+    __tasks = []  # item: [img: "ndarray", ocr_args: "dict", callback: "fn", callback_args: "dict"]
+
+    def __init__(self, args: "ArgType"):
+        self.interval = args.interval
+
+        for i in range(args.workers):
+            Thread(target=self.__processor, args=(Engine(args), i), daemon=True).start()
+
+    @staticmethod
+    def __processor(ocr: "Engine", eid: "int"):
+        logger.info(f"================ Engine[{eid}] initialized ================")
+        while True:
+            if HuiMvOCR.__tasks:
+                HuiMvOCR.__lock.acquire()
+                img, ocr_args, callback, callback_args = HuiMvOCR.__tasks.pop(0)
+                HuiMvOCR.__lock.release()
+                res = ocr(img)
+                callback(res, **callback_args)
+            sleep(0.1)
+
+    def rec_one(self, img: "ndarray", cls: "bool" = True, use_space: "bool" = True):
+        def callback(res):
+            foo[1] = res
+            foo[0] = 1
+
+        foo = [0, None]  # finish_count, result
+        args = {"cls": cls, "use_space": use_space}
+        HuiMvOCR.__tasks.append([img, args, callback, {}])
+        while foo[0] < 1:
+            sleep(self.interval)
+        return foo[1]
+
+    def rec_multi(self, images: "list[ndarray]", cls: "bool" = False, use_space: "bool" = False):
+        def callback(res, index):
+            foo[1][index] = res
+            foo[0] += 1
+
+        size, args = len(images), {"cls": cls, "use_space": use_space}
+        foo = [0, [...] * size]  # finish_count, result
+        for i in range(size):
+            HuiMvOCR.__tasks.append([images[i], args, callback, {"index": i}])
+        while foo[0] < size:
+            sleep(self.interval)
+        return foo[1]

+ 2 - 2
utils/util.py

@@ -3,7 +3,7 @@ import numpy as np
 from typing import Union
 from flask import jsonify
 from random import randint
-from hmOCR import HuiMvOcr, Args
+from hmOCR import HuiMvOCR, Args
 from time import localtime, strftime
 
 __all__ = [
@@ -14,7 +14,7 @@ __all__ = [
 __StrBase = "qwertyuioplkjhgfdsazxcvbnm1234567890ZXCVBNMLKJHGFDSAQWERTYUIOP"
 __StrBaseLen = len(__StrBase) - 1
 __AcceptExtNames = ["jpg", "jpeg", "bmp", "png", "rgb", "tif", "tiff", "gif"]
-Engine = HuiMvOcr(Args())
+Engine = HuiMvOCR(Args())
 
 
 def Response(message: "str" = None, data=None):