import cv2
import math
import numpy as np

from .utils import *
from hmOCR.argument import Args
from .operator import CTCLabelDecode


class Recognizer:
    """Text-line recognizer wrapping an inference predictor and CTC label decoding."""

    def __init__(self, args: "Args"):
        # Model input shape given as "C,H,W" (e.g. "3,48,320").
        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.rec_batch_num = args.rec_batch_num
        # rec_algorithm: only "SVTR_LCNet" is supported for now
        # self.rec_algorithm = args.rec_algorithm
        self.post_op = CTCLabelDecode(args.rec_char_dict_path)
        # create_predictor is expected to come from the wildcard import of .utils
        self.predictor, self.input_tensor, self.output_tensors = create_predictor(args, "rec")

    def resize_norm_img(self, img, max_wh_ratio):
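        """Resize, normalize, and pad a single crop to the shared batch canvas.

        The crop is resized to height imgH while keeping its aspect ratio (capped at
        max_wh_ratio), scaled to [-1, 1] in CHW layout, and zero-padded on the right
        so every image in the batch has width imgH * max_wh_ratio.
        """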
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        imgW = int(imgH * max_wh_ratio)
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))  # noqa
        resized_image = resized_image.astype("float32")
        # Scale to [0, 1], switch HWC -> CHW, then shift to [-1, 1].
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        # Zero-pad on the right up to the common batch width.
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list, use_space=False):
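        """Recognize a list of text-line crops.

        Crops are sorted by aspect ratio, processed in batches of rec_batch_num, and a
        [text, score] result is returned for every input in its original order. The
        use_space flag is forwarded to the CTC label decoder.
        """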
        img_num = len(img_list)
        # Aspect ratio (w / h) of every crop.
        width_list = []
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting by aspect ratio speeds up recognition: crops of similar width end up
        # in the same batch, so less padding is wasted.
        indices = np.argsort(np.array(width_list))
        # Placeholder [text, score] results, filled back in the original input order.
        rec_res = [["", 0.0]] * img_num
        batch_num = self.rec_batch_num
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            imgC, imgH, imgW = self.rec_image_shape[:3]
            max_wh_ratio = imgW / imgH
            # First pass: find the widest aspect ratio in this batch so that all crops
            # can be padded to one common width.
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
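            # Second pass: resize and normalize every crop with the shared max ratio.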
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]], max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.run()
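            # Copy every output tensor back to host memory.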
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)
            if len(outputs) != 1:
                preds = outputs
            else:
                preds = outputs[0]
            # CTC decode, then scatter results back to the caller's original order.
            rec_result = self.post_op(preds, use_space=use_space)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
        return rec_res
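

# Usage sketch (illustrative only): how a caller might drive Recognizer on pre-cropped
# text-line images. It assumes that hmOCR.argument.Args() can be built with defaults that
# already point at the recognition model and character dictionary (its real fields are not
# shown in this file), and that crops/*.png are single-line BGR crops such as those produced
# by a text detector. Because of the relative imports above, run this as a module inside
# the package (python -m ...), not as a standalone script.
if __name__ == "__main__":
    import glob

    args = Args()  # assumption: default Args carries valid rec_* settings and model paths
    recognizer = Recognizer(args)

    paths = sorted(glob.glob("crops/*.png"))
    crops = [cv2.imread(p) for p in paths]  # HWC, BGR, uint8 crops of single text lines

    results = recognizer(crops, use_space=True)
    for p, (text, score) in zip(paths, results):
        print(f"{p}: {text!r} (score={score:.3f})")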