import cv2
import math
import numpy as np
from .utils import *
from hmOCR.argument import Args
from .operator import CTCLabelDecode


class Recognizer:
    """Run text recognition over a list of cropped images.

    Images are resized and normalized, fed to the predictor in batches of
    ``rec_batch_num``, and the raw predictions are decoded by
    ``CTCLabelDecode``.
    """

    def __init__(self, args: "Args"):
        """Build the decoder and predictor from ``args``.

        Reads ``args.rec_image_shape`` (comma-separated integers, unpacked
        elsewhere as ``C,H,W``), ``args.rec_batch_num`` and
        ``args.rec_char_dict_path``.
        """
        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.rec_batch_num = args.rec_batch_num
        # rec_algorithm: only "SVTR_LCNet" now
        # self.rec_algorithm = args.rec_algorithm
        self.post_op = CTCLabelDecode(args.rec_char_dict_path)
        # NOTE(review): create_predictor is presumably provided by the
        # star import from .utils -- confirm.
        self.predictor, self.input_tensor, self.output_tensors = create_predictor(args, "rec")

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize ``img`` to the model height, normalize, and right-pad with zeros.

        Args:
            img: image array indexed as (height, width, channels); the channel
                count must equal the configured one.
            max_wh_ratio: width/height ratio that fixes the padded output width.

        Returns:
            A float32 array of shape ``(C, H, int(H * max_wh_ratio))`` whose
            left ``resized_w`` columns hold the normalized image and whose
            remaining columns are zero.
        """
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        # The configured width is replaced by one derived from the batch ratio.
        imgW = int((imgH * max_wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        # Keep the aspect ratio, but never exceed the padded width.
        resized_w = min(imgW, int(math.ceil(imgH * ratio)))
        resized_image = cv2.resize(img, (resized_w, imgH))  # noqa
        resized_image = resized_image.astype("float32")
        # HWC -> CHW, then (x / 255 - 0.5) / 0.5, which maps 0 to -1 and 255 to 1.
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list, use_space=False):
        """Recognize every image in ``img_list``.

        Args:
            img_list: sequence of image arrays indexed as (height, width, ...).
            use_space: forwarded unchanged to the decoder.

        Returns:
            A list with one entry per input image, in input order. Each entry
            is whatever the decoder yields for that image (presumably a
            text/score pair -- confirm against CTCLabelDecode); any slot the
            decoder does not fill keeps the placeholder ``["", 0.0]``.
        """
        img_num = len(img_list)
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))
        # One independent placeholder per image: ``[[...]] * n`` would alias a
        # single list object across every slot.
        rec_res = [["", 0.0] for _ in range(img_num)]
        batch_num = self.rec_batch_num
        _, imgH, imgW = self.rec_image_shape[:3]
        base_wh_ratio = imgW / imgH
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            batch_indices = indices[beg_img_no:end_img_no]

            # The widest image in the batch (but at least the configured
            # ratio) decides the common padded width.
            max_wh_ratio = base_wh_ratio
            for idx in batch_indices:
                h, w = img_list[idx].shape[0:2]
                max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)

            norm_img_batch = np.concatenate(
                [
                    self.resize_norm_img(img_list[idx], max_wh_ratio)[np.newaxis, :]
                    for idx in batch_indices
                ]
            )
            norm_img_batch = norm_img_batch.copy()

            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.run()
            outputs = [output_tensor.copy_to_cpu() for output_tensor in self.output_tensors]
            # A single output is passed bare; several are passed as a list.
            preds = outputs if len(outputs) != 1 else outputs[0]

            rec_result = self.post_op(preds, use_space=use_space)
            # Scatter the results back to the callers' original ordering.
            for rno, result in enumerate(rec_result):
                rec_res[indices[beg_img_no + rno]] = result
        return rec_res