import re

import cv2
import numpy as np
import pyclipper
from PIL import Image
from paddle import Tensor
from shapely.geometry import Polygon

__all__ = [
    "DetResizeForTest", "NormalizeImage", "ToCHWImage", "KeepKeys",
    "DBPostProcess", "ClsPostProcess", "CTCLabelDecode"
]


class DetResizeForTest:
    def __init__(self, **kwargs):
        if "limit_side_len" in kwargs:
            self.limit_side_len = kwargs["limit_side_len"]
            self.limit_type = kwargs.get("limit_type", "min")
        else:
            self.limit_side_len = 736
            self.limit_type = "min"

    def __call__(self, data):
        img = data["image"]
        src_h, src_w, _ = img.shape
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)
        img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        data["image"] = img
        data["shape"] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    @staticmethod
    def image_padding(im, value=0):
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type0(self, img):
        """
        Resize the image so that both sides are multiples of 32, as required by the network.
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        # limit the max side
        if self.limit_type == "max":
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == "min":
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == "resize_long":
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise ValueError(f"unsupported limit_type: {self.limit_type}")

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))  # noqa
        except Exception as e:
            print(img.shape, resize_w, resize_h, e)
            exit(0)

        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]


class NormalizeImage:
    def __init__(self, scale, mean, std, order="chw"):
        self.scale = np.float32(scale)
        shape = (3, 1, 1) if order == "chw" else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype("float32")
        self.std = np.array(std).reshape(shape).astype("float32")

    def __call__(self, data):
        img = data["image"]
        if isinstance(img, Image.Image):
            img = np.array(img)  # noqa
        assert isinstance(img, np.ndarray), "invalid input img in NormalizeImage"
        data["image"] = (img.astype("float32") * self.scale - self.mean) / self.std
        return data


class ToCHWImage:
    def __call__(self, data):
        img = data["image"]
        if isinstance(img, Image.Image):
            img = np.array(img)  # noqa
        data["image"] = img.transpose((2, 0, 1))
        return data


class KeepKeys:
    def __init__(self, keep_keys):
        self.keep_keys = keep_keys

    def __call__(self, data):
        return [data[key] for key in self.keep_keys]


class DBPostProcess:
    def __init__(
            self,
            thresh=0.3,
            box_thresh=0.7,
            max_candidates=1000,
            unclip_ratio=2.0,
            use_dilation=False,
            score_mode="fast",
            box_type="quad"
    ):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            "slow", "fast"
        ], f"Score mode must be in [slow, fast] but got: {score_mode}"

        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])
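
    # Box extraction flow shared by polygons_from_bitmap / boxes_from_bitmap:
    #   1. __call__ thresholds the probability map into a binary bitmap,
    #   2. cv2.findContours extracts candidate regions from that bitmap,
    #   3. each candidate is scored against the probability map and weak ones are dropped,
    #   4. the surviving region is expanded via unclip() (offset distance =
    #      area * unclip_ratio / perimeter) and rescaled to the source image size.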

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """
        _bitmap: single map with shape (H, W),
        whose values are binarized as {0, 1}
        """
        bitmap = _bitmap
        height, width = bitmap.shape
        boxes, scores = [], []

        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)  # noqa

        for contour in contours[:self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)  # noqa
            approx = cv2.approxPolyDP(contour, epsilon, True)  # noqa
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            if points.shape[0] > 2:
                box = self.unclip(points, self.unclip_ratio)
                if len(box) > 1:
                    continue
            else:
                continue
            box = box.reshape(-1, 2)

            _, s_side = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if s_side < self.min_size + 2:
                continue

            box = np.array(box)
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """
        _bitmap: single map with shape (H, W),
        whose values are binarized as {0, 1}
        """
        bitmap, contours = _bitmap, None
        height, width = bitmap.shape

        # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 4.x returns (contours, hierarchy).
        outs = cv2.findContours((bitmap * 255).astype(np.uint8),
                                cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)  # noqa
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, s_side = self.get_mini_boxes(contour)
            if s_side < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)  # noqa
            box, s_side = self.get_mini_boxes(box)
            if s_side < self.min_size + 2:
                continue
            box = np.array(box)

            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype("int32"))
            scores.append(score)
        return np.array(boxes, dtype="int32"), scores

    @staticmethod
    def unclip(box, unclip_ratio):
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    @staticmethod
    def get_mini_boxes(contour):
        bounding_box = cv2.minAreaRect(contour)  # noqa
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])  # noqa

        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [points[index_1], points[index_2],
               points[index_3], points[index_4]]
        return box, min(bounding_box[1])
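
    # Scoring helpers: box_score_fast averages the probability map inside the
    # candidate's bounding crop masked by its quad, while box_score_slow
    # rasterises the exact contour polygon (tighter for curved text, but slower).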

    @staticmethod
    def box_score_fast(bitmap, _box):
        """
        box_score_fast: use the bbox mean score as the box score
        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        x_min = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
        x_max = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1)
        y_min = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1)
        y_max = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1)

        mask = np.zeros((y_max - y_min + 1, x_max - x_min + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - x_min
        box[:, 1] = box[:, 1] - y_min
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1)  # noqa
        return cv2.mean(bitmap[y_min:y_max + 1, x_min:x_max + 1], mask)[0]  # noqa

    @staticmethod
    def box_score_slow(bitmap, contour):
        """
        box_score_slow: use the polygon mean score as the box score
        """
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        x_min = np.clip(np.min(contour[:, 0]), 0, w - 1)
        x_max = np.clip(np.max(contour[:, 0]), 0, w - 1)
        y_min = np.clip(np.min(contour[:, 1]), 0, h - 1)
        y_max = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((y_max - y_min + 1, x_max - x_min + 1), dtype=np.uint8)
        contour[:, 0] = contour[:, 0] - x_min
        contour[:, 1] = contour[:, 1] - y_min

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)  # noqa
        return cv2.mean(bitmap[y_min:y_max + 1, x_min:x_max + 1], mask)[0]  # noqa

    def __call__(self, outs_dict, shape_list):
        pred = outs_dict["maps"]
        if isinstance(pred, Tensor):
            pred = pred.numpy()
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(np.array(segmentation[batch_index]).astype(np.uint8),
                                  self.dilation_kernel)  # noqa
            else:
                mask = segmentation[batch_index]

            if self.box_type == "poly":
                boxes, scores = self.polygons_from_bitmap(pred[batch_index], mask, src_w, src_h)
            elif self.box_type == "quad":
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, src_w, src_h)
            else:
                raise ValueError("box_type can only be one of ['quad', 'poly']")

            boxes_batch.append({"points": boxes})
        return boxes_batch


class ClsPostProcess:
    """ Convert between text-label and text-index """

    def __init__(self, label_list=None):
        self.label_list = label_list

    def __call__(self, preds, label=None, *args, **kwargs):
        label_list = self.label_list
        if label_list is None:
            label_list = {idx: idx for idx in range(preds.shape[-1])}

        if isinstance(preds, Tensor):
            preds = preds.numpy()

        pred_ids = preds.argmax(axis=1)
        decode_out = [(label_list[idx], preds[i, idx])
                      for i, idx in enumerate(pred_ids)]
        if label is None:
            return decode_out
        label = [(label_list[idx], 1.0) for idx in label]
        return decode_out, label


class __BaseRecDecoder:
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None):
        self.beg_str = "sos"
        self.end_str = "eos"
        self.reverse = False
        self.character_str = []

        if character_dict_path is None:
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz "
            dict_character = list(self.character_str)
        else:
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode("utf-8").strip("\n").strip("\r\n")
                    self.character_str.append(line)
            self.character_str.append(" ")
            dict_character = list(self.character_str)

        dict_character = self.add_special_char(dict_character)
        self.max_index = len(dict_character) - 1
        self.dict = {}
        for i, char in enumerate(dict_character):
            self.dict[char] = i
        self.character = dict_character

    @staticmethod
    def pred_reverse(pred):
        pred_re = []
        c_current = ""
        for c in pred:
            if not bool(re.search("[a-zA-Z0-9 :*./%+-]", c)):
                if c_current != "":
                    pred_re.append(c_current)
                pred_re.append(c)
                c_current = ""
            else:
                c_current += c
        if c_current != "":
            pred_re.append(c_current)

        return "".join(pred_re[::-1])

    def add_special_char(self, dict_character):
        return dict_character
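
    # decode() performs CTC-style collapsing: with is_remove_duplicate=True,
    # consecutive repeats are dropped first, then ignored tokens (index 0, the
    # CTC blank) are removed, so e.g. indices [5, 5, 0, 5] keep two characters.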

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False, use_space=False):
        """ Convert text-index into text-label. """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        batch_size = len(text_index)
        for batch_idx in range(batch_size):
            selection = np.ones(len(text_index[batch_idx]), dtype=bool)
            if is_remove_duplicate:
                selection[1:] = text_index[batch_idx][1:] != text_index[batch_idx][:-1]
            for ignored_token in ignored_tokens:
                selection &= text_index[batch_idx] != ignored_token

            char_list = []
            for index in text_index[batch_idx][selection]:
                if index == self.max_index and not use_space:
                    continue
                char_list.append(self.character[index])

            if text_prob is not None:
                conf_list = text_prob[batch_idx][selection]
            else:
                conf_list = [1] * len(selection)
            if len(conf_list) == 0:
                conf_list = [0]

            text = "".join(char_list)
            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list

    @staticmethod
    def get_ignored_tokens():
        return [0]  # for ctc blank


class CTCLabelDecode(__BaseRecDecoder):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None):
        super(CTCLabelDecode, self).__init__(character_dict_path)

    def __call__(self, preds, use_space=False, *args, **kwargs):
        if isinstance(preds, tuple) or isinstance(preds, list):
            preds = preds[-1]
        if isinstance(preds, Tensor):
            preds = preds.numpy()
        preds_idx = preds.argmax(axis=2)
        preds_prob = preds.max(axis=2)
        return self.decode(preds_idx, preds_prob, is_remove_duplicate=True, use_space=use_space)

    def add_special_char(self, dict_character):
        dict_character = ["blank"] + dict_character
        return dict_character
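

if __name__ == "__main__":
    # Illustrative usage sketch, not part of the library API: it exercises the
    # detection preprocessing transforms, DBPostProcess and CTCLabelDecode on
    # synthetic data so the module can be smoke-tested without a model. The
    # normalization constants below are the usual ImageNet statistics; swap in
    # whatever values your detector was actually trained with.
    rng = np.random.default_rng(0)
    dummy = rng.integers(0, 255, size=(480, 640, 3), dtype=np.uint8)

    transforms = [
        DetResizeForTest(limit_side_len=736, limit_type="min"),
        NormalizeImage(scale=1.0 / 255.0,
                       mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225],
                       order="hwc"),
        ToCHWImage(),
        KeepKeys(keep_keys=["image", "shape"]),
    ]
    data = {"image": dummy}
    for op in transforms:
        data = op(data)
    image, shape = data
    print("preprocessed image:", image.shape, "shape info:", shape)

    # Fake a detector output: one bright rectangle on an otherwise empty probability map.
    prob_map = np.zeros((1, 1, image.shape[1], image.shape[2]), dtype=np.float32)
    prob_map[0, 0, 100:200, 150:400] = 0.9
    post = DBPostProcess(thresh=0.3, box_thresh=0.6, box_type="quad")
    boxes = post({"maps": prob_map}, np.expand_dims(shape, axis=0))
    print("detected boxes:", boxes[0]["points"])

    # Fake a recognizer output whose time steps argmax to "o", blank, "k", "k".
    decoder = CTCLabelDecode()
    logits = np.zeros((1, 4, len(decoder.character)), dtype=np.float32)
    for t, idx in enumerate([decoder.dict["o"], 0, decoder.dict["k"], decoder.dict["k"]]):
        logits[0, t, idx] = 1.0
    print("decoded text:", decoder(logits))  # -> [('ok', 1.0)]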