from .operator import *  # noqa
from copy import deepcopy
from os import path, popen
from platform import system
from utils.logger import Logger
from paddle import inference, fluid

__all__ = ["create_operators", "build_post_process", "create_predictor", "transform"]


def create_operators(op_param_list):
    """Instantiate preprocessing operators from a list of single-key {name: params} dicts."""
    ops = []
    for operator in op_param_list:
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        op = eval(op_name)(**param)
        ops.append(op)
    return ops


def transform(data, ops=None):
    """Apply each operator to `data` in order; return None if any operator drops the sample."""
    if ops is None:
        ops = []
    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data


def build_post_process(config):
    """Build a post-processing object from a config dict whose `name` key selects the class."""
    config = deepcopy(config)
    module_name = config.pop("name")
    return eval(module_name)(**config)


def __get_gpu_id():
    """Read the first visible GPU id from the environment; fall back to 0."""
    if system() == "Windows":
        return 0
    if not fluid.core.is_compiled_with_rocm():
        cmd = "env | grep CUDA_VISIBLE_DEVICES"
    else:
        cmd = "env | grep HIP_VISIBLE_DEVICES"
    env_cuda = popen(cmd).readlines()
    if len(env_cuda) == 0:
        return 0
    else:
        gpu_id = env_cuda[0].strip().split("=")[1]
        return int(gpu_id[0])


def __get_output_tensors(args, mode, predictor):
    """Collect output handles; CRNN/SVTR_LCNet recognizers expose a single softmax output."""
    output_names = predictor.get_output_names()
    output_tensors = []
    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        output_name = "softmax_0.tmp_0"
        if output_name in output_names:
            return [predictor.get_output_handle(output_name)]
        else:
            for output_name in output_names:
                output_tensor = predictor.get_output_handle(output_name)
                output_tensors.append(output_tensor)
    else:
        for output_name in output_names:
            output_tensor = predictor.get_output_handle(output_name)
            output_tensors.append(output_tensor)
    return output_tensors


def create_predictor(args, mode):
    """Create a Paddle inference predictor for the det/cls/rec model selected by `mode`."""
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == "cls":
        model_dir = args.cls_model_dir
    else:  # rec
        model_dir = args.rec_model_dir

    if model_dir is None:
        Logger.error("no model_dir defined in args")
        exit(0)

    # Accept either model.pdmodel/pdiparams or inference.pdmodel/pdiparams.
    file_names, model_path, param_path = ["model", "inference"], None, None
    for file_name in file_names:
        model_file_path = path.join(model_dir, f"{file_name}.pdmodel")
        params_file_path = path.join(model_dir, f"{file_name}.pdiparams")
        if path.exists(model_file_path) and path.exists(params_file_path):
            model_path, param_path = model_file_path, params_file_path
            break
    if model_path is None:
        raise ValueError(f"cannot find model.pdmodel or inference.pdmodel in {model_dir}")
    if param_path is None:
        raise ValueError(f"cannot find model.pdiparams or inference.pdiparams in {model_dir}")

    config = inference.Config(model_path, param_path)

    precision = inference.PrecisionType.Float32
    if hasattr(args, "precision"):
        if args.precision == "fp16" and args.use_tensorrt:
            precision = inference.PrecisionType.Half
        elif args.precision == "int8":
            precision = inference.PrecisionType.Int8
        else:
            precision = inference.PrecisionType.Float32

    if args.use_gpu:
        gpu_id = __get_gpu_id()
        if gpu_id is None:
            Logger.warning(
                "GPU is not found in current device by nvidia-smi. "
                "Please check your device or ignore it if run on jetson."
            )
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                precision_mode=precision,
                max_batch_size=args.max_batch_size,
                min_subgraph_size=args.min_subgraph_size,  # skip TRT for subgraphs below this size
                use_calib_mode=False,
            )
            # Collect dynamic shape info on the first run and reuse the tuned file afterwards.
            trt_shape_f = path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
            if not path.exists(trt_shape_f):
                config.collect_shape_range_info(trt_shape_f)
                Logger.warning(f"collect dynamic shape info into: {trt_shape_f}")
            try:
                config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
            except Exception as E:
                Logger.error(E)
                Logger.error("Please keep your paddlepaddle-gpu >= 2.3.0!")
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            config.set_cpu_math_library_num_threads(10)

    config.enable_memory_optim()
    config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)

    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = None
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_tensors = __get_output_tensors(args, mode, predictor)
    return predictor, input_tensor, output_tensors
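

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). It shows how `create_predictor`
# could be wired up for a detection model. The model path and the dummy input
# shape below are assumptions, not part of this module; the attribute names on
# `args` are the ones read by `create_predictor` above. Because this module
# uses a relative import, the sketch is meant to run with the package on the
# import path (e.g. via `python -m <package>.<module>`), not as a loose script.
if __name__ == "__main__":
    from types import SimpleNamespace

    import numpy as np

    args = SimpleNamespace(
        det_model_dir="./det_model",  # hypothetical directory holding *.pdmodel / *.pdiparams
        cls_model_dir=None,
        rec_model_dir=None,
        rec_algorithm="CRNN",
        use_gpu=False,
        gpu_mem=500,
        use_tensorrt=False,
        max_batch_size=10,
        min_subgraph_size=15,
        precision="fp32",
        enable_mkldnn=False,
        cpu_threads=10,
    )
    predictor, input_tensor, output_tensors = create_predictor(args, "det")

    # Feed a dummy NCHW float32 batch; a real pipeline would preprocess an
    # image with `transform(data, create_operators(...))` first.
    input_tensor.copy_from_cpu(np.zeros((1, 3, 640, 640), dtype="float32"))
    predictor.run()
    outputs = [t.copy_to_cpu() for t in output_tensors]
    print([o.shape for o in outputs])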