from copy import deepcopy
from os import environ, path
from platform import system

from paddle import fluid, inference

from utils.logger import Logger

from .operator import *  # noqa

__all__ = ["create_operators", "build_post_process", "create_predictor", "transform"]

def create_operators(op_param_list):
    """Instantiate preprocessing operators from a list of {name: params} dicts."""
    ops = []
    for operator in op_param_list:
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        # operator classes are brought into scope by `from .operator import *`
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
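
# Usage sketch (the operator names below are hypothetical; the real classes
# come from `.operator`). A pipeline is typically declared as a list of
# single-key dicts, e.g. parsed from a YAML config:
#
#   ops = create_operators([
#       {"DecodeImage": {"img_mode": "BGR"}},
#       {"NormalizeImage": None},  # None -> default constructor arguments
#   ])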

def transform(data, ops=None):
    """Run `data` through each operator in turn; abort early if one returns None."""
    if ops is None:
        ops = []
    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data
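
# A failed step (e.g. an image that cannot be decoded) short-circuits the whole
# pipeline, so callers only need a single None check (dict keys are hypothetical):
#
#   data = transform({"image": raw_bytes}, ops)
#   if data is None:
#       ...  # skip this sample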

def build_post_process(config):
    """Build a post-processing object; the "name" key selects the class."""
    config = deepcopy(config)
    module_name = config.pop("name")
    # the remaining keys become constructor arguments
    return eval(module_name)(**config)
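
# Example (class name and argument are assumptions; any post-process class
# visible through the wildcard import works the same way):
#
#   post_op = build_post_process({"name": "CTCLabelDecode",
#                                 "character_dict_path": "ppocr_keys_v1.txt"})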

def __get_gpu_id():
    # Windows builds default to the first device
    if system() == "Windows":
        return 0
    if not fluid.core.is_compiled_with_rocm():
        env_key = "CUDA_VISIBLE_DEVICES"
    else:
        env_key = "HIP_VISIBLE_DEVICES"
    # read the variable directly instead of shelling out to `env | grep`
    env_cuda = environ.get(env_key)
    if not env_cuda:
        return None  # no visibility variable set; the caller falls back to GPU 0
    # take the first visible device id, e.g. "2,3" -> 2
    return int(env_cuda.strip().split(",")[0])

def __get_output_tensors(args, mode, predictor):
    output_names = predictor.get_output_names()
    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        # CTC-based recognizers expose a single softmax output when exported
        output_name = "softmax_0.tmp_0"
        if output_name in output_names:
            return [predictor.get_output_handle(output_name)]
    return [predictor.get_output_handle(name) for name in output_names]

def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == "cls":
        model_dir = args.cls_model_dir
    else:  # rec
        model_dir = args.rec_model_dir
    if model_dir is None:
        Logger.error(f"no model dir defined in args for mode '{mode}'")
        exit(1)

    # accept either naming convention for exported inference models
    file_names, model_path, param_path = ["model", "inference"], None, None
    for file_name in file_names:
        model_file_path = path.join(model_dir, f"{file_name}.pdmodel")
        params_file_path = path.join(model_dir, f"{file_name}.pdiparams")
        if path.exists(model_file_path) and path.exists(params_file_path):
            model_path, param_path = model_file_path, params_file_path
            break
    if model_path is None:
        raise ValueError(f"model.pdmodel or inference.pdmodel not found in {model_dir}")
    if param_path is None:
        raise ValueError(f"model.pdiparams or inference.pdiparams not found in {model_dir}")

    config = inference.Config(model_path, param_path)

    # half precision is only honoured by the TensorRT engine
    precision = inference.PrecisionType.Float32
    if hasattr(args, "precision"):
        if args.precision == "fp16" and args.use_tensorrt:
            precision = inference.PrecisionType.Half
        elif args.precision == "int8":
            precision = inference.PrecisionType.Int8

    if args.use_gpu:
        gpu_id = __get_gpu_id()
        if gpu_id is None:
            Logger.warning(
                "CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES is not set; "
                "falling back to GPU 0. Ignore this if running on Jetson."
            )
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                precision_mode=precision,
                max_batch_size=args.max_batch_size,
                # skip TensorRT for subgraphs smaller than this
                min_subgraph_size=args.min_subgraph_size,
                use_calib_mode=False,
            )
            # collect dynamic shape ranges on the first run, then reuse them
            trt_shape_f = path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
            if not path.exists(trt_shape_f):
                config.collect_shape_range_info(trt_shape_f)
                Logger.warning(f"collecting dynamic shape info into: {trt_shape_f}")
            try:
                config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
            except Exception as e:
                Logger.error(e)
                Logger.error("Please keep your paddlepaddle-gpu >= 2.3.0!")
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # the cache capacity bounds how many input shapes oneDNN keeps kernels for
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            config.set_cpu_math_library_num_threads(10)

    config.enable_memory_optim()
    config.disable_glog_info()
    # these fusion passes are known to cause problems for OCR models
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)

    predictor = inference.create_predictor(config)
    # the det/cls/rec models each take a single input, so keep the last handle
    input_names = predictor.get_input_names()
    input_tensor = None
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_tensors = __get_output_tensors(args, mode, predictor)
    return predictor, input_tensor, output_tensors
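
# Minimal usage sketch: `args` must provide the fields read above, and `img`
# stands in for an already preprocessed NCHW float32 numpy array:
#
#   predictor, input_tensor, output_tensors = create_predictor(args, "det")
#   input_tensor.copy_from_cpu(img)
#   predictor.run()
#   outputs = [t.copy_to_cpu() for t in output_tensors]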