# utils.py

from .operator import *  # noqa
from copy import deepcopy
from os import path, popen
from platform import system
from utils.logger import Logger
from paddle import inference, fluid

__all__ = ["create_operators", "build_post_process", "create_predictor", "transform"]

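# Build the preprocessing pipeline: each entry in `op_param_list` is a one-key
# dict mapping an operator class name (made available by
# `from .operator import *`) to its keyword arguments; the class is resolved
# with eval() and instantiated.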
def create_operators(op_param_list):
    ops = []
    for operator in op_param_list:
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        op = eval(op_name)(**param)
        ops.append(op)
    return ops

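# Apply the preprocessing operators to `data` in order; any operator may
# return None to signal that the sample should be dropped.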
def transform(data, ops=None):
    if ops is None:
        ops = []
    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data

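# Instantiate a post-processing class from a config dict whose "name" key
# selects the class (resolved via eval); the remaining keys are passed on as
# keyword arguments.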
def build_post_process(config):
    config = deepcopy(config)
    module_name = config.pop("name")
    return eval(module_name)(**config)

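# Read the GPU id from CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES. Note that
# only the first character of the value is used, and 0 is returned when the
# variable is unset, so the `gpu_id is None` check in create_predictor never
# triggers as written.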
def __get_gpu_id():
    if system() == "Windows":
        return 0
    if not fluid.core.is_compiled_with_rocm():
        cmd = "env | grep CUDA_VISIBLE_DEVICES"
    else:
        cmd = "env | grep HIP_VISIBLE_DEVICES"
    env_cuda = popen(cmd).readlines()
    if len(env_cuda) == 0:
        return 0
    else:
        gpu_id = env_cuda[0].strip().split("=")[1]
        return int(gpu_id[0])

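# Collect output handles from the predictor. For CRNN / SVTR_LCNet recognition
# models the single "softmax_0.tmp_0" tensor is preferred when it exists;
# otherwise all outputs are returned.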
def __get_output_tensors(args, mode, predictor):
    output_names = predictor.get_output_names()
    output_tensors = []
    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        output_name = "softmax_0.tmp_0"
        if output_name in output_names:
            return [predictor.get_output_handle(output_name)]
        else:
            for output_name in output_names:
                output_tensor = predictor.get_output_handle(output_name)
                output_tensors.append(output_tensor)
    else:
        for output_name in output_names:
            output_tensor = predictor.get_output_handle(output_name)
            output_tensors.append(output_tensor)
    return output_tensors

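# Build a Paddle Inference predictor for the requested mode ("det", "cls" or
# "rec"), locating model.pdmodel/inference.pdmodel under the corresponding
# *_model_dir from `args`, and configuring GPU, TensorRT, MKL-DNN and CPU
# threading according to the remaining args attributes.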
def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == "cls":
        model_dir = args.cls_model_dir
    else:  # rec
        model_dir = args.rec_model_dir
    if model_dir is None:
        Logger.error("no model_dir defined in args")
        exit(0)
    file_names, model_path, param_path = ["model", "inference"], None, None
    for file_name in file_names:
        model_file_path = path.join(model_dir, f"{file_name}.pdmodel")
        params_file_path = path.join(model_dir, f"{file_name}.pdiparams")
        if path.exists(model_file_path) and path.exists(params_file_path):
            model_path, param_path = model_file_path, params_file_path
            break
    if model_path is None:
        raise ValueError(f"cannot find model.pdmodel or inference.pdmodel in {model_dir}")
    if param_path is None:
        raise ValueError(f"cannot find model.pdiparams or inference.pdiparams in {model_dir}")
    config = inference.Config(model_path, param_path)
    precision = inference.PrecisionType.Float32
    if hasattr(args, "precision"):
        if args.precision == "fp16" and args.use_tensorrt:
            precision = inference.PrecisionType.Half
        elif args.precision == "int8":
            precision = inference.PrecisionType.Int8
        else:
            precision = inference.PrecisionType.Float32
    if args.use_gpu:
        gpu_id = __get_gpu_id()
        if gpu_id is None:
            Logger.warning(
                "GPU not found on the current device by nvidia-smi. "
                "Please check your device, or ignore this warning when running on Jetson."
            )
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                precision_mode=precision,
                max_batch_size=args.max_batch_size,
                min_subgraph_size=args.min_subgraph_size,  # skip the minimum TRT subgraph
                use_calib_mode=False
            )
            # collect dynamic shape info for TensorRT on the first run
            trt_shape_f = path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
            if not path.exists(trt_shape_f):
                config.collect_shape_range_info(trt_shape_f)
                Logger.warning(f"collecting dynamic shape info into: {trt_shape_f}")
            try:
                config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
            except Exception as e:
                Logger.error(e)
                Logger.error("Please keep your paddlepaddle-gpu >= 2.3.0!")
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            config.set_cpu_math_library_num_threads(10)
    config.enable_memory_optim()
    config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)
    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = None
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_tensors = __get_output_tensors(args, mode, predictor)
    return predictor, input_tensor, output_tensors
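

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It assumes an
# argparse-style namespace carrying the attributes read above; the model
# directory below is a placeholder.
#
#     from types import SimpleNamespace
#
#     args = SimpleNamespace(
#         det_model_dir="./models/det",   # hypothetical path
#         use_gpu=False, gpu_mem=500, use_tensorrt=False,
#         enable_mkldnn=False, cpu_threads=4,
#         max_batch_size=1, min_subgraph_size=15,
#         precision="fp32", rec_algorithm="CRNN",
#     )
#     predictor, input_tensor, output_tensors = create_predictor(args, "det")
# ---------------------------------------------------------------------------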