utils.py

from .operator import *  # noqa
from copy import deepcopy
from os import path, popen
from platform import system

from paddle import inference, fluid

__all__ = ["create_operators", "build_post_process", "create_predictor", "transform"]


def create_operators(op_param_list):
    """Instantiate preprocessing operators from a list of single-key dicts."""
    ops = []
    for operator in op_param_list:
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        op = eval(op_name)(**param)
        ops.append(op)
    return ops


def transform(data, ops=None):
    """Apply a chain of operators to ``data``; return None if any operator rejects it."""
    if ops is None:
        ops = []
    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data


def build_post_process(config):
    """Build the post-processing object named by ``config["name"]``, passing the remaining keys as kwargs."""
    config = deepcopy(config)
    module_name = config.pop("name")
    return eval(module_name)(**config)
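
# Illustrative sketch (not part of the module): how ``create_operators`` and
# ``transform`` are typically combined. The operator names below are
# assumptions standing in for whatever ``.operator`` actually exports; the
# config layout is the list of single-key dicts that ``create_operators`` expects.
#
#   ops = create_operators([{"DecodeImage": {"img_mode": "BGR"}},
#                           {"NormalizeImage": None}])
#   data = transform({"image": raw_image_bytes}, ops)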


def __get_gpu_id():
    # Read the visible-devices environment variable to pick a GPU id;
    # fall back to 0 on Windows or when the variable is not set.
    if system() == "Windows":
        return 0
    if not fluid.core.is_compiled_with_rocm():
        cmd = "env | grep CUDA_VISIBLE_DEVICES"
    else:
        cmd = "env | grep HIP_VISIBLE_DEVICES"
    env_cuda = popen(cmd).readlines()
    if len(env_cuda) == 0:
        return 0
    else:
        gpu_id = env_cuda[0].strip().split("=")[1]
        return int(gpu_id[0])


def __get_output_tensors(args, mode, predictor):
    output_names = predictor.get_output_names()
    output_tensors = []
    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        # CTC-based recognizers expose a dedicated softmax output when available.
        output_name = "softmax_0.tmp_0"
        if output_name in output_names:
            return [predictor.get_output_handle(output_name)]
        else:
            for output_name in output_names:
                output_tensor = predictor.get_output_handle(output_name)
                output_tensors.append(output_tensor)
    else:
        for output_name in output_names:
            output_tensor = predictor.get_output_handle(output_name)
            output_tensors.append(output_tensor)
    return output_tensors


def create_predictor(args, mode):
    # Resolve the model directory for the requested stage.
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == "cls":
        model_dir = args.cls_model_dir
    else:  # rec
        model_dir = args.rec_model_dir
    if model_dir is None:
        print("no model_dir defined in args")
        exit(0)

    # Accept either "model" or "inference" as the exported file prefix.
    file_names, model_path, param_path = ["model", "inference"], None, None
    for file_name in file_names:
        model_file_path = path.join(model_dir, f"{file_name}.pdmodel")
        params_file_path = path.join(model_dir, f"{file_name}.pdiparams")
        if path.exists(model_file_path) and path.exists(params_file_path):
            model_path, param_path = model_file_path, params_file_path
            break
    if model_path is None:
        raise ValueError(f"cannot find model.pdmodel or inference.pdmodel in {model_dir}")
    if param_path is None:
        raise ValueError(f"cannot find model.pdiparams or inference.pdiparams in {model_dir}")

    config = inference.Config(model_path, param_path)

    precision = inference.PrecisionType.Float32
    if hasattr(args, "precision"):
        if args.precision == "fp16" and args.use_tensorrt:
            precision = inference.PrecisionType.Half
        elif args.precision == "int8":
            precision = inference.PrecisionType.Int8
        else:
            precision = inference.PrecisionType.Float32

    if args.use_gpu:
        gpu_id = __get_gpu_id()
        if gpu_id is None:
            print(
                "WARNING:",
                "GPU is not found in current device by nvidia-smi.",
                "Please check your device or ignore it if run on jetson.",
            )
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                precision_mode=precision,
                max_batch_size=args.max_batch_size,
                min_subgraph_size=args.min_subgraph_size,  # skip the minimum trt subgraph
                use_calib_mode=False,
            )
            # Collect dynamic shape info on the first run, then reuse the tuned file.
            trt_shape_f = path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
            if not path.exists(trt_shape_f):
                config.collect_shape_range_info(trt_shape_f)
                print(f"collect dynamic shape info into : {trt_shape_f}")
            try:
                config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
            except Exception as E:
                print(E)
                print("Please keep your paddlepaddle-gpu >= 2.3.0!")
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            config.set_cpu_math_library_num_threads(10)

    config.enable_memory_optim()
    config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)

    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = None
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_tensors = __get_output_tensors(args, mode, predictor)
    return predictor, input_tensor, output_tensors
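

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): wiring the helpers
# together. The attribute values on ``args`` and the model directory are
# placeholder assumptions; any argparse-style namespace carrying the fields
# read by ``create_predictor`` works the same way.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    import numpy as np

    args = SimpleNamespace(
        det_model_dir="./det_model",  # placeholder path to an exported detector
        use_gpu=False,
        gpu_mem=500,
        use_tensorrt=False,
        precision="fp32",
        enable_mkldnn=False,
        cpu_threads=4,
        max_batch_size=1,
        min_subgraph_size=15,
        rec_algorithm="CRNN",
    )

    predictor, input_tensor, output_tensors = create_predictor(args, mode="det")

    # Feed one dummy NCHW float32 batch and copy the outputs back to the CPU.
    input_tensor.copy_from_cpu(np.zeros((1, 3, 640, 640), dtype="float32"))
    predictor.run()
    outputs = [t.copy_to_cpu() for t in output_tensors]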