// centerpoint.h (7.9 KB)
  1. #include "argsParser.h"
  2. #include "buffers.h"
  3. #include "common.h"
  4. #include "logger.h"
  5. #include "parserOnnxConfig.h"
  6. #include "NvInfer.h"
  7. #include <cuda_runtime_api.h>
  8. #include <cstdlib>
  9. #include <fstream>
  10. #include <iostream>
  11. #include <sstream>
  12. #include <string>
  13. #include <sys/time.h>
  14. #include <chrono>
  15. #include "preprocess.h"
  16. #include "postprocess.h"
  17. #include "scatter_cuda.h"
// The header files below are located in TensorRT/samples/common.
  19. #include "EntropyCalibrator.h"
  20. #include "BatchStream.h"
  21. #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
  22. struct Params{
  23. std::string pfeOnnxFilePath = "";
  24. std::string rpnOnnxFilePath = "";
  25. std::string pfeSerializedEnginePath = "";
  26. std::string rpnSerializedEnginePath = "";
  27. // Input Output Names
  28. std::vector<std::string> pfeInputTensorNames;
  29. std::vector<std::string> rpnInputTensorNames;
  30. std::vector<std::string> pfeOutputTensorNames;
  31. std::map<std::string, std::vector<std::string>> rpnOutputTensorNames;
  32. // Input Output Paths
  33. std::string savePath ;
  34. std::vector<std::string> filePaths;
  35. // Attrs
  36. int dlaCore = -1;
  37. bool fp16 = false;
  38. bool int8 = false;
  39. bool load_engine = false;
  40. int batch_size = 1;
  41. };
  42. class CenterPoint
  43. {
  44. template <typename T>
  45. using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
  46. public:
  47. CenterPoint(const Params params)
  48. : mParams(params)
  49. ,BATCH_SIZE_(params.batch_size)
  50. , mEngine(nullptr)
  51. ,mEngineRPN(nullptr)
  52. {
  53. //const int NUM_THREADS, const int MAX_NUM_PILLARS, const int GRID_X_SIZE, const int GRID_Y_SIZE):
  54. scatter_cuda_ptr_.reset(new ScatterCuda(PFE_OUTPUT_DIM, PFE_OUTPUT_DIM, BEV_W, BEV_H ));
  55. // mallocate a global memory for pointer
  56. GPU_CHECK(cudaMalloc((void**)&dev_points_, MAX_POINTS * POINT_DIM * sizeof(float)));
  57. GPU_CHECK(cudaMemset(dev_points_,0, MAX_POINTS * POINT_DIM * sizeof(float)));
  58. GPU_CHECK(cudaMalloc((void**)&dev_indices_,MAX_PILLARS * sizeof(int)));
  59. GPU_CHECK(cudaMemset(dev_indices_,0,MAX_PILLARS * sizeof(int)));
  60. /**
  61. * @brief : Create and Init Variables for PreProcess
  62. *
  63. */
  64. GPU_CHECK(cudaMalloc((void**)& p_bev_idx_, MAX_POINTS * sizeof(int)));
  65. GPU_CHECK(cudaMalloc((void**)& p_point_num_assigned_, MAX_POINTS * sizeof(int)));
  66. GPU_CHECK(cudaMalloc((void**)& p_mask_, MAX_POINTS * sizeof(bool)));
  67. GPU_CHECK(cudaMalloc((void**)& bev_voxel_idx_, BEV_H * BEV_W * sizeof(int)));
  68. GPU_CHECK(cudaMemset(p_bev_idx_, 0, MAX_POINTS * sizeof(int)));
  69. GPU_CHECK(cudaMemset(p_point_num_assigned_, 0, MAX_POINTS * sizeof(int)));
  70. GPU_CHECK(cudaMemset(p_mask_, 0, MAX_POINTS * sizeof(bool)));
  71. GPU_CHECK(cudaMemset(bev_voxel_idx_, 0, BEV_H * BEV_W * sizeof(int)));
  72. GPU_CHECK(cudaMalloc((void**)&v_point_sum_, MAX_PILLARS * 3 *sizeof(float)));
  73. GPU_CHECK(cudaMalloc((void**)&v_range_, MAX_PILLARS * sizeof(int)));
  74. GPU_CHECK(cudaMalloc((void**)&v_point_num_, MAX_PILLARS * sizeof(int)));
  75. GPU_CHECK(cudaMemset(v_range_,0, MAX_PILLARS * sizeof(int)));
  76. GPU_CHECK(cudaMemset(v_point_sum_, 0, MAX_PILLARS * 3 * sizeof(float)));
  77. /**
  78. * @brief : Create and Init Variables for PostProcess
  79. *
  80. */
  81. GPU_CHECK(cudaMalloc((void**)&dev_score_idx_, OUTPUT_W * OUTPUT_H * sizeof(int)));
  82. GPU_CHECK(cudaMemset(dev_score_idx_, -1 , OUTPUT_W * OUTPUT_H * sizeof(int)));
  83. GPU_CHECK(cudaMallocHost((void**)& mask_cpu, INPUT_NMS_MAX_SIZE * DIVUP (INPUT_NMS_MAX_SIZE ,THREADS_PER_BLOCK_NMS) * sizeof(unsigned long long)));
  84. GPU_CHECK(cudaMemset(mask_cpu, 0 , INPUT_NMS_MAX_SIZE * DIVUP (INPUT_NMS_MAX_SIZE ,THREADS_PER_BLOCK_NMS) * sizeof(unsigned long long)));
  85. GPU_CHECK(cudaMallocHost((void**)& remv_cpu, THREADS_PER_BLOCK_NMS * sizeof(unsigned long long)));
  86. GPU_CHECK(cudaMemset(remv_cpu, 0 , THREADS_PER_BLOCK_NMS * sizeof(unsigned long long)));
  87. GPU_CHECK(cudaMallocHost((void**)&host_score_idx_, OUTPUT_W * OUTPUT_H * sizeof(int)));
  88. GPU_CHECK(cudaMemset(host_score_idx_, -1, OUTPUT_W * OUTPUT_H * sizeof(int)));
  89. GPU_CHECK(cudaMallocHost((void**)&host_keep_data_, INPUT_NMS_MAX_SIZE * sizeof(long)));
  90. GPU_CHECK(cudaMemset(host_keep_data_, -1, INPUT_NMS_MAX_SIZE * sizeof(long)));
  91. GPU_CHECK(cudaMallocHost((void**)&host_boxes_, OUTPUT_NMS_MAX_SIZE * 9 * sizeof(float)));
  92. GPU_CHECK(cudaMemset(host_boxes_, 0 , OUTPUT_NMS_MAX_SIZE * 9 * sizeof(float)));
  93. GPU_CHECK(cudaMallocHost((void**)&host_label_, OUTPUT_NMS_MAX_SIZE * sizeof(int)));
  94. GPU_CHECK(cudaMemset(host_label_, -1, OUTPUT_NMS_MAX_SIZE * sizeof(int)));
  95. }
  96. ~CenterPoint() {
  97. // Free host pointers
  98. // Free global pointers
  99. std::cout << "Free Variables . \n";
  100. GPU_CHECK(cudaFree(dev_indices_));
  101. GPU_CHECK(cudaFree(dev_points_));
  102. GPU_CHECK(cudaFree(dev_score_idx_));
  103. GPU_CHECK(cudaFree( p_bev_idx_));
  104. GPU_CHECK(cudaFree( p_point_num_assigned_));
  105. GPU_CHECK(cudaFree( p_mask_));
  106. GPU_CHECK(cudaFree( bev_voxel_idx_)); // H * W
  107. GPU_CHECK(cudaFree( v_point_sum_));
  108. GPU_CHECK(cudaFree( v_range_));
  109. GPU_CHECK(cudaFree( v_point_num_));
  110. GPU_CHECK(cudaFreeHost(host_keep_data_));
  111. GPU_CHECK(cudaFreeHost(host_boxes_));
  112. GPU_CHECK(cudaFreeHost(host_label_));
  113. GPU_CHECK(cudaFreeHost(host_score_idx_));
  114. GPU_CHECK(cudaFreeHost(remv_cpu));
  115. GPU_CHECK(cudaFreeHost(mask_cpu));
  116. // // Free engine
  117. // std::cout << "Free PFE Engine .\n";
  118. // mEngine->destroy();
  119. // std::cout << "Free RPN Engine .\n";
  120. // mEngineRPN->destroy();
  121. }
  122. std::shared_ptr<nvinfer1::ICudaEngine> build( std::string onnxFilePath,std::string saveEnginePath);
  123. std::shared_ptr<nvinfer1::ICudaEngine> buildFromSerializedEngine(std::string serializedEngineFile);
  124. bool infer(float* lidarpoints,int pointsnum,std::vector<Box>& predResult);
  125. bool engineInitlization();
  126. bool testinfer();
  127. private:
  128. // device pointers
  129. float* dev_scattered_feature_;
  130. float* dev_points_ ;
  131. int* dev_indices_;
  132. int* dev_score_idx_;
  133. long* dev_keep_data_;
  134. SampleUniquePtr<ScatterCuda> scatter_cuda_ptr_;
  135. // device pointers for preprocess
  136. int* p_bev_idx_;
  137. int* p_point_num_assigned_;
  138. bool* p_mask_;
  139. int* bev_voxel_idx_; // H * W
  140. float* v_point_sum_;
  141. int* v_range_;
  142. int* v_point_num_;
  143. // host variables for post process
  144. long* host_keep_data_;
  145. float* host_boxes_;
  146. int* host_label_;
  147. int* host_score_idx_;
  148. unsigned long long* mask_cpu;
  149. unsigned long long* remv_cpu;
  150. Params mParams;
  151. int BATCH_SIZE_ = 1;
  152. nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network.
  153. nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network.
  154. int mNumber{0}; //!< The number to classify
  155. std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network
  156. std::shared_ptr<nvinfer1::ICudaEngine> mEngineRPN;
  157. samplesCommon::BufferManager * mbuffers;
  158. samplesCommon::BufferManager * mbuffersRPN;
  159. SampleUniquePtr<nvinfer1::IExecutionContext> mContext;
  160. SampleUniquePtr<nvinfer1::IExecutionContext> mContextRPN;
  161. //!
  162. //! \brief Parses an ONNX model for MNIST and creates a TensorRT network
  163. //!
  164. bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
  165. SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
  166. SampleUniquePtr<nvonnxparser::IParser>& parser,
  167. std::string onnxFilePath);
  168. //!
  169. //! \brief Reads the input and stores the result in a managed buffer
  170. //!
  171. bool processInput(void*& points, std::string& pointFilePath, int& pointNum);
  172. //!
  173. //! \brief Classifies digits and verify result
  174. //!
  175. void saveOutput(std::vector<Box>& predResult, std::string& inputFileName, std::string savePath);
  176. };