// pluginImplement.h
  1. #ifndef __PLUGIN_LAYER_H__
  2. #define __PLUGIN_LAYER_H__
  3. #include <memory>
  4. #include <cassert>
  5. #include <iostream>
  6. #include <cudnn.h>
  7. #include <cstring>
  8. #include <cuda_runtime.h>
  9. #include <cublas_v2.h>
  10. #include "NvCaffeParser.h"
  11. #include "NvInfer.h"
  12. #include "NvInferPlugin.h"
  13. #include "NvUtils.h"
  14. //#include "fp16.h"
  15. #define CHECK(status) \
  16. { \
  17. if (status != 0) \
  18. { \
  19. std::cout << "Cuda failure: " << cudaGetErrorString(status) \
  20. << " at line " << __LINE__ \
  21. << std::endl; \
  22. abort(); \
  23. } \
  24. }
// Convenience usings for the TensorRT core, Caffe-parser, and NVIDIA plugin APIs.
using namespace nvinfer1;
using namespace nvcaffeparser1;
using namespace plugin;

// Benchmark iteration count; not referenced in this header -- presumably
// used by timing code in the accompanying .cpp (TODO confirm).
static const int TIMING_ITERATIONS = 1000;

// NOTE(review): semantics not visible in this header -- presumably selects a
// processing mode (SELECT vs SUMMARY); confirm against the .cpp users.
enum FunctionType
{
    SELECT=0,
    SUMMARY
};

// Softmax kernel launcher implemented in the companion .cu file; called from
// SoftmaxPlugin::enqueue below. `n` appears to be the total element count and
// `channels` the per-group softmax span -- TODO confirm against the kernel.
void cudaSoftmax(int n, int channels, float* x, float*y);
// FP16 variant, currently disabled:
//void cudaSoftmax(int n, int channels, __half* x, __half* y);
  36. class bboxProfile {
  37. public:
  38. bboxProfile(float4& p, int idx): pos(p), bboxNum(idx) {}
  39. float4 pos;
  40. int bboxNum = -1;
  41. int labelID = -1;
  42. };
  43. class tagProfile
  44. {
  45. public:
  46. tagProfile(int b, int l): bboxID(b), label(l) {}
  47. int bboxID;
  48. int label;
  49. };
// SSD Reshape layer : shape{0,-1,OutC}.
// Reinterprets a CHW tensor as (C*H/OutC, OutC, W) -- the payload itself is
// copied through unchanged, only the dimensions differ. OutC is the class
// count per prior box (instantiated as Reshape<11> later in this file).
template<int OutC>
class Reshape : public IPlugin
{
public:
    Reshape()
    {
    }
    // Deserialization ctor: the only persisted state is the per-image copy
    // size in bytes (see serialize()/getSerializationSize()).
    Reshape(const void* buffer, size_t size)
    {
        assert(size == sizeof(mCopySize));
        mCopySize = *reinterpret_cast<const size_t*>(buffer);
    }
    // Exactly one output tensor.
    int getNbOutputs() const override
    {
        return 1;
    }
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
        assert(nbInputDims == 1);
        assert(index == 0);
        assert(inputs[index].nbDims == 3);
        // C*H must split evenly into rows of OutC values.
        assert((inputs[0].d[0])*(inputs[0].d[1]) % OutC == 0);
        return DimsCHW( inputs[0].d[0] * inputs[0].d[1] / OutC, OutC, inputs[0].d[2]);
    }
    int initialize() override { return 0; }
    void terminate() override {}
    size_t getWorkspaceSize(int) const override
    {
        // NOTE(review): enqueue() never reads `workspace`; this reserves one
        // image's worth of bytes (the *1 is the assumed batch size) that is
        // effectively unused scratch space.
        return mCopySize*1;
    }
    // currently it is not possible for a plugin to execute "in place". Therefore we memcpy the data from the input to the output buffer
    int enqueue(int batchSize, const void*const *inputs, void** outputs, void* workspace, cudaStream_t stream) override
    {
        // Both branches copy mCopySize * batchSize bytes device-to-device;
        // the __half casts only document the element type. mDataType is never
        // reassigned anywhere in this class, so the FP16 branch is currently
        // dead -- TODO confirm whether any caller is meant to switch it.
        if(mDataType == DataType::kFLOAT){ // FP32
            CHECK(cudaMemcpyAsync(outputs[0], inputs[0] , mCopySize * batchSize, cudaMemcpyDeviceToDevice, stream));
        }
        else{ //FP16
            CHECK(cudaMemcpyAsync(
                reinterpret_cast<__half*>(outputs[0]),
                reinterpret_cast<const __half*>(inputs[0]), mCopySize * batchSize,
                cudaMemcpyDeviceToDevice, stream));
        }
        return 0;
    }
    size_t getSerializationSize() override
    {
        return sizeof(mCopySize);
    }
    void serialize(void* buffer) override
    {
        *reinterpret_cast<size_t*>(buffer) = mCopySize;
    }
    // Cache the per-image input volume in bytes (FP32 element size -- note
    // this is used unchanged by the FP16 copy path above).
    void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
    {
        mCopySize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] * sizeof(float);
    }
protected:
    size_t mCopySize;                      // bytes per image; set by configure() or deserialization
    DataType mDataType{DataType::kFLOAT};  // never reassigned here, so the FP32 path is always taken
};
  115. //Softmax layer.TensorRT softmax only support cross channel
  116. class SoftmaxPlugin : public IPlugin
  117. {
  118. //You need to implement it when softmax parameter axis is 2.
  119. public:
  120. int initialize() override { return 0; }
  121. inline void terminate() override {}
  122. SoftmaxPlugin(){}
  123. SoftmaxPlugin( const void* buffer, size_t size)
  124. {
  125. assert(size == sizeof(mCopySize));
  126. mCopySize = *reinterpret_cast<const size_t*>(buffer);
  127. }
  128. inline int getNbOutputs() const override
  129. {
  130. //@TODO: As the number of outputs are only 1, because there is only layer in top.
  131. return 1;
  132. }
  133. Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
  134. {
  135. assert(nbInputDims == 1);
  136. assert(index == 0);
  137. assert(inputs[index].nbDims == 3);
  138. // assert((inputs[0].d[0])*(inputs[0].d[1]) % OutC == 0);
  139. // @TODO: Understood this.
  140. return DimsCHW( inputs[0].d[0] , inputs[0].d[1] , inputs[0].d[2] );
  141. }
  142. size_t getWorkspaceSize(int) const override
  143. {
  144. // @TODO: 1 is the batch size.
  145. return mCopySize*1;
  146. }
  147. int enqueue(int batchSize, const void*const *inputs, void** outputs, void* workspace, cudaStream_t stream) override
  148. {
  149. //std::cout<<"flatten enqueue:"<<batchSize<<";"<< mCopySize<<std::endl;
  150. // CHECK(cudaMemcpyAsync(outputs[0],inputs[0],batchSize*mCopySize*sizeof(float),cudaMemcpyDeviceToDevice,stream));
  151. //@Seojin add fp16 inference code
  152. //if(mDataType == DataType::kFLOAT){ //FP32
  153. cudaSoftmax( 8732*11, 11, (float *) *inputs, static_cast<float *>(*outputs));
  154. return 0;
  155. }
  156. size_t getSerializationSize() override
  157. {
  158. return sizeof(mCopySize);
  159. }
  160. void serialize(void* buffer) override
  161. {
  162. *reinterpret_cast<size_t*>(buffer) = mCopySize;
  163. }
  164. void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
  165. {
  166. mCopySize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] * sizeof(float);
  167. }
  168. protected:
  169. size_t mCopySize;
  170. DataType mDataType{DataType::kFLOAT};
  171. };
  172. //SSD Flatten layer
  173. class FlattenLayer : public IPlugin
  174. {
  175. public:
  176. FlattenLayer(){}
  177. FlattenLayer(const void* buffer, size_t size)
  178. {
  179. assert(size == 3 * sizeof(int));
  180. const int* d = reinterpret_cast<const int*>(buffer);
  181. _size = d[0] * d[1] * d[2];
  182. dimBottom = DimsCHW{d[0], d[1], d[2]};
  183. }
  184. inline int getNbOutputs() const override { return 1; };
  185. Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
  186. {
  187. assert(1 == nbInputDims);
  188. assert(0 == index);
  189. assert(3 == inputs[index].nbDims);
  190. _size = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2];
  191. return DimsCHW(_size, 1, 1);
  192. }
  193. int initialize() override
  194. {
  195. return 0;
  196. }
  197. inline void terminate() override {}
  198. inline size_t getWorkspaceSize(int) const override { return 0; }
  199. int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override
  200. {
  201. //std::cout<<"flatten enqueue:"<<batchSize<<";"<<_size<<std::endl;
  202. if(mDataType == DataType::kFLOAT){ //FP32
  203. CHECK(cudaMemcpyAsync(outputs[0],inputs[0],batchSize*_size*sizeof(float),cudaMemcpyDeviceToDevice,stream));
  204. }
  205. else{ //FP16
  206. CHECK(cudaMemcpyAsync(
  207. reinterpret_cast<__half*>(outputs[0]),
  208. reinterpret_cast<const __half*>(inputs[0]),
  209. batchSize*_size*sizeof(__half),
  210. cudaMemcpyDeviceToDevice,stream));
  211. }
  212. //CHECK(cudaMemcpyAsync(outputs[0],inputs[0],batchSize*_size*sizeof(float),cudaMemcpyDeviceToDevice,stream));
  213. return 0;
  214. }
  215. size_t getSerializationSize() override
  216. {
  217. return 3 * sizeof(int);
  218. }
  219. void serialize(void* buffer) override
  220. {
  221. int* d = reinterpret_cast<int*>(buffer);
  222. d[0] = dimBottom.c(); d[1] = dimBottom.h(); d[2] = dimBottom.w();
  223. }
  224. void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
  225. {
  226. dimBottom = DimsCHW(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
  227. }
  228. protected:
  229. DataType mDataType{DataType::kFLOAT};
  230. DimsCHW dimBottom;
  231. int _size;
  232. };
// Plugin factory wired into both TensorRT (engine-deserialization path) and
// the Caffe parser (network-build path). It owns one smart pointer per custom
// layer of the Pelee/SSD network; the createPlugin overloads, isPlugin and
// destroyPlugin are implemented in the accompanying .cpp -- presumably
// destroyPlugin() resets the members below (confirm there).
class PluginFactory : public nvinfer1::IPluginFactory, public nvcaffeparser1::IPluginFactory
{
public:
    // Build-time creation from Caffe layer name + weights.
    virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override;
    // Runtime re-creation from a serialized engine blob.
    IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override;
    // Deleter for the NVIDIA-provided plugins below: INvPlugin instances must
    // be released through their own destroy(), not operator delete.
    void(*nvPluginDeleter)(INvPlugin*) { [](INvPlugin* ptr) {ptr->destroy(); } };
    // True if `name` matches a layer this factory can build.
    bool isPlugin(const char* name) override;
    void destroyPlugin();

    // Permute plugins for the loc/conf heads of the six Pelee prediction modules.
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };

    // PriorBox plugins, one per prediction module.
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_priorbox_layer{ nullptr, nvPluginDeleter };

    // DetectionOutput (NMS + decode) plugin.
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mDetection_out{ nullptr, nvPluginDeleter };

    // Concat plugins for the Pelee backbone (stem + dense stages).
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStem_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_1_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_2_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_3_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_1_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_2_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_3_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_4_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_1_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_2_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_3_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_4_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_5_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_6_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_7_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_8_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_1_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_2_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_3_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_4_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_5_concat_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_6_concat_layer{ nullptr, nvPluginDeleter };

    // Concat plugins merging the per-module loc/conf/priorbox outputs.
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_loc_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_conf_layer{ nullptr, nvPluginDeleter };
    std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_priorbox_layer{ nullptr, nvPluginDeleter };

    // Reshape layer: 11 classes per prior box (see Reshape<OutC> above).
    std::unique_ptr<Reshape<11>> mMbox_conf_reshape{ nullptr };

    // Flatten layers for the loc/conf heads of each prediction module.
    std::unique_ptr<FlattenLayer> mExt_pm1_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm1_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm2_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm2_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm3_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm3_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm4_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm4_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm5_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm5_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm6_mbox_loc_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mExt_pm6_mbox_conf_flat_layer{ nullptr };
    std::unique_ptr<FlattenLayer> mBox_conf_flat_layer{ nullptr };

    // Custom softmax (axis 2) and the flatten that follows it.
    std::unique_ptr<SoftmaxPlugin> mPluginSoftmax{ nullptr };
    std::unique_ptr<FlattenLayer> mMbox_conf_flat_layer{ nullptr };
};
  310. #endif