- #ifndef __PLUGIN_LAYER_H__
- #define __PLUGIN_LAYER_H__
- #include <memory>
- #include <cassert>
- #include <iostream>
- #include <cudnn.h>
- #include <cstring>
- #include <cuda_runtime.h>
- #include <cublas_v2.h>
- #include "NvCaffeParser.h"
- #include "NvInfer.h"
- #include "NvInferPlugin.h"
- #include "NvUtils.h"
- //#include "fp16.h"
- #define CHECK(status) \
- { \
- if (status != 0) \
- { \
- std::cout << "Cuda failure: " << cudaGetErrorString(status) \
- << " at line " << __LINE__ \
- << std::endl; \
- abort(); \
- } \
- }
- using namespace nvinfer1;
- using namespace nvcaffeparser1;
- using namespace plugin;
- static const int TIMING_ITERATIONS = 1000;
- enum FunctionType
- {
- SELECT=0,
- SUMMARY
- };
- void cudaSoftmax(int n, int channels, float* x, float*y);
- //void cudaSoftmax(int n, int channels, __half* x, __half* y);
- class bboxProfile {
- public:
- bboxProfile(float4& p, int idx): pos(p), bboxNum(idx) {}
- float4 pos;
- int bboxNum = -1;
- int labelID = -1;
- };
- class tagProfile
- {
- public:
- tagProfile(int b, int l): bboxID(b), label(l) {}
- int bboxID;
- int label;
- };
- //SSD Reshape layer : shape{0,-1,21}
- // OutC is the number of output channels for the reshape (21 for the original SSD shape
- // above; this network instantiates Reshape<11> in the PluginFactory below).
- template<int OutC>
- class Reshape : public IPlugin
- {
- public:
- Reshape()
- {
- }
- Reshape(const void* buffer, size_t size)
- {
- assert(size == sizeof(mCopySize));
- mCopySize = *reinterpret_cast<const size_t*>(buffer);
- }
- int getNbOutputs() const override
- {
- return 1;
- }
- Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
- {
- assert(nbInputDims == 1);
- assert(index == 0);
- assert(inputs[index].nbDims == 3);
- assert((inputs[0].d[0])*(inputs[0].d[1]) % OutC == 0);
- // Fold H into C in groups of OutC: (C, H, W) -> (C*H/OutC, OutC, W), keeping the element
- // count unchanged; e.g. a (8732*11, 1, 1) confidence blob becomes (8732, 11, 1).
- return DimsCHW( inputs[0].d[0] * inputs[0].d[1] / OutC, OutC, inputs[0].d[2]);
- }
- int initialize() override { return 0; }
- void terminate() override {}
- size_t getWorkspaceSize(int) const override
- {
- // mCopySize (set in configure()) is the per-image copy size; the factor 1 stands for one image.
- return mCopySize*1;
- }
- // currently it is not possible for a plugin to execute "in place". Therefore we memcpy the data from the input to the output buffer
- int enqueue(int batchSize, const void*const *inputs, void** outputs, void* workspace, cudaStream_t stream) override
- {
- if(mDataType == DataType::kFLOAT){ // FP32
- CHECK(cudaMemcpyAsync(outputs[0], inputs[0] , mCopySize * batchSize, cudaMemcpyDeviceToDevice, stream));
- }
- else{ // FP16: mCopySize was computed with sizeof(float) in configure(),
- // so halve it to get the byte count of the __half tensor.
- CHECK(cudaMemcpyAsync(
- reinterpret_cast<__half*>(outputs[0]),
- reinterpret_cast<const __half*>(inputs[0]), (mCopySize / 2) * batchSize,
- cudaMemcpyDeviceToDevice, stream));
- }
- return 0;
- }
- size_t getSerializationSize() override
- {
- return sizeof(mCopySize);
- }
- void serialize(void* buffer) override
- {
- *reinterpret_cast<size_t*>(buffer) = mCopySize;
- }
- void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
- {
- mCopySize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] * sizeof(float);
- }
- protected:
- size_t mCopySize;
- DataType mDataType{DataType::kFLOAT};
- };
- // Softmax layer. TensorRT's built-in softmax only supports cross-channel softmax.
- class SoftmaxPlugin : public IPlugin
- {
- // A custom implementation is needed when the Caffe softmax parameter 'axis' is 2.
- public:
- int initialize() override { return 0; }
- inline void terminate() override {}
- SoftmaxPlugin(){}
- SoftmaxPlugin( const void* buffer, size_t size)
- {
- assert(size == sizeof(mCopySize));
- mCopySize = *reinterpret_cast<const size_t*>(buffer);
- }
- inline int getNbOutputs() const override
- {
- // A single output: this layer produces exactly one top blob.
- return 1;
- }
- Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
- {
- assert(nbInputDims == 1);
- assert(index == 0);
- assert(inputs[index].nbDims == 3);
- // Softmax preserves the shape: output dims equal input dims.
- return DimsCHW( inputs[0].d[0] , inputs[0].d[1] , inputs[0].d[2] );
- }
- size_t getWorkspaceSize(int) const override
- {
- // mCopySize (set in configure()) is the per-image tensor size in bytes; the factor 1 stands for one image.
- return mCopySize*1;
- }
- int enqueue(int batchSize, const void*const *inputs, void** outputs, void* workspace, cudaStream_t stream) override
- {
- // Sizes are hard-coded for this network: 8732 prior boxes x 11 classes, with the softmax
- // taken over each consecutive group of 11 scores. batchSize is ignored, so only batch
- // size 1 is handled. @Seojin: an FP16 path (mDataType == DataType::kHALF) is still TODO.
- cudaSoftmax( 8732*11, 11, (float *) *inputs, static_cast<float *>(*outputs));
- return 0;
- }
- size_t getSerializationSize() override
- {
- return sizeof(mCopySize);
- }
- void serialize(void* buffer) override
- {
- *reinterpret_cast<size_t*>(buffer) = mCopySize;
- }
- void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
- {
- mCopySize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] * sizeof(float);
- }
- protected:
- size_t mCopySize;
- DataType mDataType{DataType::kFLOAT};
- };
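- /* A minimal sketch of what the cudaSoftmax() declared above might look like in the
-    accompanying .cu file (an assumption -- the actual kernel is not shown in this header).
-    It matches the call in SoftmaxPlugin::enqueue(): n total values, softmax taken over each
-    consecutive group of `channels` values (one prior box), e.g. n = 8732*11, channels = 11.
-
- #include <cuda_runtime.h>
- #include <cfloat>
-
- __global__ void softmaxKernel(int nGroups, int channels, const float* x, float* y)
- {
-     int g = blockIdx.x * blockDim.x + threadIdx.x;
-     if (g >= nGroups) return;
-     const float* in  = x + g * channels;
-     float*       out = y + g * channels;
-     float m = -FLT_MAX;                                  // per-group max for numerical stability
-     for (int c = 0; c < channels; ++c) m = fmaxf(m, in[c]);
-     float sum = 0.f;
-     for (int c = 0; c < channels; ++c) { out[c] = expf(in[c] - m); sum += out[c]; }
-     for (int c = 0; c < channels; ++c) out[c] /= sum;
- }
-
- void cudaSoftmax(int n, int channels, float* x, float* y)
- {
-     int nGroups = n / channels;                          // e.g. 8732 prior boxes
-     int block = 256;
-     int grid = (nGroups + block - 1) / block;
-     softmaxKernel<<<grid, block>>>(nGroups, channels, x, y);  // default stream, as called from enqueue()
- }
- */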
- //SSD Flatten layer
- class FlattenLayer : public IPlugin
- {
- public:
- FlattenLayer(){}
- FlattenLayer(const void* buffer, size_t size)
- {
- assert(size == 3 * sizeof(int));
- const int* d = reinterpret_cast<const int*>(buffer);
- _size = d[0] * d[1] * d[2];
- dimBottom = DimsCHW{d[0], d[1], d[2]};
- }
- inline int getNbOutputs() const override { return 1; };
- Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
- {
- assert(1 == nbInputDims);
- assert(0 == index);
- assert(3 == inputs[index].nbDims);
- _size = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2];
- return DimsCHW(_size, 1, 1);
- }
- int initialize() override
- {
- return 0;
- }
- inline void terminate() override {}
- inline size_t getWorkspaceSize(int) const override { return 0; }
- int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override
- {
- //std::cout<<"flatten enqueue:"<<batchSize<<";"<<_size<<std::endl;
- if(mDataType == DataType::kFLOAT){ //FP32
- CHECK(cudaMemcpyAsync(outputs[0],inputs[0],batchSize*_size*sizeof(float),cudaMemcpyDeviceToDevice,stream));
- }
- else{ //FP16
- CHECK(cudaMemcpyAsync(
- reinterpret_cast<__half*>(outputs[0]),
- reinterpret_cast<const __half*>(inputs[0]),
- batchSize*_size*sizeof(__half),
- cudaMemcpyDeviceToDevice,stream));
- }
- //CHECK(cudaMemcpyAsync(outputs[0],inputs[0],batchSize*_size*sizeof(float),cudaMemcpyDeviceToDevice,stream));
- return 0;
- }
- size_t getSerializationSize() override
- {
- return 3 * sizeof(int);
- }
- void serialize(void* buffer) override
- {
- int* d = reinterpret_cast<int*>(buffer);
- d[0] = dimBottom.c(); d[1] = dimBottom.h(); d[2] = dimBottom.w();
- }
- void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
- {
- dimBottom = DimsCHW(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
- }
- protected:
- DataType mDataType{DataType::kFLOAT};
- DimsCHW dimBottom;
- int _size;
- };
- class PluginFactory : public nvinfer1::IPluginFactory, public nvcaffeparser1::IPluginFactory
- {
- public:
- virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override;
- IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override;
- void(*nvPluginDeleter)(INvPlugin*) { [](INvPlugin* ptr) {ptr->destroy(); } };
- bool isPlugin(const char* name) override;
- void destroyPlugin();
-
- //pelee
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_loc_perm_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_conf_perm_layer{ nullptr, nvPluginDeleter };
- //pelee
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm1_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm2_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm3_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm4_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm5_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mExt_pm6_mbox_priorbox_layer{ nullptr, nvPluginDeleter };
- //detection output layer
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mDetection_out{ nullptr, nvPluginDeleter };
- //pelee
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStem_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_1_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_2_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage1_3_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_1_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_2_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_3_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage2_4_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_1_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_2_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_3_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_4_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_5_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_6_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_7_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage3_8_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_1_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_2_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_3_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_4_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_5_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mStage4_6_concat_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_loc_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_conf_layer{ nullptr, nvPluginDeleter };
- std::unique_ptr<INvPlugin, decltype(nvPluginDeleter)> mBox_priorbox_layer{ nullptr, nvPluginDeleter };
-
- //reshape layer
- std::unique_ptr<Reshape<11>> mMbox_conf_reshape{ nullptr };
- //flatten layers
- //pelee
- std::unique_ptr<FlattenLayer> mExt_pm1_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm1_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm2_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm2_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm3_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm3_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm4_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm4_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm5_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm5_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm6_mbox_loc_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mExt_pm6_mbox_conf_flat_layer{ nullptr };
- std::unique_ptr<FlattenLayer> mBox_conf_flat_layer{ nullptr };
- //softmax layer
- std::unique_ptr<SoftmaxPlugin> mPluginSoftmax{ nullptr };
- std::unique_ptr<FlattenLayer> mMbox_conf_flat_layer{ nullptr };
- };
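- /* Sketch of how the two createPlugin() overloads might dispatch on the Caffe layer name
-    (an assumption -- the real definitions live in the accompanying .cpp, and the layer names
-    "mbox_conf_reshape" / "mbox_conf_softmax" used here are illustrative):
-
- nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights)
- {
-     assert(isPlugin(layerName));
-     if (!strcmp(layerName, "mbox_conf_reshape"))
-     {
-         assert(mMbox_conf_reshape == nullptr);
-         mMbox_conf_reshape = std::unique_ptr<Reshape<11>>(new Reshape<11>());
-         return mMbox_conf_reshape.get();
-     }
-     if (!strcmp(layerName, "mbox_conf_softmax"))
-     {
-         assert(mPluginSoftmax == nullptr);
-         mPluginSoftmax = std::unique_ptr<SoftmaxPlugin>(new SoftmaxPlugin());
-         return mPluginSoftmax.get();
-     }
-     // The permute / priorbox / concat / detection-output members above are created the same way
-     // with the stock factory functions from NvInferPlugin.h (createSSDPermutePlugin,
-     // createSSDPriorBoxPlugin, createConcatPlugin, createSSDDetectionOutputPlugin);
-     // the flatten members use FlattenLayer.
-     assert(0 && "unknown plugin layer name");
-     return nullptr;
- }
-
- The (const void*, size_t) overload is the mirror image: it rebuilds the same objects from the
- serialized buffers, e.g. new Reshape<11>(serialData, serialLength), when an engine is deserialized.
- */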
- #endif
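- /* Usage sketch (an assumption -- this would live in the application code, not in this header;
-    gLogger, network, modelData, modelSize and the file names are placeholders). With the legacy
-    IPlugin/IPluginFactory API, the same factory is handed to the Caffe parser at build time and
-    to the runtime at deserialization time so the custom layers above can be (re)created:
-
- PluginFactory pluginFactory;
-
- // Build time: the parser calls PluginFactory::createPlugin(name, weights, nbWeights).
- nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
- parser->setPluginFactory(&pluginFactory);
- parser->parse("pelee_deploy.prototxt", "pelee.caffemodel", *network, nvinfer1::DataType::kFLOAT);
-
- // Runtime: deserialization calls PluginFactory::createPlugin(name, serialData, serialLength).
- nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
- nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(modelData, modelSize, &pluginFactory);
-
- // Release the plugin objects once the engine has been built / loaded.
- pluginFactory.destroyPlugin();
- */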