/*
 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <stdexcept>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <cuda.h>
#if CUDA_VERSION < 10000
#include <half.h>
#else
#include <cuda_fp16.h>
#endif

#include "NvInfer.h"

#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"

namespace sample
{

inline int dataTypeSize(nvinfer1::DataType dataType)
{
    switch (dataType)
    {
    case nvinfer1::DataType::kINT32:
    case nvinfer1::DataType::kFLOAT: return 4;
    case nvinfer1::DataType::kHALF: return 2;
    case nvinfer1::DataType::kBOOL:
    case nvinfer1::DataType::kINT8: return 1;
    }
    return 0;
}
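
// Round m up to the nearest multiple of n, e.g. roundUp(13, 4) == 16.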
template <typename T>
inline T roundUp(T m, T n)
{
    return ((m + n - 1) / n) * n;
}
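
// Product of all dimensions; assumes the dims are fully specified (no -1 wildcards).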
inline int volume(const nvinfer1::Dims& d)
{
    return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
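
// Example for the strided overload below: dims = 2x3, strides = 3x1, vecDim = -1,
// batch = 1 gives max(2 * 3, 3 * 1) * 1 * 1 = 6 elements.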
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides, int vecDim, int comps, int batch)
{
    int maxNbElems = 1;
    for (int i = 0; i < dims.nbDims; ++i)
    {
        // Get effective length of axis.
        int d = dims.d[i];
        // If any dimension is 0, the tensor is empty.
        if (d == 0)
        {
            return 0;
        }
        if (i == vecDim)
        {
            d = samplesCommon::divUp(d, comps);
        }
        maxNbElems = std::max(maxNbElems, d * strides.d[i]);
    }
    return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}

inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch)
{
    if (vecDim != -1)
    {
        dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
    }
    return volume(dims) * std::max(batch, 1);
}
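
// Prints dims as "d0xd1x...", e.g. {4, {1, 3, 224, 224}} becomes "1x3x224x224".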
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims)
{
    for (int i = 0; i < dims.nbDims; ++i)
    {
        os << (i ? "x" : "") << dims.d[i];
    }
    return os;
}

inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role)
{
    switch (role)
    {
    case nvinfer1::WeightsRole::kKERNEL:
    {
        os << "Kernel";
        break;
    }
    case nvinfer1::WeightsRole::kBIAS:
    {
        os << "Bias";
        break;
    }
    case nvinfer1::WeightsRole::kSHIFT:
    {
        os << "Shift";
        break;
    }
    case nvinfer1::WeightsRole::kSCALE:
    {
        os << "Scale";
        break;
    }
    case nvinfer1::WeightsRole::kCONSTANT:
    {
        os << "Constant";
        break;
    }
    case nvinfer1::WeightsRole::kANY:
    {
        os << "Any";
        break;
    }
    }
    return os;
}

inline std::ostream& operator<<(std::ostream& os, const std::vector<int>& vec)
{
    for (int i = 0, e = static_cast<int>(vec.size()); i < e; ++i)
    {
        os << (i ? "x" : "") << vec[i];
    }
    return os;
}
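
// e.g. toDims({1, 3, 224, 224}) yields nbDims == 4 with d == {1, 3, 224, 224};
// anything beyond nvinfer1::Dims::MAX_DIMS elements is dropped with a warning.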
inline nvinfer1::Dims toDims(const std::vector<int>& vec)
{
    int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
    if (static_cast<int>(vec.size()) > limit)
    {
        sample::gLogWarning << "Vector too long; only the first " << limit << " elements are used in dimension."
                            << std::endl;
    }
    // Pick the first nvinfer1::Dims::MAX_DIMS elements.
    nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
    std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
    return dims;
}

template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
    T* typedBuffer = static_cast<T*>(buffer);
    std::default_random_engine engine;
    if (std::is_integral<T>::value)
    {
        std::uniform_int_distribution<int> distribution(min, max);
        auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
        std::generate(typedBuffer, typedBuffer + volume, generator);
    }
    else
    {
        std::uniform_real_distribution<float> distribution(min, max);
        auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
        std::generate(typedBuffer, typedBuffer + volume, generator);
    }
}

// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max)
{
    H* typedBuffer = static_cast<H*>(buffer);
    std::default_random_engine engine;
    std::uniform_real_distribution<float> distribution(min, max);
    auto generator = [&engine, &distribution]() { return static_cast<H>(distribution(engine)); };
    std::generate(typedBuffer, typedBuffer + volume, generator);
}

template <>
#if CUDA_VERSION < 10000
inline void fillBuffer<half_float::half>(void* buffer, int64_t volume, half_float::half min, half_float::half max)
#else
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max)
#endif
{
    fillBufferHalf(buffer, volume, min, max);
}
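
// Dump a strided, possibly vectorized buffer as text. spv is the number of scalars
// per vector; within the vectorized dimension, scalar i is stored at
// (i / spv) * stride * spv + i % spv, matching TensorRT's layout for vectorized
// formats such as kCHW4.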
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator, std::ostream& os, const Dims& dims,
    const Dims& strides, int32_t vectorDim, int32_t spv)
{
    const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, int64_t{1}, std::multiplies<int64_t>());
    const T* typedBuffer = static_cast<const T*>(buffer);
    std::string sep;
    for (int64_t v = 0; v < volume; ++v)
    {
        int64_t curV = v;
        int32_t dataOffset = 0;
        for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex)
        {
            int32_t dimVal = curV % dims.d[dimIndex];
            if (dimIndex == vectorDim)
            {
                dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
            }
            else
            {
                dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
            }
            curV /= dims.d[dimIndex];
            ASSERT(curV >= 0);
        }
        os << sep << typedBuffer[dataOffset];
        sep = separator;
    }
}
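
// One engine binding: a mirrored host/device buffer plus the metadata needed to
// fill it with input data and dump its contents.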
struct Binding
{
    bool isInput{false};
    MirroredBuffer buffer;
    int64_t volume{0};
    nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};

    void fill(const std::string& fileName)
    {
        std::ifstream file(fileName, std::ios::in | std::ios::binary);
        if (file.is_open())
        {
            file.read(static_cast<char*>(buffer.getHostBuffer()), buffer.getSize());
            file.close();
        }
    }

    void fill()
    {
        switch (dataType)
        {
        case nvinfer1::DataType::kBOOL:
        {
            fillBuffer<bool>(buffer.getHostBuffer(), volume, 0, 1);
            break;
        }
        case nvinfer1::DataType::kINT32:
        {
            fillBuffer<int32_t>(buffer.getHostBuffer(), volume, -128, 127);
            break;
        }
        case nvinfer1::DataType::kINT8:
        {
            fillBuffer<int8_t>(buffer.getHostBuffer(), volume, -128, 127);
            break;
        }
        case nvinfer1::DataType::kFLOAT:
        {
            fillBuffer<float>(buffer.getHostBuffer(), volume, -1.0, 1.0);
            break;
        }
        case nvinfer1::DataType::kHALF:
        {
#if CUDA_VERSION < 10000
            fillBuffer<half_float::half>(buffer.getHostBuffer(), volume, static_cast<half_float::half>(-1.0),
                static_cast<half_float::half>(1.0));
#else
            fillBuffer<__half>(buffer.getHostBuffer(), volume, -1.0, 1.0);
#endif
            break;
        }
        }
    }

    void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, int32_t spv,
        const std::string& separator = " ") const
    {
        switch (dataType)
        {
        case nvinfer1::DataType::kBOOL:
        {
            dumpBuffer<bool>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
            break;
        }
        case nvinfer1::DataType::kINT32:
        {
            dumpBuffer<int32_t>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
            break;
        }
        case nvinfer1::DataType::kINT8:
        {
            dumpBuffer<int8_t>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
            break;
        }
        case nvinfer1::DataType::kFLOAT:
        {
            dumpBuffer<float>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
            break;
        }
        case nvinfer1::DataType::kHALF:
        {
#if CUDA_VERSION < 10000
            dumpBuffer<half_float::half>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
#else
            dumpBuffer<__half>(buffer.getHostBuffer(), separator, os, dims, strides, vectorDim, spv);
#endif
            break;
        }
        }
    }
};
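
// Owns the buffers for all bindings of an engine, plus the contiguous array of
// device pointers that IExecutionContext::enqueue()/enqueueV2() expects.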
class Bindings
{
public:
    void addBinding(int b, const std::string& name, bool isInput, int64_t volume, nvinfer1::DataType dataType,
        const std::string& fileName = "")
    {
        while (mBindings.size() <= static_cast<size_t>(b))
        {
            mBindings.emplace_back();
            mDevicePointers.emplace_back();
        }
        mNames[name] = b;
        mBindings[b].isInput = isInput;
        // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
        // even for empty tensors, so allocate a dummy byte.
        if (volume == 0)
        {
            mBindings[b].buffer.allocate(1);
        }
        else
        {
            mBindings[b].buffer.allocate(static_cast<size_t>(volume) * static_cast<size_t>(dataTypeSize(dataType)));
        }
        mBindings[b].volume = volume;
        mBindings[b].dataType = dataType;
        mDevicePointers[b] = mBindings[b].buffer.getDeviceBuffer();
        if (isInput)
        {
            if (fileName.empty())
            {
                fill(b);
            }
            else
            {
                fill(b, fileName);
            }
        }
    }

    void** getDeviceBuffers()
    {
        return mDevicePointers.data();
    }

    void transferInputToDevice(TrtCudaStream& stream)
    {
        for (auto& b : mNames)
        {
            if (mBindings[b.second].isInput)
            {
                mBindings[b.second].buffer.hostToDevice(stream);
            }
        }
    }

    void transferOutputToHost(TrtCudaStream& stream)
    {
        for (auto& b : mNames)
        {
            if (!mBindings[b.second].isInput)
            {
                mBindings[b.second].buffer.deviceToHost(stream);
            }
        }
    }

    void fill(int binding, const std::string& fileName)
    {
        mBindings[binding].fill(fileName);
    }

    void fill(int binding)
    {
        mBindings[binding].fill();
    }

    void dumpBindingDimensions(int binding, const nvinfer1::IExecutionContext& context, std::ostream& os) const
    {
        const auto dims = context.getBindingDimensions(binding);
        // Do not add a newline terminator, because the caller may be outputting a JSON string.
        os << dims;
    }
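
    // For implicit-batch engines, getBindingDimensions() omits N, so the batch size
    // (and a matching outermost stride) is prepended below before dumping.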
    void dumpBindingValues(const nvinfer1::IExecutionContext& context, int binding, std::ostream& os,
        const std::string& separator = " ", int32_t batch = 1) const
    {
        Dims dims = context.getBindingDimensions(binding);
        Dims strides = context.getStrides(binding);
        int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
        const int32_t spv = context.getEngine().getBindingComponentsPerElement(binding);

        if (context.getEngine().hasImplicitBatchDimension())
        {
            auto insertN = [](Dims& d, int32_t bs) {
                const int32_t nbDims = d.nbDims;
                ASSERT(nbDims < Dims::MAX_DIMS);
                std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
                d.d[0] = bs;
                d.nbDims = nbDims + 1;
            };
            int32_t batchStride = 0;
            for (int32_t i = 0; i < strides.nbDims; ++i)
            {
                if (strides.d[i] * dims.d[i] > batchStride)
                {
                    batchStride = strides.d[i] * dims.d[i];
                }
            }
            insertN(dims, batch);
            insertN(strides, batchStride);
            vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
        }

        mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
    }

    void dumpInputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const
    {
        auto isInput = [](const Binding& b) { return b.isInput; };
        dumpBindings(context, isInput, os);
    }

    void dumpOutputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const
    {
        auto isOutput = [](const Binding& b) { return !b.isInput; };
        dumpBindings(context, isOutput, os);
    }

    void dumpBindings(const nvinfer1::IExecutionContext& context, std::ostream& os) const
    {
        auto all = [](const Binding& b) { return true; };
        dumpBindings(context, all, os);
    }

    void dumpBindings(
        const nvinfer1::IExecutionContext& context, bool (*predicate)(const Binding& b), std::ostream& os) const
    {
        for (const auto& n : mNames)
        {
            const auto binding = n.second;
            if (predicate(mBindings[binding]))
            {
                os << n.first << ": (";
                dumpBindingDimensions(binding, context, os);
                os << ")" << std::endl;
                dumpBindingValues(context, binding, os);
                os << std::endl;
            }
        }
    }

    std::unordered_map<std::string, int> getInputBindings() const
    {
        auto isInput = [](const Binding& b) { return b.isInput; };
        return getBindings(isInput);
    }

    std::unordered_map<std::string, int> getOutputBindings() const
    {
        auto isOutput = [](const Binding& b) { return !b.isInput; };
        return getBindings(isOutput);
    }

    std::unordered_map<std::string, int> getBindings() const
    {
        auto all = [](const Binding& b) { return true; };
        return getBindings(all);
    }

    std::unordered_map<std::string, int> getBindings(bool (*predicate)(const Binding& b)) const
    {
        std::unordered_map<std::string, int> bindings;
        for (const auto& n : mNames)
        {
            const auto binding = n.second;
            if (predicate(mBindings[binding]))
            {
                bindings.insert(n);
            }
        }
        return bindings;
    }

private:
    std::unordered_map<std::string, int> mNames;
    std::vector<Binding> mBindings;
    std::vector<void*> mDevicePointers;
};
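
// TensorRT objects (pre-TensorRT 8 ownership model) are released via destroy()
// rather than delete, so std::unique_ptr needs this custom deleter.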
template <typename T>
struct TrtDestroyer
{
    void operator()(T* t)
    {
        t->destroy();
    }
};

template <typename T>
using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
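
// Usage sketch (assumes 'logger' implements nvinfer1::ILogger):
//   TrtUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(logger)};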
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats, size_t nbBindings, bool isInput = true)
{
    bool broadcast = formats.size() == 1;
    bool validFormatsCount = broadcast || (formats.size() == nbBindings);
    if (!formats.empty() && !validFormatsCount)
    {
        if (isInput)
        {
            throw std::invalid_argument(
                "The number of inputIOFormats must match the number of network inputs or be one for broadcasting.");
        }
        else
        {
            throw std::invalid_argument(
                "The number of outputIOFormats must match the number of network outputs or be one for broadcasting.");
        }
    }
    return broadcast;
}

inline std::vector<char> loadTimingCacheFile(const std::string& inFileName)
{
    std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
    if (!iFile)
    {
        sample::gLogWarning << "Could not read timing cache from: " << inFileName
                            << ". A new timing cache will be generated and written." << std::endl;
        return std::vector<char>();
    }
    iFile.seekg(0, std::ifstream::end);
    size_t fsize = iFile.tellg();
    iFile.seekg(0, std::ifstream::beg);
    std::vector<char> content(fsize);
    iFile.read(content.data(), fsize);
    iFile.close();
    sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from " << inFileName << std::endl;
    return content;
}

inline void saveTimingCacheFile(const std::string& outFileName, const IHostMemory* blob)
{
    std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
    if (!oFile)
    {
        sample::gLogWarning << "Could not write timing cache to: " << outFileName << std::endl;
        return;
    }
    oFile.write(static_cast<const char*>(blob->data()), blob->size());
    oFile.close();
    sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to " << outFileName << std::endl;
}
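
// Usage sketch for the helpers above (TensorRT 8 timing-cache API, illustrative):
//   auto blob = loadTimingCacheFile(path);
//   auto* cache = config->createTimingCache(blob.data(), blob.size());
//   config->setTimingCache(*cache, false);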

} // namespace sample

#endif // TRT_SAMPLE_UTILS_H