  1. /*
  2. * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include <algorithm>
#include <cctype>
#include <cstring>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "NvInfer.h"

#include "sampleOptions.h"
#include "sampleUtils.h"
  27. namespace sample
  28. {
  29. namespace
  30. {
  31. std::vector<std::string> splitToStringVec(const std::string& option, char separator)
  32. {
  33. std::vector<std::string> options;
  34. for (size_t start = 0; start < option.length();)
  35. {
  36. size_t separatorIndex = option.find(separator, start);
  37. if (separatorIndex == std::string::npos)
  38. {
  39. separatorIndex = option.length();
  40. }
  41. options.emplace_back(option.substr(start, separatorIndex - start));
  42. start = separatorIndex + 1;
  43. }
  44. return options;
  45. }
//! Fallback conversion: brace-construct a T directly from the option string.
//! Used for types constructible from std::string (e.g. std::string itself);
//! numeric, boolean, and TensorRT types are handled by explicit specializations.
template <typename T>
T stringToValue(const std::string& option)
{
    return T{option};
}
  51. template <>
  52. int stringToValue<int>(const std::string& option)
  53. {
  54. return std::stoi(option);
  55. }
  56. template <>
  57. float stringToValue<float>(const std::string& option)
  58. {
  59. return std::stof(option);
  60. }
  61. template <>
  62. bool stringToValue<bool>(const std::string& option)
  63. {
  64. return true;
  65. }
  66. template <>
  67. std::vector<int> stringToValue<std::vector<int>>(const std::string& option)
  68. {
  69. std::vector<int> shape;
  70. std::vector<std::string> dimsStrings = splitToStringVec(option, 'x');
  71. for (const auto& d : dimsStrings)
  72. {
  73. shape.push_back(stringToValue<int>(d));
  74. }
  75. return shape;
  76. }
  77. template <>
  78. nvinfer1::DataType stringToValue<nvinfer1::DataType>(const std::string& option)
  79. {
  80. const std::unordered_map<std::string, nvinfer1::DataType> strToDT{{"fp32", nvinfer1::DataType::kFLOAT},
  81. {"fp16", nvinfer1::DataType::kHALF}, {"int8", nvinfer1::DataType::kINT8},
  82. {"int32", nvinfer1::DataType::kINT32}};
  83. const auto& dt = strToDT.find(option);
  84. if (dt == strToDT.end())
  85. {
  86. throw std::invalid_argument("Invalid DataType " + option);
  87. }
  88. return dt->second;
  89. }
  90. template <>
  91. nvinfer1::TensorFormats stringToValue<nvinfer1::TensorFormats>(const std::string& option)
  92. {
  93. std::vector<std::string> optionStrings = splitToStringVec(option, '+');
  94. const std::unordered_map<std::string, nvinfer1::TensorFormat> strToFmt{{"chw", nvinfer1::TensorFormat::kLINEAR},
  95. {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4},
  96. {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16},
  97. {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8},
  98. {"hwc", nvinfer1::TensorFormat::kHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR},
  99. {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}};
  100. nvinfer1::TensorFormats formats{};
  101. for (auto f : optionStrings)
  102. {
  103. const auto& tf = strToFmt.find(f);
  104. if (tf == strToFmt.end())
  105. {
  106. throw std::invalid_argument(std::string("Invalid TensorFormat ") + f);
  107. }
  108. formats |= 1U << int(tf->second);
  109. }
  110. return formats;
  111. }
  112. template <>
  113. IOFormat stringToValue<IOFormat>(const std::string& option)
  114. {
  115. IOFormat ioFormat{};
  116. const size_t colon = option.find(':');
  117. if (colon == std::string::npos)
  118. {
  119. throw std::invalid_argument(std::string("Invalid IOFormat ") + option);
  120. }
  121. ioFormat.first = stringToValue<nvinfer1::DataType>(option.substr(0, colon));
  122. ioFormat.second = stringToValue<nvinfer1::TensorFormats>(option.substr(colon + 1));
  123. return ioFormat;
  124. }
  125. template <typename T>
  126. std::pair<std::string, T> splitNameAndValue(const std::string& s)
  127. {
  128. std::string tensorName;
  129. std::string valueString;
  130. // Split on the last :
  131. std::vector<std::string> nameRange{splitToStringVec(s, ':')};
  132. // Everything before the last : is the name
  133. tensorName = nameRange[0];
  134. for (size_t i = 1; i < nameRange.size() - 1; i++)
  135. {
  136. tensorName += ":" + nameRange[i];
  137. }
  138. // Value is the string element after the last :
  139. valueString = nameRange[nameRange.size() - 1];
  140. return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString));
  141. }
  142. template <typename T>
  143. void splitInsertKeyValue(const std::vector<std::string>& kvList, T& map)
  144. {
  145. for (const auto& kv : kvList)
  146. {
  147. map.insert(splitNameAndValue<typename T::mapped_type>(kv));
  148. }
  149. }
  150. const char* boolToEnabled(bool enable)
  151. {
  152. return enable ? "Enabled" : "Disabled";
  153. }
  154. //! Check if input option exists in input arguments.
  155. //! If it does: return its value, erase the argument and return true.
  156. //! If it does not: return false.
  157. template <typename T>
  158. bool getAndDelOption(Arguments& arguments, const std::string& option, T& value)
  159. {
  160. const auto match = arguments.find(option);
  161. if (match != arguments.end())
  162. {
  163. value = stringToValue<T>(match->second);
  164. arguments.erase(match);
  165. return true;
  166. }
  167. return false;
  168. }
  169. //! Check if input option exists in input arguments.
  170. //! If it does: return false in value, erase the argument and return true.
  171. //! If it does not: return false.
  172. bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value)
  173. {
  174. bool dummy;
  175. if (getAndDelOption(arguments, option, dummy))
  176. {
  177. value = false;
  178. return true;
  179. }
  180. return false;
  181. }
  182. //! Check if input option exists in input arguments.
  183. //! If it does: add all the matched arg values to values vector, erase the argument and return true.
  184. //! If it does not: return false.
  185. template <typename T>
  186. bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, std::vector<T>& values)
  187. {
  188. const auto match = arguments.equal_range(option);
  189. if (match.first == match.second)
  190. {
  191. return false;
  192. }
  193. auto addToValues = [&values](Arguments::value_type& argValue) {values.emplace_back(stringToValue<T>(argValue.second));};
  194. std::for_each(match.first, match.second, addToValues);
  195. arguments.erase(match.first, match.second);
  196. return true;
  197. }
  198. void insertShapesBuild(std::unordered_map<std::string, ShapeRange>& shapes, nvinfer1::OptProfileSelector selector, const std::string& name, const std::vector<int>& dims)
  199. {
  200. shapes[name][static_cast<size_t>(selector)] = dims;
  201. }
  202. void insertShapesInference(std::unordered_map<std::string, std::vector<int>>& shapes, const std::string& name, const std::vector<int>& dims)
  203. {
  204. shapes[name] = dims;
  205. }
  206. std::string removeSingleQuotationMarks(std::string& str)
  207. {
  208. std::vector<std::string> strList{splitToStringVec(str, '\'')};
  209. // Remove all the escaped single quotation marks
  210. std::string retVal = "";
  211. // Do not really care about unterminated sequences
  212. for (size_t i = 0; i < strList.size(); i++)
  213. {
  214. retVal += strList[i];
  215. }
  216. return retVal;
  217. }
  218. bool getShapesBuild(Arguments& arguments, std::unordered_map<std::string, ShapeRange>& shapes, const char* argument, nvinfer1::OptProfileSelector selector)
  219. {
  220. std::string list;
  221. bool retVal = getAndDelOption(arguments, argument, list);
  222. std::vector<std::string> shapeList{splitToStringVec(list, ',')};
  223. for (const auto& s : shapeList)
  224. {
  225. auto nameDimsPair = splitNameAndValue<std::vector<int>>(s);
  226. auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
  227. auto dims = nameDimsPair.second;
  228. insertShapesBuild(shapes, selector, tensorName, dims);
  229. }
  230. return retVal;
  231. }
  232. bool getShapesInference(Arguments& arguments, std::unordered_map<std::string, std::vector<int>>& shapes, const char* argument)
  233. {
  234. std::string list;
  235. bool retVal = getAndDelOption(arguments, argument, list);
  236. std::vector<std::string> shapeList{splitToStringVec(list, ',')};
  237. for (const auto& s : shapeList)
  238. {
  239. auto nameDimsPair = splitNameAndValue<std::vector<int>>(s);
  240. auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
  241. auto dims = nameDimsPair.second;
  242. insertShapesInference(shapes, tensorName, dims);
  243. }
  244. return retVal;
  245. }
  246. void processShapes(std::unordered_map<std::string, ShapeRange>& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib)
  247. {
  248. // Only accept optShapes only or all three of minShapes, optShapes, maxShapes
  249. if ( ((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes
  250. || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes
  251. || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes
  252. {
  253. if (calib)
  254. {
  255. throw std::invalid_argument("Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib");
  256. }
  257. else
  258. {
  259. throw std::invalid_argument("Must specify only --optShapes or all of --minShapes, --optShapes, --maxShapes");
  260. }
  261. }
  262. // If optShapes only, expand optShapes to minShapes and maxShapes
  263. if (optShapes && !minShapes && !maxShapes)
  264. {
  265. std::unordered_map<std::string, ShapeRange> newShapes;
  266. for (auto& s : shapes)
  267. {
  268. insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
  269. insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
  270. insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
  271. }
  272. shapes = newShapes;
  273. }
  274. }
  275. template <typename T>
  276. void printShapes(std::ostream& os, const char* phase, const T& shapes)
  277. {
  278. if (shapes.empty())
  279. {
  280. os << "Input " << phase << " shapes: model" << std::endl;
  281. }
  282. else
  283. {
  284. for (const auto& s : shapes)
  285. {
  286. os << "Input " << phase << " shape: " << s.first << "=" << s.second << std::endl;
  287. }
  288. }
  289. }
  290. std::ostream& printBatch(std::ostream& os, int maxBatch)
  291. {
  292. if (maxBatch)
  293. {
  294. os << maxBatch;
  295. }
  296. else
  297. {
  298. os << "explicit";
  299. }
  300. return os;
  301. }
  302. std::ostream& printTacticSources(std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources)
  303. {
  304. if (!enabledSources && !disabledSources)
  305. {
  306. os << "Using default tactic sources";
  307. }
  308. else
  309. {
  310. const auto addSource = [&](uint32_t source, const std::string& name) {
  311. if (enabledSources & source)
  312. {
  313. os << name << " [ON], ";
  314. }
  315. else if (disabledSources & source)
  316. {
  317. os << name << " [OFF], ";
  318. }
  319. };
  320. addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS), "cublas");
  321. addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt");
  322. addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUDNN), "cudnn");
  323. }
  324. return os;
  325. }
  326. std::ostream& printPrecision(std::ostream& os, const BuildOptions& options)
  327. {
  328. os << "FP32";
  329. if (options.fp16)
  330. {
  331. os << "+FP16";
  332. }
  333. if (options.int8)
  334. {
  335. os << "+INT8";
  336. }
  337. return os;
  338. }
  339. std::ostream& printTimingCache(std::ostream& os, const BuildOptions& options)
  340. {
  341. switch (options.timingCacheMode)
  342. {
  343. case TimingCacheMode::kGLOBAL: os << "global"; break;
  344. case TimingCacheMode::kLOCAL: os << "local"; break;
  345. case TimingCacheMode::kDISABLE: os << "disable"; break;
  346. }
  347. return os;
  348. }
  349. std::ostream& printSparsity(std::ostream& os, const BuildOptions& options)
  350. {
  351. switch (options.sparsity)
  352. {
  353. case SparsityFlag::kDISABLE: os << "Disabled"; break;
  354. case SparsityFlag::kENABLE: os << "Enabled"; break;
  355. case SparsityFlag::kFORCE: os << "Forced"; break;
  356. }
  357. return os;
  358. }
  359. } // namespace
  360. Arguments argsToArgumentsMap(int argc, char* argv[])
  361. {
  362. Arguments arguments;
  363. for (int i = 1; i < argc; ++i)
  364. {
  365. auto valuePtr = strchr(argv[i], '=');
  366. if (valuePtr)
  367. {
  368. std::string value{valuePtr + 1};
  369. arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value);
  370. }
  371. else
  372. {
  373. arguments.emplace(argv[i], "");
  374. }
  375. }
  376. return arguments;
  377. }
  378. void BaseModelOptions::parse(Arguments& arguments)
  379. {
  380. if (getAndDelOption(arguments, "--onnx", model))
  381. {
  382. format = ModelFormat::kONNX;
  383. }
  384. else if (getAndDelOption(arguments, "--uff", model))
  385. {
  386. format = ModelFormat::kUFF;
  387. }
  388. else if (getAndDelOption(arguments, "--model", model))
  389. {
  390. format = ModelFormat::kCAFFE;
  391. }
  392. }
  393. void UffInput::parse(Arguments& arguments)
  394. {
  395. getAndDelOption(arguments, "--uffNHWC", NHWC);
  396. std::vector<std::string> args;
  397. if (getAndDelRepeatedOption(arguments, "--uffInput", args))
  398. {
  399. for (const auto& i : args)
  400. {
  401. std::vector<std::string> values{splitToStringVec(i, ',')};
  402. if (values.size() == 4)
  403. {
  404. nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]), std::stoi(values[3])};
  405. inputs.emplace_back(values[0], dims);
  406. }
  407. else
  408. {
  409. throw std::invalid_argument(std::string("Invalid uffInput ") + i);
  410. }
  411. }
  412. }
  413. }
  414. void ModelOptions::parse(Arguments& arguments)
  415. {
  416. baseModel.parse(arguments);
  417. switch (baseModel.format)
  418. {
  419. case ModelFormat::kCAFFE:
  420. {
  421. getAndDelOption(arguments, "--deploy", prototxt);
  422. break;
  423. }
  424. case ModelFormat::kUFF:
  425. {
  426. uffInputs.parse(arguments);
  427. if (uffInputs.inputs.empty())
  428. {
  429. throw std::invalid_argument("Uff models require at least one input");
  430. }
  431. break;
  432. }
  433. case ModelFormat::kONNX:
  434. break;
  435. case ModelFormat::kANY:
  436. {
  437. if (getAndDelOption(arguments, "--deploy", prototxt))
  438. {
  439. baseModel.format = ModelFormat::kCAFFE;
  440. }
  441. break;
  442. }
  443. }
  444. if (baseModel.format == ModelFormat::kCAFFE || baseModel.format == ModelFormat::kUFF)
  445. {
  446. std::vector<std::string> outArgs;
  447. if (getAndDelRepeatedOption(arguments, "--output", outArgs))
  448. {
  449. for (const auto& o : outArgs)
  450. {
  451. for (auto& v : splitToStringVec(o, ','))
  452. {
  453. outputs.emplace_back(std::move(v));
  454. }
  455. }
  456. }
  457. if (outputs.empty())
  458. {
  459. throw std::invalid_argument("Caffe and Uff models require at least one output");
  460. }
  461. }
  462. }
  463. void BuildOptions::parse(Arguments& arguments)
  464. {
  465. auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector, const char* argument) {
  466. std::string list;
  467. getAndDelOption(arguments, argument, list);
  468. std::vector<std::string> formats{splitToStringVec(list, ',')};
  469. for (const auto& f : formats)
  470. {
  471. formatsVector.push_back(stringToValue<IOFormat>(f));
  472. }
  473. };
  474. getFormats(inputFormats, "--inputIOFormats");
  475. getFormats(outputFormats, "--outputIOFormats");
  476. bool explicitBatch{false};
  477. getAndDelOption(arguments, "--explicitBatch", explicitBatch);
  478. bool minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
  479. bool optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
  480. bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
  481. processShapes(shapes, minShapes, optShapes, maxShapes, false);
  482. bool minShapesCalib
  483. = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN);
  484. bool optShapesCalib
  485. = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT);
  486. bool maxShapesCalib
  487. = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX);
  488. processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true);
  489. explicitBatch = explicitBatch || !shapes.empty();
  490. getAndDelOption(arguments, "--explicitPrecision", explicitPrecision);
  491. int batch{0};
  492. getAndDelOption(arguments, "--maxBatch", batch);
  493. if (explicitBatch && batch)
  494. {
  495. throw std::invalid_argument(
  496. "Explicit batch or dynamic shapes enabled with implicit maxBatch " + std::to_string(batch));
  497. }
  498. if (explicitBatch)
  499. {
  500. maxBatch = 0;
  501. }
  502. else
  503. {
  504. if (batch)
  505. {
  506. maxBatch = batch;
  507. }
  508. }
  509. getAndDelOption(arguments, "--workspace", workspace);
  510. getAndDelOption(arguments, "--minTiming", minTiming);
  511. getAndDelOption(arguments, "--avgTiming", avgTiming);
  512. bool best{false};
  513. getAndDelOption(arguments, "--best", best);
  514. if (best)
  515. {
  516. int8 = true;
  517. fp16 = true;
  518. }
  519. getAndDelOption(arguments, "--refit", refittable);
  520. getAndDelNegOption(arguments, "--noTF32", tf32);
  521. getAndDelOption(arguments, "--fp16", fp16);
  522. getAndDelOption(arguments, "--int8", int8);
  523. getAndDelOption(arguments, "--safe", safe);
  524. std::string sparsityString;
  525. getAndDelOption(arguments, "--sparsity", sparsityString);
  526. if (sparsityString == "disable")
  527. {
  528. sparsity = SparsityFlag::kDISABLE;
  529. }
  530. else if (sparsityString == "enable")
  531. {
  532. sparsity = SparsityFlag::kENABLE;
  533. }
  534. else if (sparsityString == "force")
  535. {
  536. sparsity = SparsityFlag::kFORCE;
  537. }
  538. else if (!sparsityString.empty())
  539. {
  540. throw std::invalid_argument(std::string("Unknown sparsity mode: ") + sparsityString);
  541. }
  542. bool calibCheck = getAndDelOption(arguments, "--calib", calibration);
  543. if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty())
  544. {
  545. shapesCalib = shapes;
  546. }
  547. std::string nvtxModeString;
  548. getAndDelOption(arguments, "--nvtxMode", nvtxModeString);
  549. if (nvtxModeString == "default")
  550. {
  551. nvtxMode = nvinfer1::ProfilingVerbosity::kDEFAULT;
  552. }
  553. else if (nvtxModeString == "none")
  554. {
  555. nvtxMode = nvinfer1::ProfilingVerbosity::kNONE;
  556. }
  557. else if (nvtxModeString == "verbose")
  558. {
  559. nvtxMode = nvinfer1::ProfilingVerbosity::kVERBOSE;
  560. }
  561. else if (!nvtxModeString.empty())
  562. {
  563. throw std::invalid_argument(std::string("Unknown nvtxMode: ") + nvtxModeString);
  564. }
  565. if (getAndDelOption(arguments, "--loadEngine", engine))
  566. {
  567. load = true;
  568. }
  569. if (getAndDelOption(arguments, "--saveEngine", engine))
  570. {
  571. save = true;
  572. }
  573. if (load && save)
  574. {
  575. throw std::invalid_argument("Incompatible load and save engine options selected");
  576. }
  577. std::string tacticSourceArgs;
  578. if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs))
  579. {
  580. std::vector<std::string> tacticList = splitToStringVec(tacticSourceArgs, ',');
  581. for (auto& t : tacticList)
  582. {
  583. bool enable{false};
  584. if (t.front() == '+')
  585. {
  586. enable = true;
  587. }
  588. else if (t.front() != '-')
  589. {
  590. throw std::invalid_argument(
  591. "Tactic source must be prefixed with + or -, indicating whether it should be enabled or disabled "
  592. "respectively.");
  593. }
  594. t.erase(0, 1);
  595. const auto toUpper = [](std::string& sourceName) {
  596. std::transform(
  597. sourceName.begin(), sourceName.end(), sourceName.begin(), [](char c) { return std::toupper(c); });
  598. return sourceName;
  599. };
  600. nvinfer1::TacticSource source{};
  601. t = toUpper(t);
  602. if (t == "CUBLAS")
  603. {
  604. source = nvinfer1::TacticSource::kCUBLAS;
  605. }
  606. else if (t == "CUBLASLT" || t == "CUBLAS_LT")
  607. {
  608. source = nvinfer1::TacticSource::kCUBLAS_LT;
  609. }
  610. else if (t == "CUDNN")
  611. {
  612. source = nvinfer1::TacticSource::kCUDNN;
  613. }
  614. else
  615. {
  616. throw std::invalid_argument(std::string("Unknown tactic source: ") + t);
  617. }
  618. uint32_t sourceBit = 1U << static_cast<uint32_t>(source);
  619. if (enable)
  620. {
  621. enabledTactics |= sourceBit;
  622. }
  623. else
  624. {
  625. disabledTactics |= sourceBit;
  626. }
  627. if (enabledTactics & disabledTactics)
  628. {
  629. throw std::invalid_argument(std::string("Cannot enable and disable ") + t);
  630. }
  631. }
  632. }
  633. bool noBuilderCache{false};
  634. getAndDelOption(arguments, "--noBuilderCache", noBuilderCache);
  635. getAndDelOption(arguments, "--timingCacheFile", timingCacheFile);
  636. if (noBuilderCache)
  637. {
  638. timingCacheMode = TimingCacheMode::kDISABLE;
  639. }
  640. else if (!timingCacheFile.empty())
  641. {
  642. timingCacheMode = TimingCacheMode::kGLOBAL;
  643. }
  644. else
  645. {
  646. timingCacheMode = TimingCacheMode::kLOCAL;
  647. }
  648. }
  649. void SystemOptions::parse(Arguments& arguments)
  650. {
  651. getAndDelOption(arguments, "--device", device);
  652. getAndDelOption(arguments, "--useDLACore", DLACore);
  653. getAndDelOption(arguments, "--allowGPUFallback", fallback);
  654. std::string pluginName;
  655. while (getAndDelOption(arguments, "--plugins", pluginName))
  656. {
  657. plugins.emplace_back(pluginName);
  658. }
  659. }
  660. void InferenceOptions::parse(Arguments& arguments)
  661. {
  662. getAndDelOption(arguments, "--streams", streams);
  663. getAndDelOption(arguments, "--iterations", iterations);
  664. getAndDelOption(arguments, "--duration", duration);
  665. getAndDelOption(arguments, "--warmUp", warmup);
  666. getAndDelOption(arguments, "--sleepTime", sleep);
  667. bool exposeDMA{false};
  668. if (getAndDelOption(arguments, "--exposeDMA", exposeDMA))
  669. {
  670. overlap = !exposeDMA;
  671. }
  672. getAndDelOption(arguments, "--noDataTransfers", skipTransfers);
  673. getAndDelOption(arguments, "--useSpinWait", spin);
  674. getAndDelOption(arguments, "--threads", threads);
  675. getAndDelOption(arguments, "--useCudaGraph", graph);
  676. getAndDelOption(arguments, "--separateProfileRun", rerun);
  677. getAndDelOption(arguments, "--buildOnly", skip);
  678. getAndDelOption(arguments, "--timeDeserialize", timeDeserialize);
  679. getAndDelOption(arguments, "--timeRefit", timeRefit);
  680. std::string list;
  681. getAndDelOption(arguments, "--loadInputs", list);
  682. std::vector<std::string> inputsList{splitToStringVec(list, ',')};
  683. splitInsertKeyValue(inputsList, inputs);
  684. getShapesInference(arguments, shapes, "--shapes");
  685. int batchOpt{0};
  686. getAndDelOption(arguments, "--batch", batchOpt);
  687. if (!shapes.empty() && batchOpt)
  688. {
  689. throw std::invalid_argument(
  690. "Explicit batch or dynamic shapes enabled with implicit batch " + std::to_string(batchOpt));
  691. }
  692. if (batchOpt)
  693. {
  694. batch = batchOpt;
  695. }
  696. else
  697. {
  698. if (!shapes.empty())
  699. {
  700. batch = 0;
  701. }
  702. }
  703. }
  704. void ReportingOptions::parse(Arguments& arguments)
  705. {
  706. getAndDelOption(arguments, "--percentile", percentile);
  707. getAndDelOption(arguments, "--avgRuns", avgs);
  708. getAndDelOption(arguments, "--verbose", verbose);
  709. getAndDelOption(arguments, "--dumpRefit", refit);
  710. getAndDelOption(arguments, "--dumpOutput", output);
  711. getAndDelOption(arguments, "--dumpProfile", profile);
  712. getAndDelOption(arguments, "--exportTimes", exportTimes);
  713. getAndDelOption(arguments, "--exportOutput", exportOutput);
  714. getAndDelOption(arguments, "--exportProfile", exportProfile);
  715. if (percentile < 0 || percentile > 100)
  716. {
  717. throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]");
  718. }
  719. }
  720. bool parseHelp(Arguments& arguments)
  721. {
  722. bool helpLong{false};
  723. bool helpShort{false};
  724. getAndDelOption(arguments, "--help", helpLong);
  725. getAndDelOption(arguments, "-h", helpShort);
  726. return helpLong || helpShort;
  727. }
  728. void AllOptions::parse(Arguments& arguments)
  729. {
  730. model.parse(arguments);
  731. build.parse(arguments);
  732. system.parse(arguments);
  733. inference.parse(arguments);
  734. if (model.baseModel.format == ModelFormat::kONNX)
  735. {
  736. build.maxBatch = 0; // ONNX only supports explicit batch mode.
  737. }
  738. auto batchWasSet = [](int batch, int defaultValue) { return batch && batch != defaultValue; };
  739. if (!build.maxBatch && batchWasSet(inference.batch, defaultBatch) && !build.shapes.empty())
  740. {
  741. throw std::invalid_argument(
  742. "Explicit batch + dynamic shapes setting used at build time but inference uses --batch to set batch. "
  743. "Conflicting build and inference batch settings.");
  744. }
  745. if (batchWasSet(build.maxBatch, defaultMaxBatch) && !inference.batch)
  746. {
  747. throw std::invalid_argument(
  748. "Implicit batch option used at build time but inference input shapes specified. Conflicting build and "
  749. "inference batch settings.");
  750. }
  751. if (build.shapes.empty() && !inference.shapes.empty())
  752. {
  753. for (auto& s : inference.shapes)
  754. {
  755. insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second);
  756. insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second);
  757. insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second);
  758. }
  759. build.maxBatch = 0;
  760. }
  761. else
  762. {
  763. if (!build.shapes.empty() && inference.shapes.empty())
  764. {
  765. for (auto& s : build.shapes)
  766. {
  767. insertShapesInference(
  768. inference.shapes, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
  769. }
  770. }
  771. if (!build.maxBatch)
  772. {
  773. inference.batch = 0;
  774. }
  775. }
  776. if (build.maxBatch && inference.batch)
  777. {
  778. // For implicit batch, check for compatibility and if --maxBatch is not given and inference batch is greater
  779. // than maxBatch, use inference batch also for maxBatch
  780. if (build.maxBatch != defaultMaxBatch && build.maxBatch < inference.batch)
  781. {
  782. throw std::invalid_argument("Build max batch " + std::to_string(build.maxBatch)
  783. + " is less than inference batch " + std::to_string(inference.batch));
  784. }
  785. else
  786. {
  787. if (build.maxBatch < inference.batch)
  788. {
  789. build.maxBatch = inference.batch;
  790. }
  791. }
  792. }
  793. reporting.parse(arguments);
  794. helps = parseHelp(arguments);
  795. if (!helps)
  796. {
  797. if (!build.load && model.baseModel.format == ModelFormat::kANY)
  798. {
  799. throw std::invalid_argument("Model missing or format not recognized");
  800. }
  801. if (!build.load && !build.maxBatch && model.baseModel.format != ModelFormat::kONNX)
  802. {
  803. throw std::invalid_argument("Explicit batch size not supported for Caffe and Uff models");
  804. }
  805. if (build.safe && system.DLACore >= 0)
  806. {
  807. auto checkSafeDLAFormats = [](const std::vector<IOFormat>& fmt) {
  808. return fmt.empty() ? false : std::all_of(fmt.begin(), fmt.end(), [](const IOFormat& pair) {
  809. bool supported{false};
  810. const bool isCHW4{pair.second == 1U << static_cast<int>(nvinfer1::TensorFormat::kCHW4)};
  811. const bool isCHW32{pair.second == 1U << static_cast<int>(nvinfer1::TensorFormat::kCHW32)};
  812. const bool isCHW16{pair.second == 1U << static_cast<int>(nvinfer1::TensorFormat::kCHW16)};
  813. supported |= pair.first == nvinfer1::DataType::kINT8 && (isCHW4 || isCHW32);
  814. supported |= pair.first == nvinfer1::DataType::kHALF && (isCHW4 || isCHW16);
  815. return supported;
  816. });
  817. };
  818. if (!checkSafeDLAFormats(build.inputFormats) || !checkSafeDLAFormats(build.inputFormats))
  819. {
  820. throw std::invalid_argument(
  821. "I/O formats for safe DLA capability are restricted to fp16:chw16 or int8:chw32");
  822. }
  823. if (system.fallback)
  824. {
  825. throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for safe DLA capability");
  826. }
  827. }
  828. }
  829. }
  830. void SafeBuilderOptions::parse(Arguments& arguments)
  831. {
  832. auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector, const char* argument) {
  833. std::string list;
  834. getAndDelOption(arguments, argument, list);
  835. std::vector<std::string> formats{splitToStringVec(list, ',')};
  836. for (const auto& f : formats)
  837. {
  838. formatsVector.push_back(stringToValue<IOFormat>(f));
  839. }
  840. };
  841. getAndDelOption(arguments, "--serialized", serialized);
  842. getAndDelOption(arguments, "--onnx", onnxModelFile);
  843. getAndDelOption(arguments, "--help", help);
  844. getAndDelOption(arguments, "--verbose", verbose);
  845. getFormats(inputFormats, "--inputIOFormats");
  846. getFormats(outputFormats, "--outputIOFormats");
  847. getAndDelOption(arguments, "--int8", int8);
  848. getAndDelOption(arguments, "--calib", calibFile);
  849. std::string pluginName;
  850. while (getAndDelOption(arguments, "--plugins", pluginName))
  851. {
  852. plugins.emplace_back(pluginName);
  853. }
  854. }
  855. std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options)
  856. {
  857. os << "=== Model Options ===" << std::endl;
  858. os << "Format: ";
  859. switch (options.format)
  860. {
  861. case ModelFormat::kCAFFE:
  862. {
  863. os << "Caffe";
  864. break;
  865. }
  866. case ModelFormat::kONNX:
  867. {
  868. os << "ONNX";
  869. break;
  870. }
  871. case ModelFormat::kUFF:
  872. {
  873. os << "UFF";
  874. break;
  875. }
  876. case ModelFormat::kANY:
  877. os << "*";
  878. break;
  879. }
  880. os << std::endl << "Model: " << options.model << std::endl;
  881. return os;
  882. }
  883. std::ostream& operator<<(std::ostream& os, const UffInput& input)
  884. {
  885. os << "Uff Inputs Layout: " << (input.NHWC ? "NHWC" : "NCHW") << std::endl;
  886. for (const auto& i : input.inputs)
  887. {
  888. os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1] << "," << i.second.d[2] << std::endl;
  889. }
  890. return os;
  891. }
  892. std::ostream& operator<<(std::ostream& os, const ModelOptions& options)
  893. {
  894. os << options.baseModel;
  895. switch (options.baseModel.format)
  896. {
  897. case ModelFormat::kCAFFE:
  898. {
  899. os << "Prototxt: " << options.prototxt << std::endl;
  900. break;
  901. }
  902. case ModelFormat::kUFF:
  903. {
  904. os << options.uffInputs;
  905. break;
  906. }
  907. case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or the generic case
  908. case ModelFormat::kANY:
  909. break;
  910. }
  911. os << "Output:";
  912. for (const auto& o : options.outputs)
  913. {
  914. os << " " << o;
  915. }
  916. os << std::endl;
  917. return os;
  918. }
  919. std::ostream& operator<<(std::ostream& os, const IOFormat& format)
  920. {
  921. switch (format.first)
  922. {
  923. case nvinfer1::DataType::kFLOAT:
  924. {
  925. os << "fp32:";
  926. break;
  927. }
  928. case nvinfer1::DataType::kHALF:
  929. {
  930. os << "fp16:";
  931. break;
  932. }
  933. case nvinfer1::DataType::kINT8:
  934. {
  935. os << "int8:";
  936. break;
  937. }
  938. case nvinfer1::DataType::kINT32:
  939. {
  940. os << "int32:";
  941. break;
  942. }
  943. case nvinfer1::DataType::kBOOL:
  944. {
  945. os << "Bool:";
  946. break;
  947. }
  948. }
  949. for (int f = 0; f < nvinfer1::EnumMax<nvinfer1::TensorFormat>(); ++f)
  950. {
  951. if ((1U << f) & format.second)
  952. {
  953. if (f)
  954. {
  955. os << "+";
  956. }
  957. switch (nvinfer1::TensorFormat(f))
  958. {
  959. case nvinfer1::TensorFormat::kLINEAR:
  960. {
  961. os << "chw";
  962. break;
  963. }
  964. case nvinfer1::TensorFormat::kCHW2:
  965. {
  966. os << "chw2";
  967. break;
  968. }
  969. case nvinfer1::TensorFormat::kHWC8:
  970. {
  971. os << "hwc8";
  972. break;
  973. }
  974. case nvinfer1::TensorFormat::kHWC16:
  975. {
  976. os << "hwc16";
  977. break;
  978. }
  979. case nvinfer1::TensorFormat::kCHW4:
  980. {
  981. os << "chw4";
  982. break;
  983. }
  984. case nvinfer1::TensorFormat::kCHW16:
  985. {
  986. os << "chw16";
  987. break;
  988. }
  989. case nvinfer1::TensorFormat::kCHW32:
  990. {
  991. os << "chw32";
  992. break;
  993. }
  994. case nvinfer1::TensorFormat::kDHWC8:
  995. {
  996. os << "dhwc8";
  997. break;
  998. }
  999. case nvinfer1::TensorFormat::kCDHW32:
  1000. {
  1001. os << "cdhw32";
  1002. break;
  1003. }
  1004. case nvinfer1::TensorFormat::kHWC:
  1005. {
  1006. os << "hwc";
  1007. break;
  1008. }
  1009. case nvinfer1::TensorFormat::kDLA_LINEAR:
  1010. {
  1011. os << "dla_linear";
  1012. break;
  1013. }
  1014. case nvinfer1::TensorFormat::kDLA_HWC4:
  1015. {
  1016. os << "dla_hwc4";
  1017. break;
  1018. }
  1019. }
  1020. }
  1021. }
  1022. return os;
  1023. }
  1024. std::ostream& operator<<(std::ostream& os, const ShapeRange& dims)
  1025. {
  1026. int i = 0;
  1027. for (const auto& d : dims)
  1028. {
  1029. if (!d.size())
  1030. {
  1031. break;
  1032. }
  1033. os << (i ? "+" : "") << d;
  1034. ++i;
  1035. }
  1036. return os;
  1037. }
//! Print every build option in the "=== Build Options ===" report style.
//!
//! NOTE: the chain below deliberately mixes `<<` with `;` — e.g.
//! `"Max batch: "; printBatch(os, ...)` ends one statement and starts another,
//! letting a helper print into the same stream mid-report. Do not "fix" the
//! semicolons: they are what keeps the labels and helper output interleaved.
std::ostream& operator<<(std::ostream& os, const BuildOptions& options)
{
    // clang-format off
    os << "=== Build Options ===" << std::endl <<
    "Max batch: "; printBatch(os, options.maxBatch) << std::endl <<
    "Workspace: " << options.workspace << " MiB" << std::endl <<
    "minTiming: " << options.minTiming << std::endl <<
    "avgTiming: " << options.avgTiming << std::endl <<
    "Precision: "; printPrecision(os, options) << std::endl <<
    "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl <<
    "Refit: " << boolToEnabled(options.refittable) << std::endl <<
    "Sparsity: "; printSparsity(os, options) << std::endl <<
    "Safe mode: " << boolToEnabled(options.safe) << std::endl <<
    "Save engine: " << (options.save ? options.engine : "") << std::endl <<
    "Load engine: " << (options.load ? options.engine : "") << std::endl <<
    "NVTX verbosity: " << static_cast<int>(options.nvtxMode) << std::endl <<
    "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl <<
    "timingCacheMode: "; printTimingCache(os, options) << std::endl <<
    "timingCacheFile: "<< options.timingCacheFile << std::endl;
    // clang-format on
    // Print one line per IO format, or the implicit fp32:CHW default if none given.
    auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat> formats) {
        if (formats.empty())
        {
            os << direction << "s format: fp32:CHW" << std::endl;
        }
        else
        {
            for(const auto& f : formats)
            {
                os << direction << ": " << f << std::endl;
            }
        }
    };
    printIOFormats(os, "Input(s)", options.inputFormats);
    printIOFormats(os, "Output(s)", options.outputFormats);
    printShapes(os, "build", options.shapes);
    printShapes(os, "calibration", options.shapesCalib);
    return os;
}
  1077. std::ostream& operator<<(std::ostream& os, const SystemOptions& options)
  1078. {
  1079. // clang-format off
  1080. os << "=== System Options ===" << std::endl <<
  1081. "Device: " << options.device << std::endl <<
  1082. "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") <<
  1083. (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl;
  1084. os << "Plugins:";
  1085. for (const auto& p : options.plugins)
  1086. {
  1087. os << " " << p;
  1088. }
  1089. os << std::endl;
  1090. return os;
  1091. // clang-format on
  1092. }
  1093. std::ostream& operator<<(std::ostream& os, const InferenceOptions& options)
  1094. {
  1095. // clang-format off
  1096. os << "=== Inference Options ===" << std::endl <<
  1097. "Batch: ";
  1098. if (options.batch && options.shapes.empty())
  1099. {
  1100. os << options.batch << std::endl;
  1101. }
  1102. else
  1103. {
  1104. os << "Explicit" << std::endl;
  1105. }
  1106. printShapes(os, "inference", options.shapes);
  1107. os << "Iterations: " << options.iterations << std::endl <<
  1108. "Duration: " << options.duration << "s (+ "
  1109. << options.warmup << "ms warm up)" << std::endl <<
  1110. "Sleep time: " << options.sleep << "ms" << std::endl <<
  1111. "Streams: " << options.streams << std::endl <<
  1112. "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl <<
  1113. "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl <<
  1114. "Spin-wait: " << boolToEnabled(options.spin) << std::endl <<
  1115. "Multithreading: " << boolToEnabled(options.threads) << std::endl <<
  1116. "CUDA Graph: " << boolToEnabled(options.graph) << std::endl <<
  1117. "Separate profiling: " << boolToEnabled(options.rerun) << std::endl <<
  1118. "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl <<
  1119. "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl <<
  1120. "Skip inference: " << boolToEnabled(options.skip) << std::endl;
  1121. // clang-format on
  1122. os << "Inputs:" << std::endl;
  1123. for (const auto& input : options.inputs)
  1124. {
  1125. os << input.first << "<-" << input.second << std::endl;
  1126. }
  1127. return os;
  1128. }
  1129. std::ostream& operator<<(std::ostream& os, const ReportingOptions& options)
  1130. {
  1131. // clang-format off
  1132. os << "=== Reporting Options ===" << std::endl <<
  1133. "Verbose: " << boolToEnabled(options.verbose) << std::endl <<
  1134. "Averages: " << options.avgs << " inferences" << std::endl <<
  1135. "Percentile: " << options.percentile << std::endl <<
  1136. "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl <<
  1137. "Dump output: " << boolToEnabled(options.output) << std::endl <<
  1138. "Profile: " << boolToEnabled(options.profile) << std::endl <<
  1139. "Export timing to JSON file: " << options.exportTimes << std::endl <<
  1140. "Export output to JSON file: " << options.exportOutput << std::endl <<
  1141. "Export profile to JSON file: " << options.exportProfile << std::endl;
  1142. // clang-format on
  1143. return os;
  1144. }
  1145. std::ostream& operator<<(std::ostream& os, const AllOptions& options)
  1146. {
  1147. os << options.model << options.build << options.system << options.inference << options.reporting << std::endl;
  1148. return os;
  1149. }
  1150. std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options)
  1151. {
  1152. auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat> formats) {
  1153. if (formats.empty())
  1154. {
  1155. os << direction << "s format: fp32:CHW" << std::endl;
  1156. }
  1157. else
  1158. {
  1159. for(const auto& f : formats)
  1160. {
  1161. os << direction << ": " << f << std::endl;
  1162. }
  1163. }
  1164. };
  1165. os << "=== Build Options ===" << std::endl;
  1166. os << "Model ONNX: " << options.onnxModelFile << std::endl;
  1167. os << "Precision: FP16";
  1168. if (options.int8)
  1169. {
  1170. os << " + INT8";
  1171. }
  1172. os << std::endl;
  1173. os << "Calibration file: " << options.calibFile << std::endl;
  1174. os << "Serialized Network: " << options.serialized << std::endl;
  1175. printIOFormats(os, "Input(s)", options.inputFormats);
  1176. printIOFormats(os, "Output(s)", options.outputFormats);
  1177. os << "Plugins:";
  1178. for (const auto& p : options.plugins)
  1179. {
  1180. os << " " << p;
  1181. }
  1182. os << std::endl;
  1183. return os;
  1184. }
  1185. void BaseModelOptions::help(std::ostream& os)
  1186. {
  1187. // clang-format off
  1188. os << " --uff=<file> UFF model" << std::endl <<
  1189. " --onnx=<file> ONNX model" << std::endl <<
  1190. " --model=<file> Caffe model (default = no model, random weights used)" << std::endl;
  1191. // clang-format on
  1192. }
  1193. void UffInput::help(std::ostream& os)
  1194. {
  1195. // clang-format off
  1196. os << " --uffInput=<name>,X,Y,Z Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified "
  1197. "multiple times; at least one is required for UFF models" << std::endl <<
  1198. " --uffNHWC Set if inputs are in the NHWC layout instead of NCHW (use " <<
  1199. "X,Y,Z=H,W,C order in --uffInput)" << std::endl;
  1200. // clang-format on
  1201. }
  1202. void ModelOptions::help(std::ostream& os)
  1203. {
  1204. // clang-format off
  1205. os << "=== Model Options ===" << std::endl;
  1206. BaseModelOptions::help(os);
  1207. os << " --deploy=<file> Caffe prototxt file" << std::endl <<
  1208. " --output=<name>[,<name>]* Output names (it can be specified multiple times); at least one output "
  1209. "is required for UFF and Caffe" << std::endl;
  1210. UffInput::help(os);
  1211. // clang-format on
  1212. }
//! Print usage text for every build option. The text is one long stream chain;
//! the strings are user-facing and must stay exactly as written.
void BuildOptions::help(std::ostream& os)
{
    // clang-format off
    os << "=== Build Options ===" << std::endl <<
    " --maxBatch Set max batch size and build an implicit batch engine (default = " << defaultMaxBatch << ")" << std::endl <<
    " --explicitBatch Use explicit batch sizes when building the engine (default = implicit)" << std::endl <<
    " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" << std::endl <<
    " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" << std::endl <<
    " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" << std::endl <<
    " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" << std::endl <<
    " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" << std::endl <<
    " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" << std::endl <<
    " Note: All three of min, opt and max shapes must be supplied." << std::endl <<
    " However, if only opt shapes is supplied then it will be expanded so" << std::endl <<
    " that min shapes and max shapes are set to the same values as opt shapes." << std::endl <<
    " In addition, use of dynamic shapes implies explicit batch." << std::endl <<
    " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl <<
    " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" << std::endl <<
    " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl <<
    " value is the dimensions (including the batch dimension) to be used for that input." << std::endl <<
    " Each key-value pair has the key and value separated using a colon (:)." << std::endl <<
    " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl <<
    " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl <<
    " See --outputIOFormats help for the grammar of type and format list." << std::endl <<
    " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl <<
    " inputs following the same order as network inputs ID (even if only one input" << std::endl <<
    " needs specifying IO format) or set the type and format once for broadcasting." << std::endl <<
    " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl <<
    " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl <<
    " outputs following the same order as network outputs ID (even if only one output" << std::endl <<
    " needs specifying IO format) or set the type and format once for broadcasting." << std::endl <<
    " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl <<
    " IOfmt ::= type:fmt" << std::endl <<
    " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl <<
    " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl <<
    " --workspace=N Set workspace size in megabytes (default = " << defaultWorkspace << ")" << std::endl <<
    " --nvtxMode=mode Specify NVTX annotation verbosity. mode ::= default|verbose|none" << std::endl <<
    " --minTiming=M Set the minimum number of iterations used in kernel selection (default = "
    << defaultMinTiming << ")" << std::endl <<
    " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = "
    << defaultAvgTiming << ")" << std::endl <<
    " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " << std::endl <<
    " and weights within the engine." << std::endl <<
    " --sparsity=spec Control sparsity (default = disabled). " << std::endl <<
    " Sparsity: spec ::= \"disable\", \"enable\", \"force\"" << std::endl <<
    " Note: Description about each of these options is as below" << std::endl <<
    " disable = do not enable sparse tactics in the builder (this is the default)" << std::endl <<
    " enable = enable sparse tactics in the builder (but these tactics will only be" << std::endl <<
    " considered if the weights have the right sparsity pattern)" << std::endl <<
    " force = enable sparse tactics in the builder and force-overwrite the weights to have" << std::endl <<
    " a sparsity pattern (even if you loaded a model yourself)" << std::endl <<
    " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" << std::endl <<
    " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" << std::endl <<
    " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" << std::endl <<
    " --best Enable all precisions to achieve the best performance (default = disabled)" << std::endl <<
    " --calib=<file> Read INT8 calibration cache file" << std::endl <<
    " --safe Only test the functionality available in safety restricted flows" << std::endl <<
    " --saveEngine=<file> Save the serialized engine" << std::endl <<
    " --loadEngine=<file> Load a serialized engine" << std::endl <<
    " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " << std::endl <<
    " tactic sources (default = all available tactics)." << std::endl <<
    " Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics." << std::endl <<
    " Tactic Sources: tactics ::= [\",\"tactic]" << std::endl <<
    " tactic ::= (+|-)lib" << std::endl <<
    " lib ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"" << std::endl <<
    " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" << std::endl <<
    " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl <<
    " --timingCacheFile=<file> Save/load the serialized global timing cache" << std::endl
    ;
    // clang-format on
}
  1284. void SystemOptions::help(std::ostream& os)
  1285. {
  1286. // clang-format off
  1287. os << "=== System Options ===" << std::endl <<
  1288. " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl <<
  1289. " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl <<
  1290. " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers "
  1291. "(default = disabled)" << std::endl;
  1292. os << " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl;
  1293. // clang-format on
  1294. }
//! Print usage text for every inference option. The strings are user-facing
//! and must stay exactly as written.
void InferenceOptions::help(std::ostream& os)
{
    // clang-format off
    os << "=== Inference Options ===" << std::endl <<
    " --batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl <<
    " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl <<
    " Note: Use of dynamic shapes implies explicit batch." << std::endl <<
    " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl <<
    " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl <<
    " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl <<
    " value is the dimensions (including the batch dimension) to be used for that input." << std::endl <<
    " Each key-value pair has the key and value separated using a colon (:)." << std::endl <<
    " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl <<
    " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be "
    "wrapped with single quotes (ex: 'Input:0')" << std::endl <<
    " Input values spec ::= Ival[\",\"spec]" << std::endl <<
    " Ival ::= name\":\"file" << std::endl <<
    " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl <<
    " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = "
    << defaultWarmUp << ")" << std::endl <<
    " --duration=N Run performance measurements for at least N seconds wallclock time (default = "
    << defaultDuration << ")" << std::endl <<
    " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute "
    "(default = " << defaultSleep << ")" << std::endl <<
    " --streams=N Instantiate N engines to use concurrently (default = " << defaultStreams << ")" << std::endl <<
    " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl <<
    " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl <<
    " --useSpinWait Actively synchronize on GPU events. This option may decrease synchronization time but "
    "increase CPU usage and power (default = disabled)" << std::endl <<
    " --threads Enable multithreading to drive engines with independent threads (default = disabled)" << std::endl <<
    " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl <<
    " This flag may be ignored if the graph capture fails." << std::endl <<
    " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl <<
    " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl <<
    " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second "
    "profile run will be executed (default = disabled)" << std::endl <<
    " --buildOnly Skip inference perf measurement (default = disabled)" << std::endl;
    // clang-format on
}
//! Print usage text for every reporting option. The strings are user-facing
//! and must stay exactly as written.
void ReportingOptions::help(std::ostream& os)
{
    // clang-format off
    os << "=== Reporting Options ===" << std::endl <<
    " --verbose Use verbose logging (default = false)" << std::endl <<
    " --avgRuns=N Report performance measurements averaged over N consecutive "
    "iterations (default = " << defaultAvgRuns << ")" << std::endl <<
    " --percentile=P Report performance for the P percentage (0<=P<=100, 0 "
    "representing max perf, and 100 representing min perf; (default"
    " = " << defaultPercentile << "%)" << std::endl <<
    " --dumpRefit Print the refittable layers and weights from a refittable "
    "engine" << std::endl <<
    " --dumpOutput Print the output tensor(s) of the last inference iteration "
    "(default = disabled)" << std::endl <<
    " --dumpProfile Print profile information per layer (default = disabled)" << std::endl <<
    " --exportTimes=<file> Write the timing results in a json file (default = disabled)" << std::endl <<
    " --exportOutput=<file> Write the output tensors to a json file (default = disabled)" << std::endl <<
    " --exportProfile=<file> Write the profile information per layer in a json file "
    "(default = disabled)" << std::endl;
    // clang-format on
}
  1355. void helpHelp(std::ostream& os)
  1356. {
  1357. // clang-format off
  1358. os << "=== Help ===" << std::endl <<
  1359. " --help, -h Print this message" << std::endl;
  1360. // clang-format on
  1361. }
  1362. void AllOptions::help(std::ostream& os)
  1363. {
  1364. ModelOptions::help(os);
  1365. os << std::endl;
  1366. BuildOptions::help(os);
  1367. os << std::endl;
  1368. InferenceOptions::help(os);
  1369. os << std::endl;
  1370. // clang-format off
  1371. os << "=== Build and Inference Batch Options ===" << std::endl <<
  1372. " When using implicit batch, the max batch size of the engine, if not given, " << std::endl <<
  1373. " is set to the inference batch size;" << std::endl <<
  1374. " when using explicit batch, if shapes are specified only for inference, they " << std::endl <<
  1375. " will be used also as min/opt/max in the build profile; if shapes are " << std::endl <<
  1376. " specified only for the build, the opt shapes will be used also for inference;" << std::endl <<
  1377. " if both are specified, they must be compatible; and if explicit batch is " << std::endl <<
  1378. " enabled but neither is specified, the model must provide complete static" << std::endl <<
  1379. " dimensions, including batch size, for all inputs" << std::endl <<
  1380. " Using ONNX models automatically forces explicit batch." << std::endl <<
  1381. std::endl;
  1382. // clang-format on
  1383. ReportingOptions::help(os);
  1384. os << std::endl;
  1385. SystemOptions::help(os);
  1386. os << std::endl;
  1387. helpHelp(os);
  1388. }
//! Print usage text for the safe-builder tool. The strings are user-facing
//! and must stay exactly as written.
void SafeBuilderOptions::printHelp(std::ostream& os)
{
    // clang-format off
    os << "=== Mandatory ===" << std::endl <<
    " --onnx=<file> ONNX model" << std::endl <<
    " " << std::endl <<
    "=== Optional ===" << std::endl <<
    " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl <<
    " See --outputIOFormats help for the grammar of type and format list." << std::endl <<
    " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl <<
    " inputs following the same order as network inputs ID (even if only one input" << std::endl <<
    " needs specifying IO format) or set the type and format once for broadcasting." << std::endl <<
    " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl <<
    " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl <<
    " outputs following the same order as network outputs ID (even if only one output" << std::endl <<
    " needs specifying IO format) or set the type and format once for broadcasting." << std::endl <<
    " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl <<
    " IOfmt ::= type:fmt" << std::endl <<
    " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl <<
    " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl <<
    " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl <<
    " --calib=<file> Read INT8 calibration cache file" << std::endl <<
    " --serialized=<file> Save the serialized network" << std::endl <<
    " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl <<
    " --verbose Use verbose logging (default = false)" << std::endl <<
    " --help Print this message" << std::endl <<
    " " << std::endl;
    // clang-format on
}
  1418. } // namespace sample