diff --git a/examples/onnx_ptq/README.md b/examples/onnx_ptq/README.md index 6673b4038..ef5b60512 100644 --- a/examples/onnx_ptq/README.md +++ b/examples/onnx_ptq/README.md @@ -26,6 +26,8 @@ Please use the TensorRT docker image (e.g., `nvcr.io/nvidia/tensorrt:26.02-py3`) > **Note:** If you are using `onnxruntime-gpu`, we recommend using `nvcr.io/nvidia/tensorrt:25.06-py3` as it is built with CUDA 12, which is required by the stable `onnxruntime-gpu` package. +> **Note:** If you are using `onnxruntime-gpu`, we recommend using `nvcr.io/nvidia/tensorrt:25.06-py3` as it is built with CUDA 12, which is required by the stable `onnxruntime-gpu` package. + Set the following environment variables inside the TensorRT docker. ```bash @@ -172,53 +174,39 @@ python -m modelopt.onnx.quantization \ This feature requires `TensorRT 10+` and `ORT>=1.20`. For proper usage, please make sure that the paths to `libcudnn*.so` and TensorRT `lib/` are in the `LD_LIBRARY_PATH` env variable and that the `tensorrt` python package is installed. -Please see the sample example below. - -**Step 1**: Obtain the sample ONNX model and TensorRT plugin from [TensorRT-Custom-Plugin-Example](https://github.com/leimao/TensorRT-Custom-Plugin-Example). - -  **1.1.** Change directory to `TensorRT-Custom-Plugin-Example`: - -```bash -cd /path/to/TensorRT-Custom-Plugin-Example -``` +A self-contained example is provided in the [`custom_op_plugin/`](./custom_op_plugin/) subfolder. Please see the steps below. -  **1.2.** Compile the TensorRT plugin: +**Step 1**: Build the TensorRT plugin and create the sample ONNX model. 
-```bash -cmake -B build \ - -DNVINFER_LIB=$TRT_LIBPATH/libnvinfer.so.10 \ - -DNVINFER_PLUGIN_LIB=$TRT_LIBPATH/libnvinfer_plugin.so.10 \ - -DNVONNXPARSER_LIB=$TRT_LIBPATH/libnvonnxparser.so.10 \ - -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=/usr/include/x86_64-linux-gnu -``` +  **1.1.** Compile the TensorRT plugin: ```bash -cmake --build build --config Release --parallel +cmake -S custom_op_plugin/plugin -B /tmp/plugin_build +cmake --build /tmp/plugin_build --config Release --parallel ``` -This generates a plugin in `TensorRT-Custom-Plugin-Example/build/src/plugins/IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so` +This generates `/tmp/plugin_build/libidentity_conv_plugin.so`. -  **1.3.** Create the ONNX file. +  **1.2.** Create the ONNX model with a custom `IdentityConv` operator: ```bash -python scripts/create_identity_neural_network.py +python custom_op_plugin/create_identity_neural_network.py \ + --output_path=/tmp/identity_neural_network.onnx ``` -This generates the identity_neural_network.onnx model in `TensorRT-Custom-Plugin-Example/data/identity_neural_network.onnx` - -**Step 2**: Quantize the ONNX model. We will be using the `libidentity_conv_iplugin_v2_io_ext.so` plugin for this example. +**Step 2**: Quantize the ONNX model using the compiled plugin. ```bash python -m modelopt.onnx.quantization \ - --onnx_path=/path/to/identity_neural_network.onnx \ - --trt_plugins=/path/to/libidentity_conv_iplugin_v2_io_ext.so + --onnx_path=/tmp/identity_neural_network.onnx \ + --trt_plugins=/tmp/plugin_build/libidentity_conv_plugin.so ``` **Step 3**: Deploy the quantized model with TensorRT. 
```bash -trtexec --onnx=/path/to/identity_neural_network.quant.onnx \ - --staticPlugins=/path/to/libidentity_conv_iplugin_v2_io_ext.so +trtexec --onnx=/tmp/identity_neural_network.quant.onnx \ + --staticPlugins=/tmp/plugin_build/libidentity_conv_plugin.so ``` ### Optimize Q/DQ node placement with Autotune diff --git a/examples/onnx_ptq/custom_op_plugin/create_identity_neural_network.py b/examples/onnx_ptq/custom_op_plugin/create_identity_neural_network.py new file mode 100644 index 000000000..22c05c194 --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/create_identity_neural_network.py @@ -0,0 +1,105 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create a simple identity neural network with a custom IdentityConv operator. + +This script generates an ONNX model consisting of three convolutional layers where the +second Conv node is replaced with a custom ``IdentityConv`` operator. The custom operator +is not defined in the standard ONNX operator set and requires a TensorRT plugin to parse. + +Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example. 
+""" + +import argparse +import os + +import numpy as np +import onnx +import onnx_graphsurgeon as gs + + +def create_identity_neural_network(output_path: str) -> None: + """Create and save an ONNX model with a custom IdentityConv operator.""" + opset_version = 15 + + input_shape = (1, 3, 480, 960) + input_channels = input_shape[1] + + # Configure identity convolution weights (depthwise, 1x1 kernel with all ones). + weights_shape = (input_channels, 1, 1, 1) + num_groups = input_channels + weights_data = np.ones(weights_shape, dtype=np.float32) + + # Build the ONNX graph using onnx-graphsurgeon. + x0 = gs.Variable(name="X0", dtype=np.float32, shape=input_shape) + w0 = gs.Constant(name="W0", values=weights_data) + x1 = gs.Variable(name="X1", dtype=np.float32, shape=input_shape) + w1 = gs.Constant(name="W1", values=weights_data) + x2 = gs.Variable(name="X2", dtype=np.float32, shape=input_shape) + w2 = gs.Constant(name="W2", values=weights_data) + x3 = gs.Variable(name="X3", dtype=np.float32, shape=input_shape) + + conv_attrs = { + "kernel_shape": [1, 1], + "strides": [1, 1], + "pads": [0, 0, 0, 0], + "group": num_groups, + } + + node_1 = gs.Node(name="Conv-1", op="Conv", inputs=[x0, w0], outputs=[x1], attrs=conv_attrs) + + # The second node uses the custom IdentityConv operator instead of standard Conv. + # This operator requires a TensorRT plugin to be loaded at runtime. + node_2 = gs.Node( + name="Conv-2", + op="IdentityConv", + inputs=[x1, w1], + outputs=[x2], + attrs={ + **conv_attrs, + "plugin_version": "1", + "plugin_namespace": "", + }, + ) + + node_3 = gs.Node(name="Conv-3", op="Conv", inputs=[x2, w2], outputs=[x3], attrs=conv_attrs) + + graph = gs.Graph( + nodes=[node_1, node_2, node_3], + inputs=[x0], + outputs=[x3], + opset=opset_version, + ) + model = gs.export_onnx(graph) + # Shape inference does not work with the custom operator. 
+ dirname = os.path.dirname(output_path) + if dirname: + os.makedirs(dirname, exist_ok=True) + onnx.save(model, output_path) + print(f"Saved ONNX model to {output_path}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Create an ONNX model with a custom IdentityConv operator." + ) + parser.add_argument( + "--output_path", + type=str, + default="identity_neural_network.onnx", + help="Path to save the generated ONNX model.", + ) + args = parser.parse_args() + create_identity_neural_network(args.output_path) diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/CMakeLists.txt b/examples/onnx_ptq/custom_op_plugin/plugin/CMakeLists.txt new file mode 100644 index 000000000..8343630be --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/CMakeLists.txt @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cmake_minimum_required(VERSION 3.18) + +project(IDENTITY-CONV-PLUGIN VERSION 0.0.1 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(CUDAToolkit REQUIRED) + +# TensorRT libraries +find_library(NVINFER_LIB nvinfer HINTS /usr/lib/x86_64-linux-gnu/ PATH_SUFFIXES lib lib64 REQUIRED) +find_library(NVINFER_PLUGIN_LIB nvinfer_plugin HINTS /usr/lib/x86_64-linux-gnu/ PATH_SUFFIXES lib lib64 REQUIRED) + +add_library( + identity_conv_plugin + SHARED + PluginUtils.cpp + IdentityConvPlugin.cpp + IdentityConvPluginCreator.cpp + PluginRegistration.cpp +) + +target_include_directories(identity_conv_plugin PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(identity_conv_plugin PRIVATE ${NVINFER_LIB} ${NVINFER_PLUGIN_LIB} CUDA::cudart) diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.cpp b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.cpp new file mode 100644 index 000000000..7b8a41d7b --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.cpp @@ -0,0 +1,200 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TensorRT IdentityConv custom plugin implementation. +// The enqueue method performs a simple identity (passthrough) operation using cudaMemcpyAsync. 
+// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example.
+
+#include <cstdint>
+#include <cstring>
+#include <exception>
+
+#include <cuda_runtime.h>
+#include <NvInferRuntimePlugin.h>
+
+#include "IdentityConvPlugin.h"
+#include "PluginUtils.h"
+
+namespace nvinfer1 {
+namespace plugin {
+
+template <typename BufferType, typename Type> void write(BufferType *&buffer, Type const &val) {
+  static_assert(sizeof(BufferType) == 1, "BufferType must be a 1 byte type.");
+  std::memcpy(buffer, &val, sizeof(Type));
+  buffer += sizeof(Type);
+}
+
+template <typename OutType, typename BufferType> OutType read(BufferType const *&buffer) {
+  static_assert(sizeof(BufferType) == 1, "BufferType must be a 1 byte type.");
+  OutType val{};
+  std::memcpy(&val, static_cast<void const *>(buffer), sizeof(OutType));
+  buffer += sizeof(OutType);
+  return val;
+}
+
+IdentityConv::IdentityConv(IdentityConvParameters params) : mParams{params} {}
+
+IdentityConv::IdentityConv(void const *data, size_t length) {
+  deserialize(static_cast<uint8_t const *>(data), length);
+}
+
+void IdentityConv::deserialize(uint8_t const *data, size_t length) {
+  uint8_t const *d{data};
+  mParams.group = read<int32_t>(d);
+  mParams.dtype = read<nvinfer1::DataType>(d);
+  mParams.channelSize = read<int32_t>(d);
+  mParams.height = read<int32_t>(d);
+  mParams.width = read<int32_t>(d);
+  mParams.dtypeBytes = read<size_t>(d);
+  PLUGIN_ASSERT(d == data + length);
+}
+
+int32_t IdentityConv::getNbOutputs() const noexcept { return 1; }
+
+void IdentityConv::configurePlugin(nvinfer1::PluginTensorDesc const *in, int32_t nbInput,
+                                   nvinfer1::PluginTensorDesc const *out,
+                                   int32_t nbOutput) noexcept {
+  PLUGIN_ASSERT(nbInput == 2);
+  PLUGIN_ASSERT(nbOutput == 1);
+  PLUGIN_ASSERT(in[0].dims.nbDims == 3);
+  PLUGIN_ASSERT(out[0].dims.nbDims == 3);
+  PLUGIN_ASSERT(in[0].dims.d[0] == out[0].dims.d[0]);
+  PLUGIN_ASSERT(in[0].dims.d[1] == out[0].dims.d[1]);
+  PLUGIN_ASSERT(in[0].dims.d[2] == out[0].dims.d[2]);
+  PLUGIN_ASSERT(in[0].type == out[0].type);
+
+  mParams.dtype = in[0].type;
+  mParams.channelSize = in[0].dims.d[0];
+  mParams.height = in[0].dims.d[1];
+  mParams.width = in[0].dims.d[2];
+
+  if (mParams.dtype == nvinfer1::DataType::kINT8) {
+    mParams.dtypeBytes = 1;
+  } else if (mParams.dtype == nvinfer1::DataType::kHALF) {
+    mParams.dtypeBytes = 2;
+  } else if (mParams.dtype == nvinfer1::DataType::kFLOAT) {
+    mParams.dtypeBytes = 4;
+  } else {
+    PLUGIN_ASSERT(false);
+  }
+}
+
+int32_t IdentityConv::initialize() noexcept { return 0; }
+
+void IdentityConv::terminate() noexcept {}
+
+nvinfer1::Dims IdentityConv::getOutputDimensions(int32_t index, nvinfer1::Dims const *inputs,
+                                                 int32_t nbInputDims) noexcept {
+  PLUGIN_ASSERT(index == 0);
+  PLUGIN_ASSERT(nbInputDims == 2);
+  PLUGIN_ASSERT(inputs != nullptr);
+  PLUGIN_ASSERT(inputs[0].nbDims == 3);
+
+  nvinfer1::Dims dimsOutput;
+  dimsOutput.nbDims = inputs[0].nbDims;
+  dimsOutput.d[0] = inputs[0].d[0];
+  dimsOutput.d[1] = inputs[0].d[1];
+  dimsOutput.d[2] = inputs[0].d[2];
+
+  return dimsOutput;
+}
+
+size_t IdentityConv::getWorkspaceSize(int32_t maxBatchSize) const noexcept { return 0; }
+
+size_t IdentityConv::getSerializationSize() const noexcept {
+  return sizeof(int32_t) * 4 + sizeof(nvinfer1::DataType) + sizeof(size_t);
+}
+
+void IdentityConv::serialize(void *buffer) const noexcept {
+  char *d{reinterpret_cast<char *>(buffer)};
+  char *const a{d};
+  write(d, mParams.group);
+  write(d, mParams.dtype);
+  write(d, mParams.channelSize);
+  write(d, mParams.height);
+  write(d, mParams.width);
+  write(d, mParams.dtypeBytes);
+  PLUGIN_ASSERT(d == a + getSerializationSize());
+}
+
+bool IdentityConv::supportsFormatCombination(int32_t pos, nvinfer1::PluginTensorDesc const *inOut,
+                                             int32_t nbInputs, int32_t nbOutputs) const noexcept {
+  PLUGIN_ASSERT(nbInputs == 2 && nbOutputs == 1 && pos < nbInputs + nbOutputs);
+  bool isValidCombination = false;
+
+  isValidCombination |= (inOut[pos].format == nvinfer1::TensorFormat::kLINEAR &&
+                         inOut[pos].type == nvinfer1::DataType::kFLOAT);
+  isValidCombination |= (inOut[pos].format == nvinfer1::TensorFormat::kLINEAR &&
+                         inOut[pos].type == nvinfer1::DataType::kHALF);
+  isValidCombination &= (pos < nbInputs || (inOut[pos].format
+                                            == inOut[0].format &&
+                                            inOut[pos].type == inOut[0].type));
+
+  return isValidCombination;
+}
+
+char const *IdentityConv::getPluginType() const noexcept { return kIDENTITY_CONV_PLUGIN_NAME; }
+
+char const *IdentityConv::getPluginVersion() const noexcept {
+  return kIDENTITY_CONV_PLUGIN_VERSION;
+}
+
+void IdentityConv::destroy() noexcept { delete this; }
+
+nvinfer1::IPluginV2IOExt *IdentityConv::clone() const noexcept {
+  try {
+    IPluginV2IOExt *const plugin{new IdentityConv{mParams}};
+    plugin->setPluginNamespace(mPluginNamespace);
+    return plugin;
+  } catch (std::exception const &e) {
+    caughtError(e);
+  }
+  return nullptr;
+}
+
+void IdentityConv::setPluginNamespace(char const *pluginNamespace) noexcept {
+  mPluginNamespace = pluginNamespace;
+}
+
+char const *IdentityConv::getPluginNamespace() const noexcept { return mPluginNamespace; }
+
+nvinfer1::DataType IdentityConv::getOutputDataType(int32_t index,
+                                                   nvinfer1::DataType const *inputTypes,
+                                                   int32_t nbInputs) const noexcept {
+  PLUGIN_ASSERT(index == 0);
+  PLUGIN_ASSERT(nbInputs == 2);
+  return inputTypes[0];
+}
+
+bool IdentityConv::isOutputBroadcastAcrossBatch(int32_t outputIndex, bool const *inputIsBroadcasted,
+                                                int32_t nbInputs) const noexcept {
+  return false;
+}
+
+bool IdentityConv::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept { return false; }
+
+int32_t IdentityConv::enqueue(int32_t batchSize, void const *const *inputs, void *const *outputs,
+                              void *workspace, cudaStream_t stream) noexcept {
+  size_t const inputSize{
+      static_cast<size_t>(batchSize * mParams.channelSize * mParams.height * mParams.width)};
+  size_t const inputSizeBytes{inputSize * mParams.dtypeBytes};
+  cudaError_t const status{
+      cudaMemcpyAsync(outputs[0], inputs[0], inputSizeBytes, cudaMemcpyDeviceToDevice, stream)};
+  return status;
+}
+
+} // namespace plugin
+} // namespace nvinfer1
diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.h
b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.h new file mode 100644 index 000000000..da6128424 --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPlugin.h @@ -0,0 +1,107 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TensorRT IdentityConv custom plugin header. +// This plugin performs a simple identity (passthrough) operation using CUDA memcpy. +// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example. 
+
+#ifndef TENSORRT_IDENTITY_CONV_PLUGIN_H
+#define TENSORRT_IDENTITY_CONV_PLUGIN_H
+
+#include <cuda_runtime.h>
+
+#include <NvInferRuntimePlugin.h>
+
+constexpr char const *const kIDENTITY_CONV_PLUGIN_NAME{"IdentityConv"};
+constexpr char const *const kIDENTITY_CONV_PLUGIN_VERSION{"1"};
+
+namespace nvinfer1 {
+namespace plugin {
+
+struct IdentityConvParameters {
+  int32_t group;
+  nvinfer1::DataType dtype;
+  int32_t channelSize;
+  int32_t height;
+  int32_t width;
+  size_t dtypeBytes;
+};
+
+class IdentityConv : public nvinfer1::IPluginV2IOExt {
+public:
+  IdentityConv(IdentityConvParameters params);
+
+  IdentityConv(void const *data, size_t length);
+
+  ~IdentityConv() override = default;
+
+  int32_t getNbOutputs() const noexcept override;
+
+  nvinfer1::Dims getOutputDimensions(int32_t index, nvinfer1::Dims const *inputs,
+                                     int32_t nbInputDims) noexcept override;
+
+  int32_t initialize() noexcept override;
+
+  void terminate() noexcept override;
+
+  size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept override;
+
+  int32_t enqueue(int32_t batchSize, void const *const *inputs, void *const *outputs,
+                  void *workspace, cudaStream_t stream) noexcept override;
+
+  size_t getSerializationSize() const noexcept override;
+
+  void serialize(void *buffer) const noexcept override;
+
+  void configurePlugin(nvinfer1::PluginTensorDesc const *in, int32_t nbInput,
+                       nvinfer1::PluginTensorDesc const *out, int32_t nbOutput) noexcept override;
+
+  bool supportsFormatCombination(int32_t pos, nvinfer1::PluginTensorDesc const *inOut,
+                                 int32_t nbInputs, int32_t nbOutputs) const noexcept override;
+
+  char const *getPluginType() const noexcept override;
+
+  char const *getPluginVersion() const noexcept override;
+
+  void destroy() noexcept override;
+
+  IPluginV2IOExt *clone() const noexcept override;
+
+  nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const *inputType,
+                                       int32_t nbInputs) const noexcept override;
+
+  void setPluginNamespace(char const *pluginNamespace) noexcept override;
+
+
char const *getPluginNamespace() const noexcept override; + + bool isOutputBroadcastAcrossBatch(int32_t outputIndex, bool const *inputIsBroadcasted, + int32_t nbInputs) const noexcept override; + + bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept override; + +private: + void deserialize(uint8_t const *data, size_t length); + + IdentityConvParameters mParams; + + char const *mPluginNamespace; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // TENSORRT_IDENTITY_CONV_PLUGIN_H diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.cpp b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.cpp new file mode 100644 index 000000000..3c787ad13 --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.cpp @@ -0,0 +1,107 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TensorRT IdentityConv plugin creator (factory) implementation. +// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example. 
+
+#include <cstring>
+#include <exception>
+
+#include <NvInferRuntimePlugin.h>
+
+#include "IdentityConvPlugin.h"
+#include "IdentityConvPluginCreator.h"
+#include "PluginUtils.h"
+
+namespace nvinfer1 {
+namespace plugin {
+
+REGISTER_TENSORRT_PLUGIN(IdentityConvCreator);
+
+IdentityConvCreator::IdentityConvCreator() {
+  mPluginAttributes.clear();
+  mPluginAttributes.emplace_back(
+      nvinfer1::PluginField("kernel_shape", nullptr, PluginFieldType::kINT32, 2));
+  mPluginAttributes.emplace_back(
+      nvinfer1::PluginField("strides", nullptr, PluginFieldType::kINT32, 2));
+  mPluginAttributes.emplace_back(
+      nvinfer1::PluginField("pads", nullptr, PluginFieldType::kINT32, 4));
+  mPluginAttributes.emplace_back(
+      nvinfer1::PluginField("group", nullptr, PluginFieldType::kINT32, 1));
+
+  mFC.nbFields = mPluginAttributes.size();
+  mFC.fields = mPluginAttributes.data();
+}
+
+char const *IdentityConvCreator::getPluginName() const noexcept {
+  return kIDENTITY_CONV_PLUGIN_NAME;
+}
+
+char const *IdentityConvCreator::getPluginVersion() const noexcept {
+  return kIDENTITY_CONV_PLUGIN_VERSION;
+}
+
+nvinfer1::PluginFieldCollection const *IdentityConvCreator::getFieldNames() noexcept {
+  return &mFC;
+}
+
+nvinfer1::IPluginV2IOExt *
+IdentityConvCreator::createPlugin(char const *name,
+                                  nvinfer1::PluginFieldCollection const *fc) noexcept {
+  try {
+    nvinfer1::PluginField const *fields{fc->fields};
+    int32_t nbFields{fc->nbFields};
+
+    PLUGIN_VALIDATE(nbFields == 4);
+
+    int32_t group{};
+
+    for (int32_t i{0}; i < nbFields; ++i) {
+      char const *attrName = fields[i].name;
+      if (!strcmp(attrName, "group")) {
+        PLUGIN_VALIDATE(fields[i].type == nvinfer1::PluginFieldType::kINT32);
+        PLUGIN_VALIDATE(fields[i].length == 1);
+        group = *(static_cast<int32_t const *>(fields[i].data));
+      }
+    }
+
+    IdentityConvParameters const params{.group = group};
+
+    IdentityConv *const plugin{new IdentityConv{params}};
+    plugin->setPluginNamespace(mNamespace.c_str());
+    return plugin;
+  } catch (std::exception const &e) {
+    caughtError(e);
+  }
+  return nullptr;
+}
+
+nvinfer1::IPluginV2IOExt *IdentityConvCreator::deserializePlugin(char const *name, + void const *serialData, + size_t serialLength) noexcept { + try { + IdentityConv *plugin = new IdentityConv{serialData, serialLength}; + plugin->setPluginNamespace(mNamespace.c_str()); + return plugin; + } catch (std::exception const &e) { + caughtError(e); + } + return nullptr; +} + +} // namespace plugin +} // namespace nvinfer1 diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.h b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.h new file mode 100644 index 000000000..9b7cef8b5 --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/IdentityConvPluginCreator.h @@ -0,0 +1,63 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TensorRT IdentityConv plugin creator (factory) header. +// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example. 
+
+#ifndef TENSORRT_IDENTITY_CONV_PLUGIN_CREATOR_H
+#define TENSORRT_IDENTITY_CONV_PLUGIN_CREATOR_H
+
+#include <string>
+#include <vector>
+
+#include <NvInferRuntimePlugin.h>
+
+namespace nvinfer1 {
+namespace plugin {
+
+class IdentityConvCreator : public nvinfer1::IPluginCreator {
+public:
+  IdentityConvCreator();
+
+  ~IdentityConvCreator() override = default;
+
+  char const *getPluginName() const noexcept override;
+
+  char const *getPluginVersion() const noexcept override;
+
+  nvinfer1::PluginFieldCollection const *getFieldNames() noexcept override;
+
+  nvinfer1::IPluginV2IOExt *
+  createPlugin(char const *name, nvinfer1::PluginFieldCollection const *fc) noexcept override;
+
+  nvinfer1::IPluginV2IOExt *deserializePlugin(char const *name, void const *serialData,
+                                              size_t serialLength) noexcept override;
+
+  void setPluginNamespace(char const *libNamespace) noexcept override { mNamespace = libNamespace; }
+
+  char const *getPluginNamespace() const noexcept override { return mNamespace.c_str(); }
+
+private:
+  nvinfer1::PluginFieldCollection mFC;
+  std::vector<nvinfer1::PluginField> mPluginAttributes;
+  std::string mNamespace;
+};
+
+} // namespace plugin
+} // namespace nvinfer1
+
+#endif // TENSORRT_IDENTITY_CONV_PLUGIN_CREATOR_H
diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/PluginRegistration.cpp b/examples/onnx_ptq/custom_op_plugin/plugin/PluginRegistration.cpp
new file mode 100644
index 000000000..e1ed3c4a2
--- /dev/null
+++ b/examples/onnx_ptq/custom_op_plugin/plugin/PluginRegistration.cpp
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Plugin registration: provides the external C API that TensorRT calls at runtime
+// to discover and load plugins from this shared library.
+// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example.
+
+#include <mutex>
+
+#include <NvInferRuntime.h>
+
+#include "IdentityConvPluginCreator.h"
+
+class ThreadSafeLoggerFinder {
+public:
+  ThreadSafeLoggerFinder() = default;
+
+  void setLoggerFinder(nvinfer1::ILoggerFinder *finder) {
+    std::lock_guard<std::mutex> lk(mMutex);
+    if (mLoggerFinder == nullptr && finder != nullptr) {
+      mLoggerFinder = finder;
+    }
+  }
+
+  nvinfer1::ILogger *getLogger() noexcept {
+    std::lock_guard<std::mutex> lk(mMutex);
+    if (mLoggerFinder != nullptr) {
+      return mLoggerFinder->findLogger();
+    }
+    return nullptr;
+  }
+
+private:
+  nvinfer1::ILoggerFinder *mLoggerFinder{nullptr};
+  std::mutex mMutex;
+};
+
+ThreadSafeLoggerFinder gLoggerFinder;
+
+extern "C" void setLoggerFinder(nvinfer1::ILoggerFinder *finder) {
+  gLoggerFinder.setLoggerFinder(finder);
+}
+
+extern "C" nvinfer1::IPluginCreator *const *getPluginCreators(int32_t &nbCreators) {
+  nbCreators = 1;
+  static nvinfer1::plugin::IdentityConvCreator identityConvCreator{};
+  static nvinfer1::IPluginCreator *const pluginCreatorList[] = {&identityConvCreator};
+  return pluginCreatorList;
+}
diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.cpp b/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.cpp
new file mode 100644
index 000000000..1cc90ee11
--- /dev/null
+++ b/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.cpp
@@ -0,0 +1,47 @@
+/*
+ * SPDX-FileCopyrightText: Copyright
(c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Utility functions for TensorRT plugin error handling and logging.
+// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example.
+
+#include <cstdlib>
+#include <sstream>
+
+#include <NvInferRuntime.h>
+
+void caughtError(std::exception const &e) {
+  getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what());
+}
+
+void reportAssertion(bool success, char const *msg, char const *file, int32_t line) {
+  if (!success) {
+    std::ostringstream stream;
+    stream << "Assertion failed: " << msg << std::endl
+           << file << ':' << line << std::endl
+           << "Aborting..."
<< std::endl; + getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); + std::abort(); + } +} + +void reportValidation(bool success, char const *msg, char const *file, int32_t line) { + if (!success) { + std::ostringstream stream; + stream << "Validation failed: " << msg << std::endl << file << ':' << line << std::endl; + getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); + } +} diff --git a/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.h b/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.h new file mode 100644 index 000000000..aaeb114ae --- /dev/null +++ b/examples/onnx_ptq/custom_op_plugin/plugin/PluginUtils.h @@ -0,0 +1,32 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Utility functions for TensorRT plugin error handling and logging. +// Based on https://github.com/leimao/TensorRT-Custom-Plugin-Example. 
+
+#ifndef TENSORRT_PLUGIN_UTILS_H
+#define TENSORRT_PLUGIN_UTILS_H
+
+#include <cstdint>
+#include <exception>
+
+void caughtError(std::exception const &e);
+
+#define PLUGIN_ASSERT(val) reportAssertion((val), #val, __FILE__, __LINE__)
+void reportAssertion(bool success, char const *msg, char const *file, int32_t line);
+
+#define PLUGIN_VALIDATE(val) reportValidation((val), #val, __FILE__, __LINE__)
+void reportValidation(bool success, char const *msg, char const *file, int32_t line);
+
+#endif // TENSORRT_PLUGIN_UTILS_H