onnx: Add VeriSilicon provider

Add the option to use the VSI provider for the VeriSilicon NPUs.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/9408>
This commit is contained in:
Olivier Crête 2025-03-05 11:02:55 -05:00 committed by GStreamer Marge Bot
parent 78f0f61523
commit 40828d890b
4 changed files with 152 additions and 64 deletions

View File

@ -22,7 +22,17 @@
#include "gstonnxclient.h"
#include <onnxruntime_cxx_api.h>
#ifdef HAVE_VSI_NPU
#include <core/providers/vsinpu/vsinpu_provider_factory.h>
#endif
#ifdef CPUPROVIDER_IN_SUBDIR
#include <core/providers/cpu/cpu_provider_factory.h>
#else
#include <cpu_provider_factory.h>
#endif
#include <sstream>
#define GST_CAT_DEFAULT onnx_inference_debug
@ -153,7 +163,7 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
break;
default:
g_error ("Data type %d not handled", inputDatatype);
break;
break;
};
}
@ -225,11 +235,18 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
GstOnnxOptimizationLevel optim, GstOnnxExecutionProvider provider,
GstStructure * tensors)
{
OrtStatus *status;
if (session)
return true;
GraphOptimizationLevel onnx_optim;
switch (optim) {
try {
Ort::SessionOptions sessionOptions;
const auto & api = Ort::GetApi ();
// for debugging
//sessionOptions.SetIntraOpNumThreads (1);
GraphOptimizationLevel onnx_optim;
switch (optim) {
case GST_ONNX_OPTIMIZATION_LEVEL_DISABLE_ALL:
onnx_optim = GraphOptimizationLevel::ORT_DISABLE_ALL;
break;
@ -245,47 +262,66 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
default:
onnx_optim = GraphOptimizationLevel::ORT_ENABLE_EXTENDED;
break;
};
};
try {
Ort::SessionOptions sessionOptions;
const auto & api = Ort::GetApi ();
// for debugging
//sessionOptions.SetIntraOpNumThreads (1);
sessionOptions.SetGraphOptimizationLevel (onnx_optim);
m_provider = provider;
switch (m_provider) {
case GST_ONNX_EXECUTION_PROVIDER_CUDA:
case GST_ONNX_EXECUTION_PROVIDER_CUDA:
try {
OrtCUDAProviderOptionsV2 *cuda_options = nullptr;
Ort::ThrowOnError (api.CreateCUDAProviderOptions (&cuda_options));
std::unique_ptr < OrtCUDAProviderOptionsV2,
decltype (api.ReleaseCUDAProviderOptions) >
rel_cuda_options (cuda_options, api.ReleaseCUDAProviderOptions);
rel_cuda_options (cuda_options, api.ReleaseCUDAProviderOptions);
Ort::ThrowOnError (api.SessionOptionsAppendExecutionProvider_CUDA_V2
(static_cast < OrtSessionOptions * >(sessionOptions),
rel_cuda_options.get ()));
}
catch (Ort::Exception & ortex) {
GST_WARNING
("Failed to create CUDA provider - dropping back to CPU");
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));
}
break;
default:
} catch (Ort::Exception & ortex) {
GST_WARNING
("Failed to create CUDA provider - dropping back to CPU");
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));
break;
};
env =
Ort::Env (OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
}
break;
#ifdef HAVE_VSI_NPU
case GST_ONNX_EXECUTION_PROVIDER_VSI:
try {
status = OrtSessionOptionsAppendExecutionProvider_VSINPU(sessionOptions);
if (status != nullptr) {
GST_ERROR_OBJECT (debug_parent,
"Failed to set VSINPU AI execution provider: %s",
Ort::GetApi().GetErrorMessage(status));
return false;
}
}
catch (Ort::Exception & ortex) {
GST_ERROR_OBJECT (debug_parent,
"Failed to set VSINPU AI execution provider: %s", ortex.what ());
return false;
}
sessionOptions.DisableCpuMemArena();
break;
#endif
default:
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));
break;
}
env = Ort::Env (OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
"GstOnnxNamespace");
env.DisableTelemetryEvents();
session = new Ort::Session (env, modelFile.c_str (), sessionOptions);
auto inputTypeInfo = session->GetInputTypeInfo (0);
std::vector < int64_t > inputDims =
inputTypeInfo.GetTensorTypeAndShapeInfo ().GetShape ();
inputTypeInfo.GetTensorTypeAndShapeInfo ().GetShape ();
if (inputImageFormat == GST_ML_INPUT_IMAGE_FORMAT_HWC) {
height = inputDims[1];
width = inputDims[2];
@ -298,23 +334,23 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
fixedInputImageSize = width > 0 && height > 0;
GST_DEBUG_OBJECT (debug_parent, "Number of Output Nodes: %d",
(gint) session->GetOutputCount ());
(gint) session->GetOutputCount ());
ONNXTensorElementDataType elementType =
inputTypeInfo.GetTensorTypeAndShapeInfo ().GetElementType ();
inputTypeInfo.GetTensorTypeAndShapeInfo ().GetElementType ();
switch (elementType) {
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
setInputImageDatatype(GST_TENSOR_DATA_TYPE_UINT8);
break;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
setInputImageDatatype(GST_TENSOR_DATA_TYPE_FLOAT32);
break;
default:
GST_ERROR_OBJECT (debug_parent,
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
setInputImageDatatype(GST_TENSOR_DATA_TYPE_UINT8);
break;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
setInputImageDatatype(GST_TENSOR_DATA_TYPE_FLOAT32);
break;
default:
GST_ERROR_OBJECT (debug_parent,
"Only input tensors of type int8 and floatare supported");
return false;
}
return false;
}
Ort::AllocatorWithDefaultOptions allocator;
auto input_name = session->GetInputNameAllocated (0, allocator);
@ -322,7 +358,8 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
for (size_t i = 0; i < session->GetOutputCount (); ++i) {
auto output_name = session->GetOutputNameAllocated (i, allocator);
GST_DEBUG_OBJECT (debug_parent, "Output name %lu:%s", i, output_name.get ());
GST_DEBUG_OBJECT (debug_parent, "Output name %lu:%s", i,
output_name.get ());
outputNames.push_back (std::move (output_name));
}
genOutputNamesRaw ();
@ -330,8 +367,8 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
// look up tensor ids
auto metaData = session->GetModelMetadata ();
OrtAllocator *ortAllocator;
auto status =
Ort::GetApi ().GetAllocatorWithDefaultOptions (&ortAllocator);
status =
Ort::GetApi ().GetAllocatorWithDefaultOptions (&ortAllocator);
if (status) {
// Handle the error case
const char *errorString = Ort::GetApi ().GetErrorMessage (status);
@ -345,24 +382,23 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
size_t i = 0;
for (auto & name:outputNamesRaw) {
Ort::TypeInfo ti = session->GetOutputTypeInfo(i++);
auto type_shape = ti.GetTensorTypeAndShapeInfo();
auto card = type_shape.GetDimensionsCount();
auto type = type_shape.GetElementType();
Ort::AllocatedStringPtr res =
metaData.LookupCustomMetadataMapAllocated (name, ortAllocator);
if (res)
{
if (res) {
GQuark quark = g_quark_from_string (res.get ());
outputIds.push_back (quark);
} else if (g_str_has_prefix (name, "detection_scores")) {
} else if (g_str_has_prefix (name, "scores")) {
GQuark quark = g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES);
GST_INFO_OBJECT(debug_parent,
"No custom metadata for key '%s', assuming %s",
name, GST_MODEL_OBJECT_DETECTOR_SCORES);
outputIds.push_back (quark);
} else if (g_str_has_prefix(name, "detection_boxes")) {
} else if (g_str_has_prefix(name, "boxes")) {
GQuark quark = g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES);
GST_INFO_OBJECT(debug_parent,
"No custom metadata for key '%s', assuming %s",
@ -381,8 +417,7 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
name, GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS);
outputIds.push_back (quark);
} else {
GST_ERROR_OBJECT (debug_parent, "Failed to look up id for key %s",
name);
GST_ERROR_OBJECT (debug_parent, "Failed to look up id for key %s", name);
return false;
}
@ -413,12 +448,13 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
gst_structure_set(tensors, res.get(), GST_TYPE_CAPS,
gst_caps_new_full(tensor_desc, NULL), NULL);
}
}
catch (Ort::Exception & ortex) {
} catch (Ort::Exception & ortex) {
GST_ERROR_OBJECT (debug_parent, "%s", ortex.what ());
return false;
}
return true;
}

View File

@ -54,6 +54,7 @@ typedef enum
{
GST_ONNX_EXECUTION_PROVIDER_CPU,
GST_ONNX_EXECUTION_PROVIDER_CUDA,
GST_ONNX_EXECUTION_PROVIDER_VSI,
} GstOnnxExecutionProvider;

View File

@ -187,9 +187,24 @@ gst_onnx_execution_provider_get_type (void)
static GEnumValue execution_provider_types[] = {
{GST_ONNX_EXECUTION_PROVIDER_CPU, "CPU execution provider",
"cpu"},
#if HAVE_CUDA
{GST_ONNX_EXECUTION_PROVIDER_CUDA,
"CUDA execution provider",
"cuda"},
#else
{GST_ONNX_EXECUTION_PROVIDER_CUDA,
"CUDA execution provider (compiled out, will use CPU)",
"cuda"},
#endif
#ifdef HAVE_VSI_NPU
{GST_ONNX_EXECUTION_PROVIDER_VSI,
"VeriSilicon NPU execution provider",
"vsi"},
#else
{GST_ONNX_EXECUTION_PROVIDER_VSI,
"VeriSilicon NPU execution provider (compiled out, will use CPU)",
"vsi"},
#endif
{0, NULL, NULL},
};

View File

@ -23,26 +23,62 @@ if get_option('onnx').disabled()
subdir_done()
endif
onnxrt_dep = dependency('libonnxruntime', version : '>= 1.16.1', required : get_option('onnx'))
extra_args = []
extra_deps = []
extra_incs = []
onnxrt_dep = dependency('libonnxruntime', version : '>= 1.16.1',
required: false)
if not onnxrt_dep.found()
fsmod = import('fs')
sysroot = meson.get_external_property('sys_root', '/')
onnx_inc = join_paths(sysroot, 'usr/include/onnxruntime')
incs = []
if fsmod.is_dir(onnx_inc)
incs = [include_directories(onnx_inc)]
endif
onnxrt_dep = cxx.find_library('onnxruntime',
has_headers: 'onnxruntime_cxx_api.h',
header_include_directories: incs,
required: get_option('onnx'))
extra_incs += incs
endif
if not onnxrt_dep.found()
subdir_done()
endif
if gstcuda_dep.found()
extra_args += ['-DHAVE_CUDA']
extra_deps += [gstcuda_dep]
endif
if onnxrt_dep.found()
gstonnx = library('gstonnx',
onnx_sources,
c_args : gst_plugins_bad_args + extra_args,
cpp_args : gst_plugins_bad_args + extra_args,
link_args : noseh_link_args,
include_directories : [configinc, libsinc, cuda_stubinc],
dependencies : [gstbase_dep, gstvideo_dep, gstanalytics_dep, onnxrt_dep,
libm] + extra_deps,
install : true,
install_dir : plugins_install_dir,
)
plugins += [gstonnx]
if cxx.has_header('core/providers/cpu/cpu_provider_factory.h',
dependencies: onnxrt_dep,
include_directories: extra_incs)
extra_args += ['-DCPUPROVIDER_IN_SUBDIR']
endif
if cxx.has_function('OrtSessionOptionsAppendExecutionProvider_VSINPU',
dependencies: onnxrt_dep) and \
cxx.has_header('core/providers/vsinpu/vsinpu_provider_factory.h',
dependencies: onnxrt_dep,
include_directories: extra_incs)
message('Enabled VSI Onnx VSI NPU provider')
extra_args += ['-DHAVE_VSI_NPU']
endif
gstonnx = library('gstonnx',
onnx_sources,
c_args : gst_plugins_bad_args + extra_args,
cpp_args : gst_plugins_bad_args + extra_args,
link_args : noseh_link_args,
include_directories : [configinc, libsinc, cuda_stubinc] + extra_incs,
dependencies : [gstbase_dep, gstvideo_dep, gstanalytics_dep, onnxrt_dep,
libm] + extra_deps,
install : true,
install_dir : plugins_install_dir,
)
plugins += [gstonnx]