Compare commits

...

10 Commits

Author SHA1 Message Date
Elias Rosendahl
6377ebf568 Add Execution Provider OpenVINO 2025-06-09 12:49:22 +02:00
Santosh Mahto
62731c958c gstanalytics : Add tensor decoder element for yolo detection models
Existing tensor decoder has been bifurcated into two separate gst
elements as:

`yoloodv5tensordecoder`: decodes tensors output (masks) from detection-only
models e.g. yolov8s.onnx

`yolsegv8tensordecoder`: decodes tensors output (masks and logits) from
segmentation models e.g. FastSAM or yolov8s-seg
2025-04-05 13:09:00 +02:00
Daniel Morin
87b56fbf86 tensordecoders: rename element
- yolotensordecoder replaced with yolov8segtensordecoder
2025-04-05 13:09:00 +02:00
Daniel Morin
83d685054b tensordecoders: address MR comments 2025-04-05 13:09:00 +02:00
Daniel Morin
e616f64152 tensordecoders: Remove object locking in properties accessors 2025-04-05 13:09:00 +02:00
Daniel Morin
df08833e27 tensordecoders: add property label-file to example 2025-04-05 13:09:00 +02:00
Daniel Morin
01a504c310 tensordecoder: Improve class retrieval
- Optimize the way the class with maximum confidence is retrieved. Avoid
  multiple passes.
- Add support for label files
2025-04-05 13:09:00 +02:00
Santosh Mahto
62eeb7e008 gst-analytics : Adapt and Rename fastsamtensordecoder to yolo based.
YOLOv8 models have the same tensor output format as FastSAM, so for better
generalization rename fastsamtensordecoder to yolotensordecoder. This
also requires code adaptation to support Yolo based models.
2025-04-05 13:09:00 +02:00
Olivier Crête
244dd01b22 fastsamtensordecoder: Set mask resolution based on model output 2025-04-05 13:09:00 +02:00
Daniel Morin
b7f964929c fastsamtensordecoder: Add FastSAM tensor decoder
Co-authored-by: Vineet Suryan <vineet.suryan@collabora.com>
2025-04-05 13:08:59 +02:00
9 changed files with 1665 additions and 5 deletions

View File

@ -0,0 +1,4 @@
{
"name": "FT-Driverless Dev",
"image": "git.fasttube.de/ft-driverless/ft_as:gstreamer-plugin-bad"
}

View File

@ -248041,6 +248041,114 @@
"tensordecoders": {
"description": "Tensor decoders elements",
"elements": {
"fastsamtensordecoder": {
"author": "Daniel Morin <daniel.morin@collabora.com>",
"description": "Decode tensors output from the inference of FastSAM model (segmentation) on video frames. The original repository of the FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a strawberry segmentation model based on FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensors name embedded matching default values of tensors-masks-name and tensors-logits-name properties. It's also possible to embed tensor-ids into any model based on FastSAM architecture to allow this tensor-decoder to decode tensors. This process is described in the Readme of this repository: https://col.la/gstonnxmodels",
"hierarchy": [
"GstFastSAMTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "TensorDecoder/Video",
"pad-templates": {
"sink": {
"caps": "video/x-raw:\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"box-confidence-threshold": {
"blurb": "Boxes with a location confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"class-confidence-threshold": {
"blurb": "Classes with a confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"iou-threshold": {
"blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.7",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"max-detections": {
"blurb": "Maximum object/masks detections.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "100",
"max": "-1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "guint",
"writable": true
},
"tensors-name-logits": {
"blurb": "Name that identify FastSAM logits tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Logits",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
},
"tensors-name-masks": {
"blurb": "Name that identify FastSAM mask tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Masks",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",

View File

@ -21,7 +21,9 @@
*/
#include "gstonnxclient.h"
#include <cpu_provider_factory.h>
#include <onnxruntime_cxx_api.h>
#include <onnxruntime/core/providers/cpu/cpu_provider_factory.h>
#include <onnxruntime/core/providers/openvino/openvino_provider_factory.h>
#include <sstream>
#define GST_CAT_DEFAULT onnx_inference_debug
@ -63,8 +65,9 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
inputDatatypeSize (sizeof (uint8_t)),
fixedInputImageSize (false),
inputTensorOffset (0.0),
inputTensorScale (1.0)
{
inputTensorScale (1.0),
provider_config(nullptr)
{
}
GstOnnxClient::~GstOnnxClient () {
@ -72,6 +75,10 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
delete[]dest;
}
/* Store the execution-provider configuration string (comma-separated
 * key=value pairs, parsed later when the session is created).
 * NOTE(review): only the raw pointer is stored, not a copy — the caller
 * must guarantee the string outlives this client (gst_onnx_inference
 * keeps a g_strdup'ed copy in self->provider_config until finalize;
 * confirm no other caller passes a transient string). */
void GstOnnxClient::setProviderConfig (const char *config)
{
  provider_config = config;
}
int32_t GstOnnxClient::getWidth (void)
{
return width;
@ -222,6 +229,20 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
(sessionOptions, 1));
}
break;
case GST_ONNX_EXECUTION_PROVIDER_OPENVINO: {
std::unordered_map<std::string, std::string> ovOptions;
if (this->provider_config) {
std::istringstream ss(this->provider_config);
std::string kv;
while (std::getline(ss, kv, ',')) {
auto pos = kv.find('=');
if (pos == std::string::npos) continue;
ovOptions[kv.substr(0, pos)] = kv.substr(pos + 1);
}
}
sessionOptions.AppendExecutionProvider("OpenVINO", ovOptions);
}
break;
default:
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));

View File

@ -54,6 +54,7 @@ typedef enum
{
GST_ONNX_EXECUTION_PROVIDER_CPU,
GST_ONNX_EXECUTION_PROVIDER_CUDA,
GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
} GstOnnxExecutionProvider;
@ -82,6 +83,7 @@ namespace GstOnnxNamespace {
GstTensorMeta *copy_tensors_to_meta (std::vector<Ort::Value> &outputs,
GstBuffer *buffer);
void parseDimensions(GstVideoInfo vinfo);
void setProviderConfig(const char *config);
private:
GstElement *debug_parent;
@ -108,6 +110,7 @@ namespace GstOnnxNamespace {
bool fixedInputImageSize;
float inputTensorOffset;
float inputTensorScale;
const char *provider_config;
};
}

View File

@ -84,6 +84,7 @@ struct _GstOnnxInference
gpointer onnx_client;
gboolean onnx_disabled;
GstVideoInfo video_info;
gchar *provider_config;
};
GST_DEBUG_CATEGORY (onnx_inference_debug);
@ -101,6 +102,7 @@ enum
PROP_INPUT_IMAGE_FORMAT,
PROP_OPTIMIZATION_LEVEL,
PROP_EXECUTION_PROVIDER,
PROP_PROVIDER_CONFIG,
PROP_INPUT_OFFSET,
PROP_INPUT_SCALE
};
@ -190,6 +192,9 @@ gst_onnx_execution_provider_get_type (void)
{GST_ONNX_EXECUTION_PROVIDER_CUDA,
"CUDA execution provider",
"cuda"},
{GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
"OPENVINO execution provider",
"openvino"},
{0, NULL, NULL},
};
@ -316,6 +321,14 @@ gst_onnx_inference_class_init (GstOnnxInferenceClass * klass)
G_MINFLOAT, G_MAXFLOAT, 1.0,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (G_OBJECT_CLASS (klass),
PROP_PROVIDER_CONFIG,
g_param_spec_string ("provider-config",
"Provider config",
"Comma-separierte Key=Value-Optionen",
nullptr,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
gst_element_class_set_static_metadata (element_class, "onnxinference",
"Filter/Effect/Video",
@ -351,7 +364,8 @@ static void
gst_onnx_inference_finalize (GObject * object)
{
GstOnnxInference *self = GST_ONNX_INFERENCE (object);
g_free (self->provider_config);
self->provider_config = NULL;
g_free (self->model_file);
delete GST_ONNX_CLIENT_MEMBER (self);
G_OBJECT_CLASS (gst_onnx_inference_parent_class)->finalize (object);
@ -397,6 +411,11 @@ gst_onnx_inference_set_property (GObject * object, guint prop_id,
case PROP_INPUT_SCALE:
onnxClient->setInputImageScale (g_value_get_float (value));
break;
case PROP_PROVIDER_CONFIG:
g_free (self->provider_config);
self->provider_config = g_value_dup_string (value);
onnxClient->setProviderConfig(self->provider_config);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;

View File

@ -25,6 +25,7 @@
#endif
#include "gstssdobjectdetector.h"
#include "gstyolotensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@ -38,6 +39,8 @@ plugin_init (GstPlugin * plugin)
{
gboolean ret = FALSE;
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_seg_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_od_tensor_decoder, plugin);
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,127 @@
/*
* GStreamer gstreamer-yolotensordecoder
* Copyright (C) 2024 Collabora Ltd
* Authors: Daniel Morin <daniel.morin@collabora.com>
* Vineet Suryan <vineet.suryan@collabora.com>
* Santosh Mahto <santosh.mahto@collabora.com>
*
* gstyolotensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_YOLO_TENSOR_DECODER_H__
#define __GST_YOLO_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>
G_BEGIN_DECLS
#define GST_TYPE_YOLO_OD_TENSOR_DECODER (gst_yolo_od_tensor_decoder_get_type ())
#define GST_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoder))
#define GST_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
#define GST_IS_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_IS_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_YOLO_OD_TENSOR_DECODER_GET_CLASS(obj) \
(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
typedef struct _GstYoloOdTensorDecoder GstYoloOdTensorDecoder;
typedef struct _GstYoloOdTensorDecoderClass GstYoloOdTensorDecoderClass;
/* Axis-aligned bounding box in pixel coordinates. */
typedef struct _BBox
{
  gint x;   /* Left edge */
  gint y;   /* Top edge */
  guint w;  /* Width */
  guint h;  /* Height */
} BBox;
/* Instance structure of the YOLO object-detection tensor decoder element. */
struct _GstYoloOdTensorDecoder
{
  GstBaseTransform basetransform;
  /* Box confidence threshold */
  gfloat box_confi_thresh;
  /* Class confidence threshold */
  gfloat cls_confi_thresh;
  /* Intersection-of-Union threshold */
  gfloat iou_thresh;
  /* Maximum detection/mask */
  gsize max_detection;
  /* Candidates with a class confidence level above threshold. */
  GPtrArray *sel_candidates;
  /* Final candidates selected that respect class confidence level,
   * NMS and maximum detection. */
  GPtrArray *selected;
  /* Tensor-id identifying mask tensors out of yolo inference process. */
  GQuark mask_tensor_id;
  /* Video Info */
  GstVideoInfo video_info;
  /* Labels file */
  gchar *label_file;
  /* Labels */
  GArray *labels;
  /* GstAnalyticsODMtd data */
  GArray *od_mtds;
  /* Hash table to store the offset in the mask tensor buffer where
   * OdMtd data are stored. key is OdMtd.id */
  GHashTable *candidate_offsets;
};
/* Class structure of the YOLO object-detection tensor decoder; no
 * virtual methods are added beyond GstBaseTransform's. */
struct _GstYoloOdTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};
GType gst_yolo_od_tensor_decoder_get_type (void);
G_DEFINE_AUTOPTR_CLEANUP_FUNC (GstYoloOdTensorDecoder, g_object_unref)
GST_ELEMENT_REGISTER_DECLARE (yolo_od_tensor_decoder)
/* Yolo segmentation tensor decoder */
#define GST_TYPE_YOLO_SEG_TENSOR_DECODER (gst_yolo_seg_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder,
GST, YOLO_SEG_TENSOR_DECODER, GstYoloOdTensorDecoder)
/* Instance structure of the YOLO segmentation tensor decoder; extends
 * the object-detection decoder with mask (logits) handling. */
struct _GstYoloSegTensorDecoder
{
  GstYoloOdTensorDecoder parent;
  /* Mask width */
  guint mask_w;
  /* Mask height */
  guint mask_h;
  /* Mask length */
  gsize mask_length;
  /* Tensor-id identifying logits tensors out of yolo inference process. */
  GQuark logits_tensor_id;
  /* Gain applied when mapping bounding-box coordinates into mask
   * coordinates — presumably video-resolution/mask-resolution ratio;
   * TODO confirm against decoder implementation. */
  gfloat bb2mask_gain;
  /* Region of interest inside the mask */
  BBox mask_roi;
  /* BufferPool for mask */
  GstBufferPool *mask_pool;
};
GST_ELEMENT_REGISTER_DECLARE (yolo_seg_tensor_decoder)
G_END_DECLS
#endif /* __GST_YOLO_TENSOR_DECODER_H__ */

View File

@ -1,6 +1,7 @@
tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c'
'gstssdobjectdetector.c',
'gstyolotensordecoder.c'
]
tensordecoders_headers = [