Compare commits

...

10 Commits

Author SHA1 Message Date
Elias Rosendahl
6377ebf568 Add Execution Provider OpenVINO 2025-06-09 12:49:22 +02:00
Santosh Mahto
62731c958c gstanalytics : Add tensor decoder element for yolo detection models
Existing tensor decoder has been bifurcated into two separate gst
elements as:

`yoloodv5tensordecoder`: decodes tensors output (masks) from detection-only
models e.g. yolov8s.onnx

`yolsegv8tensordecoder`: decodes tensors output (masks and logits) from
segmentation models e.g. FastSAM or yolov8s-seg
2025-04-05 13:09:00 +02:00
Daniel Morin
87b56fbf86 tensordecoders: rename element
- yolotensordecoder replaced with yolov8segtensordecoder
2025-04-05 13:09:00 +02:00
Daniel Morin
83d685054b tensordecoders: address MR comments 2025-04-05 13:09:00 +02:00
Daniel Morin
e616f64152 tensordecoders: Remove object locking in properties accessors 2025-04-05 13:09:00 +02:00
Daniel Morin
df08833e27 tensordecoders: add property label-file to example 2025-04-05 13:09:00 +02:00
Daniel Morin
01a504c310 tensordecoder: Improve class retrieval
- Optimize the way the class with maximum confidence is retrieved. Avoid
  multiple passes.
- Add support for label files
2025-04-05 13:09:00 +02:00
Santosh Mahto
62eeb7e008 gst-analytics : Adapt and Rename fastsamtensordecoder to yolo based.
YOLOv8 models have the same tensor output format as FastSAM, so for better
generalization rename fastsamtensordecoder to yolotensordecoder. This
also requires code adaptation to support Yolo based models.
2025-04-05 13:09:00 +02:00
Olivier Crête
244dd01b22 fastsamtensordecoder: Set mask resolution based on model output 2025-04-05 13:09:00 +02:00
Daniel Morin
b7f964929c fastsamtensordecoder: Add FastSAM tensor decoder
Co-authored-by: Vineet Suryan <vineet.suryan@collabora.com>
2025-04-05 13:08:59 +02:00
9 changed files with 1665 additions and 5 deletions

View File

@ -0,0 +1,4 @@
{
"name": "FT-Driverless Dev",
"image": "git.fasttube.de/ft-driverless/ft_as:gstreamer-plugin-bad"
}

View File

@ -248041,6 +248041,114 @@
"tensordecoders": {
"description": "Tensor decoders elements",
"elements": {
"fastsamtensordecoder": {
"author": "Daniel Morin <daniel.morin@collabora.com>",
"description": "Decode tensors output from the inference of FastSAM model (segmentation) on video frames. The original repository of the FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a strawberry segmentation model based on FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensors name embedded matching default values of tensors-masks-name and tensors-logits-name properties. It's also possible to embed tensor-ids into any model based on FastSAM architecture to allow this tensor-decoder to decode tensors. This process is described in the Readme of this repository: https://col.la/gstonnxmodels",
"hierarchy": [
"GstFastSAMTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "TensorDecoder/Video",
"pad-templates": {
"sink": {
"caps": "video/x-raw:\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"box-confidence-threshold": {
"blurb": "Boxes with a location confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"class-confidence-threshold": {
"blurb": "Classes with a confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"iou-threshold": {
"blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.7",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"max-detections": {
"blurb": "Maximum object/masks detections.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "100",
"max": "-1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "guint",
"writable": true
},
"tensors-name-logits": {
"blurb": "Name that identify FastSAM logits tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Logits",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
},
"tensors-name-masks": {
"blurb": "Name that identify FastSAM mask tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Masks",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",

View File

@ -21,7 +21,9 @@
*/
#include "gstonnxclient.h"
#include <cpu_provider_factory.h>
#include <onnxruntime_cxx_api.h>
#include <onnxruntime/core/providers/cpu/cpu_provider_factory.h>
#include <onnxruntime/core/providers/openvino/openvino_provider_factory.h>
#include <sstream>
#define GST_CAT_DEFAULT onnx_inference_debug
@ -63,8 +65,9 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
inputDatatypeSize (sizeof (uint8_t)),
fixedInputImageSize (false),
inputTensorOffset (0.0),
inputTensorScale (1.0)
{
inputTensorScale (1.0),
provider_config(nullptr)
{
}
GstOnnxClient::~GstOnnxClient () {
@ -72,6 +75,10 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
delete[]dest;
}
/* Store the execution-provider configuration string (comma-separated
 * key=value pairs, parsed later when the session is created).
 * NOTE(review): only the raw pointer is stored, not a copy — the caller
 * must guarantee the string outlives this client (gst_onnx_inference
 * keeps a g_strdup'ed copy in self->provider_config until finalize;
 * confirm no other caller passes a transient string). */
void GstOnnxClient::setProviderConfig (const char *config)
{
  provider_config = config;
}
int32_t GstOnnxClient::getWidth (void)
{
return width;
@ -222,6 +229,20 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
(sessionOptions, 1));
}
break;
case GST_ONNX_EXECUTION_PROVIDER_OPENVINO: {
std::unordered_map<std::string, std::string> ovOptions;
if (this->provider_config) {
std::istringstream ss(this->provider_config);
std::string kv;
while (std::getline(ss, kv, ',')) {
auto pos = kv.find('=');
if (pos == std::string::npos) continue;
ovOptions[kv.substr(0, pos)] = kv.substr(pos + 1);
}
}
sessionOptions.AppendExecutionProvider("OpenVINO", ovOptions);
}
break;
default:
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));

View File

@ -54,6 +54,7 @@ typedef enum
{
GST_ONNX_EXECUTION_PROVIDER_CPU,
GST_ONNX_EXECUTION_PROVIDER_CUDA,
GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
} GstOnnxExecutionProvider;
@ -82,6 +83,7 @@ namespace GstOnnxNamespace {
GstTensorMeta *copy_tensors_to_meta (std::vector<Ort::Value> &outputs,
GstBuffer *buffer);
void parseDimensions(GstVideoInfo vinfo);
void setProviderConfig(const char *config);
private:
GstElement *debug_parent;
@ -108,6 +110,7 @@ namespace GstOnnxNamespace {
bool fixedInputImageSize;
float inputTensorOffset;
float inputTensorScale;
const char *provider_config;
};
}

View File

@ -84,6 +84,7 @@ struct _GstOnnxInference
gpointer onnx_client;
gboolean onnx_disabled;
GstVideoInfo video_info;
gchar *provider_config;
};
GST_DEBUG_CATEGORY (onnx_inference_debug);
@ -101,6 +102,7 @@ enum
PROP_INPUT_IMAGE_FORMAT,
PROP_OPTIMIZATION_LEVEL,
PROP_EXECUTION_PROVIDER,
PROP_PROVIDER_CONFIG,
PROP_INPUT_OFFSET,
PROP_INPUT_SCALE
};
@ -190,6 +192,9 @@ gst_onnx_execution_provider_get_type (void)
{GST_ONNX_EXECUTION_PROVIDER_CUDA,
"CUDA execution provider",
"cuda"},
{GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
"OPENVINO execution provider",
"openvino"},
{0, NULL, NULL},
};
@ -316,6 +321,14 @@ gst_onnx_inference_class_init (GstOnnxInferenceClass * klass)
G_MINFLOAT, G_MAXFLOAT, 1.0,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (G_OBJECT_CLASS (klass),
PROP_PROVIDER_CONFIG,
g_param_spec_string ("provider-config",
"Provider config",
"Comma-separierte Key=Value-Optionen",
nullptr,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
gst_element_class_set_static_metadata (element_class, "onnxinference",
"Filter/Effect/Video",
@ -351,7 +364,8 @@ static void
gst_onnx_inference_finalize (GObject * object)
{
GstOnnxInference *self = GST_ONNX_INFERENCE (object);
g_free (self->provider_config);
self->provider_config = NULL;
g_free (self->model_file);
delete GST_ONNX_CLIENT_MEMBER (self);
G_OBJECT_CLASS (gst_onnx_inference_parent_class)->finalize (object);
@ -397,6 +411,11 @@ gst_onnx_inference_set_property (GObject * object, guint prop_id,
case PROP_INPUT_SCALE:
onnxClient->setInputImageScale (g_value_get_float (value));
break;
case PROP_PROVIDER_CONFIG:
g_free (self->provider_config);
self->provider_config = g_value_dup_string (value);
onnxClient->setProviderConfig(self->provider_config);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;

View File

@ -25,6 +25,7 @@
#endif
#include "gstssdobjectdetector.h"
#include "gstyolotensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@ -38,6 +39,8 @@ plugin_init (GstPlugin * plugin)
{
gboolean ret = FALSE;
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_seg_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_od_tensor_decoder, plugin);
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,127 @@
/*
* GStreamer gstreamer-yolotensordecoder
* Copyright (C) 2024 Collabora Ltd
* Authors: Daniel Morin <daniel.morin@collabora.com>
* Vineet Suryan <vineet.suryan@collabora.com>
* Santosh Mahto <santosh.mahto@collabora.com>
*
* gstyolotensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_YOLO_TENSOR_DECODER_H__
#define __GST_YOLO_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>
G_BEGIN_DECLS
#define GST_TYPE_YOLO_OD_TENSOR_DECODER (gst_yolo_od_tensor_decoder_get_type ())
#define GST_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoder))
#define GST_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
#define GST_IS_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_IS_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_YOLO_OD_TENSOR_DECODER_GET_CLASS(obj) \
(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
typedef struct _GstYoloOdTensorDecoder GstYoloOdTensorDecoder;
typedef struct _GstYoloOdTensorDecoderClass GstYoloOdTensorDecoderClass;
/* Axis-aligned bounding box in pixel coordinates. */
typedef struct _BBox
{
  gint x;   /* Left edge */
  gint y;   /* Top edge */
  guint w;  /* Width */
  guint h;  /* Height */
} BBox;
/* Instance structure of the YOLO object-detection tensor decoder element. */
struct _GstYoloOdTensorDecoder
{
  GstBaseTransform basetransform;
  /* Box confidence threshold */
  gfloat box_confi_thresh;
  /* Class confidence threshold */
  gfloat cls_confi_thresh;
  /* Intersection-of-Union threshold */
  gfloat iou_thresh;
  /* Maximum detection/mask */
  gsize max_detection;
  /* Candidates with a class confidence level above threshold. */
  GPtrArray *sel_candidates;
  /* Final candidates selected that respect class confidence level,
   * NMS and maximum detection. */
  GPtrArray *selected;
  /* Tensor-id identifying mask tensors out of yolo inference process. */
  GQuark mask_tensor_id;
  /* Video Info */
  GstVideoInfo video_info;
  /* Labels file */
  gchar *label_file;
  /* Labels */
  GArray *labels;
  /* GstAnalyticsODMtd data */
  GArray *od_mtds;
  /* Hash table to store the offset in the mask tensor buffer where
   * OdMtd data are stored. key is OdMtd.id */
  GHashTable *candidate_offsets;
};
/* Class structure of the YOLO object-detection tensor decoder; no
 * virtual methods are added beyond GstBaseTransform's. */
struct _GstYoloOdTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};
GType gst_yolo_od_tensor_decoder_get_type (void);
G_DEFINE_AUTOPTR_CLEANUP_FUNC (GstYoloOdTensorDecoder, g_object_unref)
GST_ELEMENT_REGISTER_DECLARE (yolo_od_tensor_decoder)
/* Yolo segmentation tensor decoder */
#define GST_TYPE_YOLO_SEG_TENSOR_DECODER (gst_yolo_seg_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder,
GST, YOLO_SEG_TENSOR_DECODER, GstYoloOdTensorDecoder)
/* Instance structure of the YOLO segmentation tensor decoder; extends
 * the object-detection decoder with mask (logits) handling. */
struct _GstYoloSegTensorDecoder
{
  GstYoloOdTensorDecoder parent;
  /* Mask width */
  guint mask_w;
  /* Mask height */
  guint mask_h;
  /* Mask length */
  gsize mask_length;
  /* Tensor-id identifying logits tensors out of yolo inference process. */
  GQuark logits_tensor_id;
  /* Gain applied when mapping bounding-box coordinates into mask
   * coordinates — presumably video-resolution/mask-resolution ratio;
   * TODO confirm against decoder implementation. */
  gfloat bb2mask_gain;
  /* Region of interest inside the mask */
  BBox mask_roi;
  /* BufferPool for mask */
  GstBufferPool *mask_pool;
};
GST_ELEMENT_REGISTER_DECLARE (yolo_seg_tensor_decoder)
G_END_DECLS
#endif /* __GST_YOLO_TENSOR_DECODER_H__ */

View File

@ -1,6 +1,7 @@
tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c'
'gstssdobjectdetector.c',
'gstyolotensordecoder.c'
]
tensordecoders_headers = [