Compare commits

No commits in common. "6377ebf568749de0bd07bafdd578cb85895a1ade" and "6db576f0332985b4261e02a038e7b0a6167c7fb9" have entirely different histories.

9 changed files with 5 additions and 1665 deletions

View File

@@ -1,4 +0,0 @@
{
"name": "FT-Driverless Dev",
"image": "git.fasttube.de/ft-driverless/ft_as:gstreamer-plugin-bad"
}

View File

@@ -248041,114 +248041,6 @@
"tensordecoders": {
"description": "Tensor decoders elements",
"elements": {
"fastsamtensordecoder": {
"author": "Daniel Morin <daniel.morin@collabora.com>",
"description": "Decode tensors output from the inference of FastSAM model (segmentation) on video frames. The original repository of the FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a strawberry segmentation model based on FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensors name embedded matching default values of tensors-masks-name and tensors-logits-name properties. It's also possible to embed tensor-ids into any model based on FastSAM architecture to allow this tensor-decoder to decode tensors. This process is described in the Readme of this repository: https://col.la/gstonnxmodels",
"hierarchy": [
"GstFastSAMTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "TensorDecoder/Video",
"pad-templates": {
"sink": {
"caps": "video/x-raw:\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"box-confidence-threshold": {
"blurb": "Boxes with a location confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"class-confidence-threshold": {
"blurb": "Classes with a confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.4",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"iou-threshold": {
"blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.7",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"max-detections": {
"blurb": "Maximum object/masks detections.",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "100",
"max": "-1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "guint",
"writable": true
},
"tensors-name-logits": {
"blurb": "Name that identify FastSAM logits tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Logits",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
},
"tensors-name-masks": {
"blurb": "Name that identify FastSAM mask tensors.",
"conditionally-available": false,
"construct": true,
"construct-only": false,
"controllable": false,
"default": "Gst.Model.FastSAM.Segmentation.Masks",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",

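The cache entry above documents the fastsamtensordecoder element whose documentation this commit drops. For anyone re-enabling it, a minimal sketch of configuring the element from application code could look like the following (C++ calling the GStreamer C API; the factory name, property names, defaults, and ranges are taken from the cache entry above, everything else is illustrative):

#include <gst/gst.h>

int main (int argc, char **argv)
{
  gst_init (&argc, &argv);

  /* Factory name as listed in the documentation cache above. */
  GstElement *decoder =
      gst_element_factory_make ("fastsamtensordecoder", nullptr);
  if (decoder == nullptr)
    return 1;                   /* element not present in this build */

  /* The float properties range over [0, 1]; max-detections is a guint. */
  g_object_set (decoder,
      "box-confidence-threshold", 0.4,
      "class-confidence-threshold", 0.4,
      "iou-threshold", 0.7,
      "max-detections", 100,
      nullptr);

  gst_object_unref (decoder);
  return 0;
}

In a full pipeline the element would sit downstream of an inference element that attaches tensors named per the tensors-name-masks and tensors-name-logits defaults above.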
View File

@@ -21,9 +21,7 @@
*/
#include "gstonnxclient.h"
#include <onnxruntime_cxx_api.h>
#include <onnxruntime/core/providers/cpu/cpu_provider_factory.h>
#include <onnxruntime/core/providers/openvino/openvino_provider_factory.h>
#include <cpu_provider_factory.h>
#include <sstream>
#define GST_CAT_DEFAULT onnx_inference_debug
@@ -65,9 +63,8 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
inputDatatypeSize (sizeof (uint8_t)),
fixedInputImageSize (false),
inputTensorOffset (0.0),
inputTensorScale (1.0),
provider_config(nullptr)
{
inputTensorScale (1.0)
{
}
GstOnnxClient::~GstOnnxClient () {
@@ -75,10 +72,6 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
delete[]dest;
}
void GstOnnxClient::setProviderConfig (const char *config)
{
provider_config = config;
}
int32_t GstOnnxClient::getWidth (void)
{
return width;
@@ -229,20 +222,6 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
(sessionOptions, 1));
}
break;
case GST_ONNX_EXECUTION_PROVIDER_OPENVINO: {
std::unordered_map<std::string, std::string> ovOptions;
if (this->provider_config) {
std::istringstream ss(this->provider_config);
std::string kv;
while (std::getline(ss, kv, ',')) {
auto pos = kv.find('=');
if (pos == std::string::npos) continue;
ovOptions[kv.substr(0, pos)] = kv.substr(pos + 1);
}
}
sessionOptions.AppendExecutionProvider("OpenVINO", ovOptions);
}
break;
default:
Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
(sessionOptions, 1));

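The largest removal in this file is the OpenVINO execution-provider branch, which parsed the provider-config string as comma-separated key=value pairs before handing them to ONNX Runtime. For reference, the deleted parsing loop can be reproduced standalone; this sketch mirrors that logic and needs nothing beyond the C++ standard library (the option keys in main are illustrative only, and valid keys depend on the ONNX Runtime build):

#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

/* Mirrors the removed loop: split on ',', then on the first '='.
 * Entries without '=' are silently skipped, as in the deleted code. */
static std::unordered_map<std::string, std::string>
parse_provider_config (const char *config)
{
  std::unordered_map<std::string, std::string> options;
  if (config == nullptr)
    return options;
  std::istringstream ss (config);
  std::string kv;
  while (std::getline (ss, kv, ',')) {
    auto pos = kv.find ('=');
    if (pos == std::string::npos)
      continue;
    options[kv.substr (0, pos)] = kv.substr (pos + 1);
  }
  return options;
}

int main ()
{
  for (const auto &opt : parse_provider_config ("device_type=CPU,num_of_threads=4"))
    std::cout << opt.first << " = " << opt.second << '\n';
  return 0;
}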
View File

@@ -54,7 +54,6 @@ typedef enum
{
GST_ONNX_EXECUTION_PROVIDER_CPU,
GST_ONNX_EXECUTION_PROVIDER_CUDA,
GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
} GstOnnxExecutionProvider;
@@ -83,7 +82,6 @@ namespace GstOnnxNamespace {
GstTensorMeta *copy_tensors_to_meta (std::vector<Ort::Value> &outputs,
GstBuffer *buffer);
void parseDimensions(GstVideoInfo vinfo);
void setProviderConfig(const char *config);
private:
GstElement *debug_parent;
@@ -110,7 +108,6 @@ namespace GstOnnxNamespace {
bool fixedInputImageSize;
float inputTensorOffset;
float inputTensorScale;
const char *provider_config;
};
}

View File

@@ -84,7 +84,6 @@ struct _GstOnnxInference
gpointer onnx_client;
gboolean onnx_disabled;
GstVideoInfo video_info;
gchar *provider_config;
};
GST_DEBUG_CATEGORY (onnx_inference_debug);
@@ -102,7 +101,6 @@
PROP_INPUT_IMAGE_FORMAT,
PROP_OPTIMIZATION_LEVEL,
PROP_EXECUTION_PROVIDER,
PROP_PROVIDER_CONFIG,
PROP_INPUT_OFFSET,
PROP_INPUT_SCALE
};
@@ -192,9 +190,6 @@ gst_onnx_execution_provider_get_type (void)
{GST_ONNX_EXECUTION_PROVIDER_CUDA,
"CUDA execution provider",
"cuda"},
{GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
"OPENVINO execution provider",
"openvino"},
{0, NULL, NULL},
};
@@ -321,14 +316,6 @@ gst_onnx_inference_class_init (GstOnnxInferenceClass * klass)
G_MINFLOAT, G_MAXFLOAT, 1.0,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (G_OBJECT_CLASS (klass),
PROP_PROVIDER_CONFIG,
g_param_spec_string ("provider-config",
"Provider config",
"Comma-separierte Key=Value-Optionen",
nullptr,
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
gst_element_class_set_static_metadata (element_class, "onnxinference",
"Filter/Effect/Video",
@@ -364,8 +351,7 @@ static void
gst_onnx_inference_finalize (GObject * object)
{
GstOnnxInference *self = GST_ONNX_INFERENCE (object);
g_free (self->provider_config);
self->provider_config = NULL;
g_free (self->model_file);
delete GST_ONNX_CLIENT_MEMBER (self);
G_OBJECT_CLASS (gst_onnx_inference_parent_class)->finalize (object);
@@ -411,11 +397,6 @@ gst_onnx_inference_set_property (GObject * object, guint prop_id,
case PROP_INPUT_SCALE:
onnxClient->setInputImageScale (g_value_get_float (value));
break;
case PROP_PROVIDER_CONFIG:
g_free (self->provider_config);
self->provider_config = g_value_dup_string (value);
onnxClient->setProviderConfig(self->provider_config);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;

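Together with the enum value and property removals above, this drops the whole provider-config code path. Against the pre-removal revision, an application would have driven it roughly like this (a sketch; "device_type=CPU" is an illustrative option string, not a documented set of keys):

#include <gst/gst.h>

int main (int argc, char **argv)
{
  gst_init (&argc, &argv);

  GstElement *inference =
      gst_element_factory_make ("onnxinference", nullptr);
  if (inference == nullptr)
    return 1;                   /* plugin not present in this build */

  /* "openvino" was the enum nick removed above; provider-config was
   * forwarded verbatim to GstOnnxClient::setProviderConfig(). */
  gst_util_set_object_arg (G_OBJECT (inference),
      "execution-provider", "openvino");
  g_object_set (inference, "provider-config", "device_type=CPU", nullptr);

  gst_object_unref (inference);
  return 0;
}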
View File

@@ -25,7 +25,6 @@
#endif
#include "gstssdobjectdetector.h"
#include "gstyolotensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@@ -39,8 +38,6 @@ plugin_init (GstPlugin * plugin)
{
gboolean ret = FALSE;
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_seg_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_od_tensor_decoder, plugin);
return ret;
}

View File

@@ -1,127 +0,0 @@
/*
* GStreamer gstreamer-yolotensordecoder
* Copyright (C) 2024 Collabora Ltd
* Authors: Daniel Morin <daniel.morin@collabora.com>
* Vineet Suryan <vineet.suryan@collabora.com>
* Santosh Mahto <santosh.mahto@collabora.com>
*
* gstyolotensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_YOLO_TENSOR_DECODER_H__
#define __GST_YOLO_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>
G_BEGIN_DECLS
#define GST_TYPE_YOLO_OD_TENSOR_DECODER (gst_yolo_od_tensor_decoder_get_type ())
#define GST_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoder))
#define GST_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
#define GST_IS_YOLO_OD_TENSOR_DECODER(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_IS_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_YOLO_OD_TENSOR_DECODER_GET_CLASS(obj) \
(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
typedef struct _GstYoloOdTensorDecoder GstYoloOdTensorDecoder;
typedef struct _GstYoloOdTensorDecoderClass GstYoloOdTensorDecoderClass;
typedef struct _BBox
{
gint x;
gint y;
guint w;
guint h;
} BBox;
struct _GstYoloOdTensorDecoder
{
GstBaseTransform basetransform;
/* Box confidence threshold */
gfloat box_confi_thresh;
/* Class confidence threshold */
gfloat cls_confi_thresh;
/* Intersection-over-Union threshold */
gfloat iou_thresh;
/* Maximum number of detections/masks */
gsize max_detection;
/* Candidates with a class confidence level above threshold. */
GPtrArray *sel_candidates;
/* Final candidates selected that respect class confidence level,
* NMS and maximum detection. */
GPtrArray *selected;
/* Tensor-id identifying mask tensors out of yolo inference process. */
GQuark mask_tensor_id;
/* Video Info */
GstVideoInfo video_info;
/* Labels file */
gchar *label_file;
/* Labels */
GArray *labels;
/* GstAnalyticsODMtd data */
GArray *od_mtds;
/* Hash table to store the offset in the mask tensor buffer where
* OdMtd data are stored. key is OdMtd.id */
GHashTable *candidate_offsets;
};
struct _GstYoloOdTensorDecoderClass
{
GstBaseTransformClass parent_class;
};
GType gst_yolo_od_tensor_decoder_get_type (void);
G_DEFINE_AUTOPTR_CLEANUP_FUNC (GstYoloOdTensorDecoder, g_object_unref)
GST_ELEMENT_REGISTER_DECLARE (yolo_od_tensor_decoder)
/* Yolo segmentation tensor decoder */
#define GST_TYPE_YOLO_SEG_TENSOR_DECODER (gst_yolo_seg_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder,
GST, YOLO_SEG_TENSOR_DECODER, GstYoloOdTensorDecoder)
struct _GstYoloSegTensorDecoder
{
GstYoloOdTensorDecoder parent;
/* Mask width */
guint mask_w;
/* Mask height */
guint mask_h;
/* Mask length */
gsize mask_length;
/* Tensor-id identifying logits tensors out of yolo inference process. */
GQuark logits_tensor_id;
/* Gain applied to scale bounding-box coordinates to mask dimensions. */
gfloat bb2mask_gain;
/* Mask region of interest */
BBox mask_roi;
/* BufferPool for mask */
GstBufferPool *mask_pool;
};
GST_ELEMENT_REGISTER_DECLARE (yolo_seg_tensor_decoder)
G_END_DECLS
#endif /* __GST_YOLO_TENSOR_DECODER_H__ */

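The iou_thresh field declared above drives the non-maximum-suppression step between candidate boxes. As a worked reference for the BBox layout from this header, intersection-over-union between two axis-aligned boxes can be computed as follows (a standalone sketch, not code from the plugin):

#include <algorithm>
#include <cstdio>

/* Same field layout as the BBox declared in the deleted header. */
struct BBox { int x; int y; unsigned int w; unsigned int h; };

/* Intersection-over-union of two axis-aligned boxes, in [0, 1]. */
static float iou (const BBox &a, const BBox &b)
{
  const int x1 = std::max (a.x, b.x);
  const int y1 = std::max (a.y, b.y);
  const int x2 = std::min (a.x + (int) a.w, b.x + (int) b.w);
  const int y2 = std::min (a.y + (int) a.h, b.y + (int) b.h);
  const float inter =
      (float) std::max (0, x2 - x1) * (float) std::max (0, y2 - y1);
  const float uni = (float) a.w * a.h + (float) b.w * b.h - inter;
  return uni > 0.0f ? inter / uni : 0.0f;
}

int main ()
{
  const BBox a = { 0, 0, 100, 100 };
  const BBox b = { 50, 0, 100, 100 };
  /* Intersection 50x100 = 5000, union 20000 - 5000 = 15000, IoU = 1/3. */
  std::printf ("IoU = %.3f\n", iou (a, b));
  return 0;
}

Candidates whose IoU with an already-selected box exceeds iou_thresh (default 0.7 in the cache entry earlier in this diff) would be suppressed.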
View File

@@ -1,7 +1,6 @@
tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c',
'gstyolotensordecoder.c'
'gstssdobjectdetector.c'
]
tensordecoders_headers = [