Compare commits
10 Commits
main ... gstreamer-

SHA1
6377ebf568
62731c958c
87b56fbf86
83d685054b
e616f64152
df08833e27
01a504c310
62eeb7e008
244dd01b22
b7f964929c
.devcontainer/devcontainer.json (new file, 4 lines)

@@ -0,0 +1,4 @@
{
  "name": "FT-Driverless Dev",
  "image": "git.fasttube.de/ft-driverless/ft_as:gstreamer-plugin-bad"
}
@@ -248041,6 +248041,114 @@
    "tensordecoders": {
        "description": "Tensor decoders elements",
        "elements": {
            "fastsamtensordecoder": {
                "author": "Daniel Morin <daniel.morin@collabora.com>",
                "description": "Decode tensors output from the inference of FastSAM model (segmentation) on video frames. The original repository of the FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a strawberry segmentation model based on FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensors name embedded matching default values of tensors-masks-name and tensors-logits-name properties. It's also possible to embed tensor-ids into any model based on FastSAM architecture to allow this tensor-decoder to decode tensors. This process is described in the Readme of this repository: https://col.la/gstonnxmodels",
                "hierarchy": [
                    "GstFastSAMTensorDecoder",
                    "GstBaseTransform",
                    "GstElement",
                    "GstObject",
                    "GInitiallyUnowned",
                    "GObject"
                ],
                "klass": "TensorDecoder/Video",
                "pad-templates": {
                    "sink": {
                        "caps": "video/x-raw:\n",
                        "direction": "sink",
                        "presence": "always"
                    },
                    "src": {
                        "caps": "video/x-raw:\n",
                        "direction": "src",
                        "presence": "always"
                    }
                },
                "properties": {
                    "box-confidence-threshold": {
                        "blurb": "Boxes with a location confidence level inferior to this threshold will be excluded",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
                        "default": "0.4",
                        "max": "1",
                        "min": "0",
                        "mutable": "null",
                        "readable": true,
                        "type": "gfloat",
                        "writable": true
                    },
                    "class-confidence-threshold": {
                        "blurb": "Classes with a confidence level inferior to this threshold will be excluded",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
                        "default": "0.4",
                        "max": "1",
                        "min": "0",
                        "mutable": "null",
                        "readable": true,
                        "type": "gfloat",
                        "writable": true
                    },
                    "iou-threshold": {
                        "blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
                        "default": "0.7",
                        "max": "1",
                        "min": "0",
                        "mutable": "null",
                        "readable": true,
                        "type": "gfloat",
                        "writable": true
                    },
                    "max-detections": {
                        "blurb": "Maximum object/masks detections.",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
                        "default": "100",
                        "max": "-1",
                        "min": "0",
                        "mutable": "null",
                        "readable": true,
                        "type": "guint",
                        "writable": true
                    },
                    "tensors-name-logits": {
                        "blurb": "Name that identify FastSAM logits tensors.",
                        "conditionally-available": false,
                        "construct": true,
                        "construct-only": false,
                        "controllable": false,
                        "default": "Gst.Model.FastSAM.Segmentation.Logits",
                        "mutable": "null",
                        "readable": true,
                        "type": "gchararray",
                        "writable": true
                    },
                    "tensors-name-masks": {
                        "blurb": "Name that identify FastSAM mask tensors.",
                        "conditionally-available": false,
                        "construct": true,
                        "construct-only": false,
                        "controllable": false,
                        "default": "Gst.Model.FastSAM.Segmentation.Masks",
                        "mutable": "null",
                        "readable": true,
                        "type": "gchararray",
                        "writable": true
                    }
                },
                "rank": "primary"
            },
            "ssdobjectdetector": {
                "author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
                "description": "Apply tensor output from inference to detect objects in video frames",
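For context, a minimal application sketch of how the fastsamtensordecoder documented above is meant to sit in a pipeline. This is not part of the change set: the model path, the v4l2src capture source, and the onnxinference model-file property value are placeholders; fastsamtensordecoder and its box-confidence-threshold property come from the cache entry above.

/* Sketch: run FastSAM tensor decoding on a live camera feed.
 * "model.onnx" and the use of v4l2src are assumptions for illustration. */
#include <gst/gst.h>

int
main (int argc, char **argv)
{
  GstElement *pipeline;
  GstBus *bus;
  GstMessage *msg;
  GError *error = NULL;

  gst_init (&argc, &argv);

  pipeline = gst_parse_launch (
      "v4l2src ! videoconvert ! "
      "onnxinference model-file=model.onnx ! "
      "fastsamtensordecoder box-confidence-threshold=0.4 ! fakesink",
      &error);
  if (pipeline == NULL) {
    g_printerr ("Failed to build pipeline: %s\n", error->message);
    g_clear_error (&error);
    return 1;
  }

  gst_element_set_state (pipeline, GST_STATE_PLAYING);

  /* Block until an error or end-of-stream is posted on the bus. */
  bus = gst_element_get_bus (pipeline);
  msg = gst_bus_timed_pop_filtered (bus, GST_CLOCK_TIME_NONE,
      GST_MESSAGE_ERROR | GST_MESSAGE_EOS);
  if (msg != NULL)
    gst_message_unref (msg);
  gst_object_unref (bus);

  gst_element_set_state (pipeline, GST_STATE_NULL);
  gst_object_unref (pipeline);
  return 0;
}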
@@ -21,7 +21,9 @@
 */

#include "gstonnxclient.h"
#include <cpu_provider_factory.h>
#include <onnxruntime_cxx_api.h>
#include <onnxruntime/core/providers/cpu/cpu_provider_factory.h>
#include <onnxruntime/core/providers/openvino/openvino_provider_factory.h>
#include <sstream>

#define GST_CAT_DEFAULT onnx_inference_debug
@@ -63,8 +65,9 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
    inputDatatypeSize (sizeof (uint8_t)),
    fixedInputImageSize (false),
    inputTensorOffset (0.0),
    inputTensorScale (1.0)
{
    inputTensorScale (1.0),
    provider_config(nullptr)
{
}

GstOnnxClient::~GstOnnxClient () {
@@ -72,6 +75,10 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
    delete[]dest;
  }

  void GstOnnxClient::setProviderConfig (const char *config)
  {
    provider_config = config;
  }
  int32_t GstOnnxClient::getWidth (void)
  {
    return width;
@@ -222,6 +229,20 @@ GstOnnxClient::GstOnnxClient (GstElement *debug_parent):debug_parent(debug_paren
            (sessionOptions, 1));
      }
      break;
      case GST_ONNX_EXECUTION_PROVIDER_OPENVINO: {
        std::unordered_map<std::string, std::string> ovOptions;
        if (this->provider_config) {
          std::istringstream ss(this->provider_config);
          std::string kv;
          while (std::getline(ss, kv, ',')) {
            auto pos = kv.find('=');
            if (pos == std::string::npos) continue;
            ovOptions[kv.substr(0, pos)] = kv.substr(pos + 1);
          }
        }
        sessionOptions.AppendExecutionProvider("OpenVINO", ovOptions);
      }
      break;
      default:
        Ort::ThrowOnError (OrtSessionOptionsAppendExecutionProvider_CPU
            (sessionOptions, 1));
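The OpenVINO branch above treats provider_config as a flat list of comma-separated key=value pairs and silently skips entries without an '='. Purely as an illustration of the accepted format (this helper is not part of the patch), the same parsing expressed with GLib:

#include <glib.h>
#include <string.h>

/* "device_type=GPU,num_of_threads=4" becomes { "device_type" => "GPU",
 * "num_of_threads" => "4" }; pairs without '=' are ignored, mirroring the
 * std::getline/find loop above. The option names are examples only. */
static GHashTable *
parse_provider_config (const gchar * config)
{
  GHashTable *options = g_hash_table_new_full (g_str_hash, g_str_equal,
      g_free, g_free);
  gchar **pairs = g_strsplit (config, ",", -1);

  for (guint i = 0; pairs[i] != NULL; i++) {
    const gchar *eq = strchr (pairs[i], '=');
    if (eq == NULL)
      continue;
    g_hash_table_insert (options,
        g_strndup (pairs[i], eq - pairs[i]), g_strdup (eq + 1));
  }
  g_strfreev (pairs);
  return options;
}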
@@ -54,6 +54,7 @@ typedef enum
{
  GST_ONNX_EXECUTION_PROVIDER_CPU,
  GST_ONNX_EXECUTION_PROVIDER_CUDA,
  GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
} GstOnnxExecutionProvider;


@@ -82,6 +83,7 @@ namespace GstOnnxNamespace {
    GstTensorMeta *copy_tensors_to_meta (std::vector<Ort::Value> &outputs,
        GstBuffer *buffer);
    void parseDimensions(GstVideoInfo vinfo);
    void setProviderConfig(const char *config);
  private:

    GstElement *debug_parent;
@@ -108,6 +110,7 @@ namespace GstOnnxNamespace {
    bool fixedInputImageSize;
    float inputTensorOffset;
    float inputTensorScale;
    const char *provider_config;
  };
}

@@ -84,6 +84,7 @@ struct _GstOnnxInference
  gpointer onnx_client;
  gboolean onnx_disabled;
  GstVideoInfo video_info;
  gchar *provider_config;
};

GST_DEBUG_CATEGORY (onnx_inference_debug);
@@ -101,6 +102,7 @@ enum
  PROP_INPUT_IMAGE_FORMAT,
  PROP_OPTIMIZATION_LEVEL,
  PROP_EXECUTION_PROVIDER,
  PROP_PROVIDER_CONFIG,
  PROP_INPUT_OFFSET,
  PROP_INPUT_SCALE
};
@@ -190,6 +192,9 @@ gst_onnx_execution_provider_get_type (void)
    {GST_ONNX_EXECUTION_PROVIDER_CUDA,
        "CUDA execution provider",
        "cuda"},
    {GST_ONNX_EXECUTION_PROVIDER_OPENVINO,
        "OPENVINO execution provider",
        "openvino"},
    {0, NULL, NULL},
  };

@@ -316,6 +321,14 @@ gst_onnx_inference_class_init (GstOnnxInferenceClass * klass)
          G_MINFLOAT, G_MAXFLOAT, 1.0,
          (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_PROVIDER_CONFIG,
      g_param_spec_string ("provider-config",
          "Provider config",
          "Comma-separated key=value options",
          nullptr,
          (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));


  gst_element_class_set_static_metadata (element_class, "onnxinference",
      "Filter/Effect/Video",
@@ -351,7 +364,8 @@ static void
gst_onnx_inference_finalize (GObject * object)
{
  GstOnnxInference *self = GST_ONNX_INFERENCE (object);

  g_free (self->provider_config);
  self->provider_config = NULL;
  g_free (self->model_file);
  delete GST_ONNX_CLIENT_MEMBER (self);
  G_OBJECT_CLASS (gst_onnx_inference_parent_class)->finalize (object);
@@ -397,6 +411,11 @@ gst_onnx_inference_set_property (GObject * object, guint prop_id,
    case PROP_INPUT_SCALE:
      onnxClient->setInputImageScale (g_value_get_float (value));
      break;
    case PROP_PROVIDER_CONFIG:
      g_free (self->provider_config);
      self->provider_config = g_value_dup_string (value);
      onnxClient->setProviderConfig(self->provider_config);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
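From application code, the new provider-config property pairs with the execution-provider selection. A hedged sketch follows; the "execution-provider" property name is assumed from PROP_EXECUTION_PROVIDER and the specific OpenVINO option keys are examples, while "openvino" is the enum nick registered above and the value string is exactly what GstOnnxClient::setProviderConfig() receives.

#include <gst/gst.h>

/* Sketch: switch an existing onnxinference element to the OpenVINO execution
 * provider and pass extra provider options as comma-separated key=value pairs. */
static void
configure_openvino (GstElement * onnxinference)
{
  gst_util_set_object_arg (G_OBJECT (onnxinference),
      "execution-provider", "openvino");
  g_object_set (onnxinference,
      "provider-config", "device_type=GPU,num_of_threads=4", NULL);
}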
@@ -25,6 +25,7 @@
#endif

#include "gstssdobjectdetector.h"
#include "gstyolotensordecoder.h"

/**
 * SECTION:plugin-tensordecoders
@@ -38,6 +39,8 @@ plugin_init (GstPlugin * plugin)
{
  gboolean ret = FALSE;
  ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
  ret |= GST_ELEMENT_REGISTER (yolo_seg_tensor_decoder, plugin);
  ret |= GST_ELEMENT_REGISTER (yolo_od_tensor_decoder, plugin);

  return ret;
}
File diff suppressed because it is too large
@@ -0,0 +1,127 @@
/*
 * GStreamer gstreamer-yolotensordecoder
 * Copyright (C) 2024 Collabora Ltd
 * Authors: Daniel Morin <daniel.morin@collabora.com>
 *          Vineet Suryan <vineet.suryan@collabora.com>
 *          Santosh Mahto <santosh.mahto@collabora.com>
 *
 * gstyolotensordecoder.h
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */


#ifndef __GST_YOLO_TENSOR_DECODER_H__
#define __GST_YOLO_TENSOR_DECODER_H__

#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>

G_BEGIN_DECLS

#define GST_TYPE_YOLO_OD_TENSOR_DECODER (gst_yolo_od_tensor_decoder_get_type ())
#define GST_YOLO_OD_TENSOR_DECODER(obj) \
  (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoder))
#define GST_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))
#define GST_IS_YOLO_OD_TENSOR_DECODER(obj) \
  (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_IS_YOLO_OD_TENSOR_DECODER_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_YOLO_OD_TENSOR_DECODER))
#define GST_YOLO_OD_TENSOR_DECODER_GET_CLASS(obj) \
  (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_YOLO_OD_TENSOR_DECODER, GstYoloOdTensorDecoderClass))

typedef struct _GstYoloOdTensorDecoder GstYoloOdTensorDecoder;
typedef struct _GstYoloOdTensorDecoderClass GstYoloOdTensorDecoderClass;

typedef struct _BBox
{
  gint x;
  gint y;
  guint w;
  guint h;
} BBox;

struct _GstYoloOdTensorDecoder
{
  GstBaseTransform basetransform;
  /* Box confidence threshold */
  gfloat box_confi_thresh;
  /* Class confidence threshold */
  gfloat cls_confi_thresh;
  /* Intersection-over-Union threshold */
  gfloat iou_thresh;
  /* Maximum number of detections/masks */
  gsize max_detection;
  /* Candidates with a class confidence level above threshold. */
  GPtrArray *sel_candidates;
  /* Final candidates selected that respect class confidence level,
   * NMS and maximum detection. */
  GPtrArray *selected;
  /* Tensor-id identifying mask tensors out of yolo inference process. */
  GQuark mask_tensor_id;

  /* Video info */
  GstVideoInfo video_info;
  /* Labels file */
  gchar *label_file;
  /* Labels */
  GArray *labels;
  /* GstAnalyticsODMtd data */
  GArray *od_mtds;
  /* Hash table to store the offset in the mask tensor buffer where
   * OdMtd data are stored. key is OdMtd.id */
  GHashTable *candidate_offsets;
};

struct _GstYoloOdTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};

GType gst_yolo_od_tensor_decoder_get_type (void);
G_DEFINE_AUTOPTR_CLEANUP_FUNC (GstYoloOdTensorDecoder, g_object_unref)

GST_ELEMENT_REGISTER_DECLARE (yolo_od_tensor_decoder)

/* Yolo segmentation tensor decoder */

#define GST_TYPE_YOLO_SEG_TENSOR_DECODER (gst_yolo_seg_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder,
    GST, YOLO_SEG_TENSOR_DECODER, GstYoloOdTensorDecoder)

struct _GstYoloSegTensorDecoder
{
  GstYoloOdTensorDecoder parent;
  /* Mask width */
  guint mask_w;
  /* Mask height */
  guint mask_h;
  /* Mask length */
  gsize mask_length;
  GQuark logits_tensor_id;
  gfloat bb2mask_gain;
  /* Mask region of interest */
  BBox mask_roi;
  /* BufferPool for mask */
  GstBufferPool *mask_pool;
};

GST_ELEMENT_REGISTER_DECLARE (yolo_seg_tensor_decoder)

G_END_DECLS
#endif /* __GST_YOLO_TENSOR_DECODER_H__ */
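The iou_thresh field above is the non-maximum-suppression cut-off between candidate boxes. Since the matching gstyolotensordecoder.c diff is suppressed above, the following is only an illustrative sketch of how an IoU value for two BBox candidates could be computed; it is not the element's actual implementation.

#include <glib.h>

/* Mirrors the BBox layout declared in the header above. */
typedef struct { gint x; gint y; guint w; guint h; } BBox;

/* Intersection-over-Union of two axis-aligned boxes; a value above
 * iou_thresh would mark the lower-confidence candidate for suppression. */
static gfloat
bbox_iou (const BBox * a, const BBox * b)
{
  gint ix1 = MAX (a->x, b->x);
  gint iy1 = MAX (a->y, b->y);
  gint ix2 = MIN (a->x + (gint) a->w, b->x + (gint) b->w);
  gint iy2 = MIN (a->y + (gint) a->h, b->y + (gint) b->h);
  gfloat inter = (gfloat) MAX (0, ix2 - ix1) * MAX (0, iy2 - iy1);
  gfloat uni = (gfloat) a->w * a->h + (gfloat) b->w * b->h - inter;

  return uni > 0.f ? inter / uni : 0.f;
}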
@@ -1,6 +1,7 @@
tensordecoders_sources = [
  'gsttensordecoders.c',
  'gstssdobjectdetector.c'
  'gstssdobjectdetector.c',
  'gstyolotensordecoder.c'
]

tensordecoders_headers = [