From b7f964929c8ebc918459ddefd582542147c1e081 Mon Sep 17 00:00:00 2001
From: Daniel Morin
Date: Wed, 10 Jan 2024 09:30:51 -0500
Subject: [PATCH] fastsamtensordecoder: Add FastSAM tensor decoder

Co-authored-by: Vineet Suryan
---
 .../docs/plugins/gst_plugins_cache.json            | 108 ++
 .../gst/tensordecoders/gstfastsamtensordecoder.c   | 990 ++++++++++++++++++
 .../gst/tensordecoders/gstfastsamtensordecoder.h   | 101 ++
 .../gst/tensordecoders/gsttensordecoders.c         |   2 +
 .../gst/tensordecoders/meson.build                 |   3 +-
 5 files changed, 1203 insertions(+), 1 deletion(-)
 create mode 100644 subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c
 create mode 100644 subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.h

diff --git a/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json b/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json
index ba43021555..eb820220c8 100644
--- a/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json
+++ b/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json
@@ -248041,6 +248041,114 @@
     "tensordecoders": {
         "description": "Tensor decoders elements",
         "elements": {
+            "fastsamtensordecoder": {
+                "author": "Daniel Morin",
+                "description": "Decode tensors output from the inference of the FastSAM model (segmentation) on video frames. The original repository of FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation, a strawberry segmentation model based on the FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensor names embedded matching the default values of the tensors-name-masks and tensors-name-logits properties. It's also possible to embed tensor-ids into any model based on the FastSAM architecture to allow this tensor-decoder to decode tensors.
This process is described in the Readme of this repository: https://col.la/gstonnxmodels", + "hierarchy": [ + "GstFastSAMTensorDecoder", + "GstBaseTransform", + "GstElement", + "GstObject", + "GInitiallyUnowned", + "GObject" + ], + "klass": "TensorDecoder/Video", + "pad-templates": { + "sink": { + "caps": "video/x-raw:\n", + "direction": "sink", + "presence": "always" + }, + "src": { + "caps": "video/x-raw:\n", + "direction": "src", + "presence": "always" + } + }, + "properties": { + "box-confidence-threshold": { + "blurb": "Boxes with a location confidence level inferior to this threshold will be excluded", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "0.4", + "max": "1", + "min": "0", + "mutable": "null", + "readable": true, + "type": "gfloat", + "writable": true + }, + "class-confidence-threshold": { + "blurb": "Classes with a confidence level inferior to this threshold will be excluded", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "0.4", + "max": "1", + "min": "0", + "mutable": "null", + "readable": true, + "type": "gfloat", + "writable": true + }, + "iou-threshold": { + "blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "0.7", + "max": "1", + "min": "0", + "mutable": "null", + "readable": true, + "type": "gfloat", + "writable": true + }, + "max-detections": { + "blurb": "Maximum object/masks detections.", + "conditionally-available": false, + "construct": false, + "construct-only": false, + "controllable": false, + "default": "100", + "max": "-1", + "min": "0", + "mutable": "null", + "readable": true, + "type": "guint", + "writable": true + }, + "tensors-name-logits": { + "blurb": "Name that identify FastSAM logits tensors.", + "conditionally-available": false, + "construct": true, + "construct-only": false, + "controllable": false, + "default": "Gst.Model.FastSAM.Segmentation.Logits", + "mutable": "null", + "readable": true, + "type": "gchararray", + "writable": true + }, + "tensors-name-masks": { + "blurb": "Name that identify FastSAM mask tensors.", + "conditionally-available": false, + "construct": true, + "construct-only": false, + "controllable": false, + "default": "Gst.Model.FastSAM.Segmentation.Masks", + "mutable": "null", + "readable": true, + "type": "gchararray", + "writable": true + } + }, + "rank": "primary" + }, "ssdobjectdetector": { "author": "Aaron Boxer , Marcus Edel ", "description": "Apply tensor output from inference to detect objects in video frames", diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c new file mode 100644 index 0000000000..7b029eb2d1 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c @@ -0,0 +1,990 @@ +/* + * GStreamer gstreamer-fastsamtensordecoder + * Copyright (C) 2024 Collabora Ltd. + * Authors: Daniel Morin + * Vineet Suryan + * + * gstfastsamtensordecoder.c + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
 + *
 + * This library is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 + * Library General Public License for more details.
 + *
 + * You should have received a copy of the GNU Library General Public
 + * License along with this library; if not, write to the
 + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 + * Boston, MA 02110-1301, USA.
 + */
 +
 +/**
 + * SECTION:element-fastsamtensordecoder
 + * @short_description: Decode tensors from a FastSAM detection and segmentation
 + * neural network.
 + *
 + * This element parses the per-buffer inference tensor metadata generated by
 + * an upstream inference element.
 + *
 + * ## Example launch command:
 + *
 + * Test image file, model file and labels file can be found here:
 + * https://gitlab.collabora.com/gstreamer/onnx-models
 + *
 + * GST_DEBUG=fastsamtensordecoder \
 + * gst-launch-1.0 multifilesrc location=strawberry_crops.jpg ! decodebin \
 + * ! videoconvertscale add-borders=1 ! onnxinference execution-provider=cpu \
 + * model-file=segmentation.onnx input-image-format=chw input-tensor-offset=0 \
 + * input-tensor-scale=255.0 ! fastsamtensordecoder \
 + * class-confidence-threshold=0.8 iou-threshold=0.7 max-detections=100 \
 + * ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF \
 + * draw-labels=true ! segmentationoverlay hint-maximum-segment-type=50 \
 + * ! videoconvert ! ximagesink
 + *
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +#include "gstfastsamtensordecoder.h"
 +
 +#include <gst/analytics/analytics.h>
 +
 +#include <math.h>
 +
 +#define GST_MODEL_FASTSAM_SEGMENTATION_MASK \
 +  "Gst.Model.FastSAM.Segmentation.Masks"
 +#define GST_MODEL_FASTSAM_SEGMENTATION_LOGITS \
 +  "Gst.Model.FastSAM.Segmentation.Logits"
 +
 +GST_DEBUG_CATEGORY_STATIC (fastsam_tensor_decoder_debug);
 +#define GST_CAT_DEFAULT fastsam_tensor_decoder_debug
 +
 +GST_ELEMENT_REGISTER_DEFINE (fastsam_tensor_decoder, "fastsamtensordecoder",
 +    GST_RANK_PRIMARY, GST_TYPE_FASTSAM_TENSOR_DECODER);
 +
 +/* GstFastSAMTensorDecoder properties, see the property descriptions in
 + * gst_fastsam_tensor_decoder_class_init for more details. */
 +enum
 +{
 +  PROP_0,
 +  PROP_BOX_CONFI_THRESH,
 +  PROP_CLS_CONFI_THRESH,
 +  PROP_IOU_THRESH,
 +  PROP_MAX_DETECTION,
 +  PROP_MASK_TENSOR_NAME,
 +  PROP_LOGITS_TENSOR_NAME
 +};
 +
 +/* For debugging purposes */
 +typedef struct _DebugCandidates
 +{
 +  GstFastSAMTensorDecoder *self;
 +  gsize fields;                 /* Number of fields to debug */
 +  gsize offset;                 /* Field offset */
 +  gsize start;                  /* First field index to debug */
 +} DebugCandidates;
 +
 +/* Default property values */
 +static const gfloat DEFAULT_BOX_CONFI_THRESH = 0.4f;
 +static const gfloat DEFAULT_CLS_CONFI_THRESH = 0.4f;
 +static const gfloat DEFAULT_IOU_THRESH = 0.7f;
 +static const gsize DEFAULT_MAX_DETECTION = 100;
 +
 +/* Global variable storing the class (quark) used for object detection.
 + * Object detection generally has a class and we need to provide one, but
 + * for FastSAM this class is just a placeholder. */
 +GQuark OOI_CLASS_ID;
 +
 +/* Tensor-ids are defined by strings that are converted to quarks, which are
 + * just integer values computed with a hash function. For efficiency we
 + * compare the quarks (hash values). Since tensor-ids never change, we just
 + * calculate the hash once during initialization and store the values in
 + * these variables. */
 +GQuark GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID;
 +GQuark GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID;
 +
 +/* GStreamer element srcpad template. Template of a srcpad that can receive
 + * any raw video.
*/ +static GstStaticPadTemplate gst_fastsam_tensor_decoder_src_template = +GST_STATIC_PAD_TEMPLATE ("src", + GST_PAD_SRC, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("video/x-raw")); + +/* GStreamer element sinkpad template. Template of a sinkpad that can receive + * any raw video. */ +static GstStaticPadTemplate gst_fastsam_tensor_decoder_sink_template = +GST_STATIC_PAD_TEMPLATE ("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("video/x-raw")); + +/* Prototypes */ +static void gst_fastsam_tensor_decoder_set_property (GObject * object, + guint prop_id, const GValue * value, GParamSpec * pspec); +static void gst_fastsam_tensor_decoder_get_property (GObject * object, + guint prop_id, GValue * value, GParamSpec * pspec); + +static void gst_fastsam_tensor_decoder_finalize (GObject * object); + +static GstFlowReturn gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * + trans, GstBuffer * buf); +static gboolean gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, + GstCaps * incaps, GstCaps * outcaps); +static void gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder + * self, GstTensor * masks_tensor, GstTensor * logits_tensor, + GstAnalyticsRelationMeta * rmeta); + +G_DEFINE_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder, + GST_TYPE_BASE_TRANSFORM); + +static void +gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass) +{ + GObjectClass *gobject_class = (GObjectClass *) klass; + GstElementClass *element_class = (GstElementClass *) klass; + GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass; + + /* Define GstFastSAMTensorDecoder debug category. */ + GST_DEBUG_CATEGORY_INIT (fastsam_tensor_decoder_debug, "fastsamtensordecoder", + 0, "Tensor decoder for FastSAM segmentation N.N."); + + /* Set GObject vmethod to get and set property */ + gobject_class->set_property = gst_fastsam_tensor_decoder_set_property; + gobject_class->get_property = gst_fastsam_tensor_decoder_get_property; + + /* Set GObject vmethod finalize */ + gobject_class->finalize = gst_fastsam_tensor_decoder_finalize; + + /* Define GstFastSAMTensorDecoder properties using GObject properties + * interface.*/ + g_object_class_install_property (G_OBJECT_CLASS (klass), + PROP_BOX_CONFI_THRESH, + g_param_spec_float ("box-confidence-threshold", + "Box location confidence threshold", + "Boxes with a location confidence level inferior to this threshold " + "will be excluded", + 0.0, 1.0, DEFAULT_BOX_CONFI_THRESH, + (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property (G_OBJECT_CLASS (klass), + PROP_CLS_CONFI_THRESH, + g_param_spec_float ("class-confidence-threshold", + "Class confidence threshold", + "Classes with a confidence level inferior to this threshold " + "will be excluded", + 0.0, 1.0, DEFAULT_CLS_CONFI_THRESH, + (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property (G_OBJECT_CLASS (klass), + PROP_IOU_THRESH, + g_param_spec_float ("iou-threshold", + "Maximum IOU threshold", + "Maximum intersection-over-union between bounding boxes to " + "consider them distinct.", + 0.0, 1.0, DEFAULT_IOU_THRESH, + (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property (G_OBJECT_CLASS (klass), + PROP_MAX_DETECTION, + g_param_spec_uint ("max-detections", + "Maximum object/masks detections.", + "Maximum object/masks detections.", + 0, G_MAXUINT, DEFAULT_MAX_DETECTION, + (GParamFlags) (G_PARAM_READWRITE | 
G_PARAM_STATIC_STRINGS)));
 +
 +  g_object_class_install_property (G_OBJECT_CLASS (klass),
 +      PROP_MASK_TENSOR_NAME,
 +      g_param_spec_string ("tensors-name-masks",
 +          "Mask tensors name",
 +          "Name that identify FastSAM mask tensors.",
 +          GST_MODEL_FASTSAM_SEGMENTATION_MASK,
 +          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
 +              G_PARAM_STATIC_STRINGS)));
 +
 +  g_object_class_install_property (G_OBJECT_CLASS (klass),
 +      PROP_LOGITS_TENSOR_NAME,
 +      g_param_spec_string ("tensors-name-logits",
 +          "Logits tensors name",
 +          "Name that identify FastSAM logits tensors.",
 +          GST_MODEL_FASTSAM_SEGMENTATION_LOGITS,
 +          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
 +              G_PARAM_STATIC_STRINGS)));
 +
 +  /* Element description. */
 +  gst_element_class_set_static_metadata (element_class, "fastsamtensordecoder",
 +      "TensorDecoder/Video",
 +      "Decode tensors output from the inference of the FastSAM model "
 +      "(segmentation) on video frames. The original repository of FastSAM "
 +      "is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy "
 +      "experimentation, a strawberry segmentation model based on the FastSAM "
 +      "architecture in Onnx format can be found at "
 +      "https://col.la/gstonnxmodelseg . This model already has tensor names "
 +      "embedded matching the default values of the tensors-name-masks and "
 +      "tensors-name-logits properties. It's also possible to embed "
 +      "tensor-ids into any model based on the FastSAM architecture to allow "
 +      "this tensor-decoder to decode tensors. This process is described in "
 +      "the Readme of this repository: https://col.la/gstonnxmodels",
 +      "Daniel Morin");
 +
 +  /* Add pads to the element based on the pad templates defined earlier. */
 +  gst_element_class_add_pad_template (element_class,
 +      gst_static_pad_template_get (&gst_fastsam_tensor_decoder_src_template));
 +  gst_element_class_add_pad_template (element_class,
 +      gst_static_pad_template_get (&gst_fastsam_tensor_decoder_sink_template));
 +
 +  /* Set the GstBaseTransform transform_ip vmethod. This method is called
 +   * when the sinkpad receives a buffer. "ip" stands for in-place, meaning
 +   * the buffer content remains unchanged by the element. The tensor decoder
 +   * only monitors the buffers it receives for an attached GstTensorMeta
 +   * with tensor-ids that GstFastSAMTensorDecoder can handle. */
 +  basetransform_class->transform_ip =
 +      GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_transform_ip);
 +
 +  /* Set the GstBaseTransform set_caps vmethod. This will be called once the
 +   * capability negotiation has been completed. We will be able to extract
 +   * the resolution from this callback. */
 +  basetransform_class->set_caps =
 +      GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_set_caps);
 +
 +  /* Calculate the class id placeholder (also a quark) that will be set on all
 +   * OD analytics-meta.
*/ + OOI_CLASS_ID = g_quark_from_static_string ("FastSAM-None"); + + /* Calculate the FastSAM Mask tensor-id */ + GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID = + g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_MASK); + + /* Calculate the FastSAM Logits tensor-id */ + GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID = + g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_LOGITS); +} + +static void +gst_fastsam_tensor_decoder_init (GstFastSAMTensorDecoder * self) +{ + /* GstFastSAMTensorDecoder instance initialization */ + self->box_confi_thresh = DEFAULT_BOX_CONFI_THRESH; + self->cls_confi_thresh = DEFAULT_CLS_CONFI_THRESH; + self->iou_thresh = DEFAULT_IOU_THRESH; + self->max_detection = DEFAULT_MAX_DETECTION; + self->sel_candidates = NULL; + self->selected = NULL; + self->mask_w = 256; + self->mask_h = 256; + self->mask_length = self->mask_w * self->mask_h; + memset (&self->mask_roi, 0, sizeof (BBox)); + self->mask_pool = NULL; + gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE); +} + +static void +gst_fastsam_tensor_decoder_finalize (GObject * object) +{ + GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object); + + if (self->sel_candidates) { + g_ptr_array_unref (g_steal_pointer (&self->sel_candidates)); + } + + if (self->selected) { + g_ptr_array_unref (g_steal_pointer (&self->selected)); + } + + if (self->mask_pool) { + gst_object_unref (self->mask_pool); + } + + G_OBJECT_CLASS (gst_fastsam_tensor_decoder_parent_class)->finalize (object); +} + +static void +gst_fastsam_tensor_decoder_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object); + + switch (prop_id) { + case PROP_BOX_CONFI_THRESH: + GST_OBJECT_LOCK (self); + self->box_confi_thresh = g_value_get_float (value); + GST_OBJECT_UNLOCK (self); + break; + case PROP_CLS_CONFI_THRESH: + GST_OBJECT_LOCK (self); + self->cls_confi_thresh = g_value_get_float (value); + GST_OBJECT_UNLOCK (self); + break; + case PROP_IOU_THRESH: + GST_OBJECT_LOCK (self); + self->iou_thresh = g_value_get_float (value); + GST_OBJECT_UNLOCK (self); + break; + case PROP_MAX_DETECTION: + GST_OBJECT_LOCK (self); + self->max_detection = g_value_get_uint (value); + GST_OBJECT_UNLOCK (self); + break; + case PROP_MASK_TENSOR_NAME: + GST_OBJECT_LOCK (self); + self->mask_tensor_id = g_quark_from_string (g_value_get_string (value)); + GST_OBJECT_UNLOCK (self); + break; + case PROP_LOGITS_TENSOR_NAME: + GST_OBJECT_LOCK (self); + self->logits_tensor_id = g_quark_from_string (g_value_get_string (value)); + GST_OBJECT_UNLOCK (self); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_fastsam_tensor_decoder_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object); + + switch (prop_id) { + case PROP_BOX_CONFI_THRESH: + g_value_set_float (value, self->box_confi_thresh); + break; + case PROP_CLS_CONFI_THRESH: + g_value_set_float (value, self->cls_confi_thresh); + break; + case PROP_IOU_THRESH: + g_value_set_float (value, self->iou_thresh); + break; + case PROP_MAX_DETECTION: + g_value_set_uint (value, self->max_detection); + break; + case PROP_MASK_TENSOR_NAME: + GST_OBJECT_LOCK (self); + g_value_set_string (value, g_quark_to_string (self->mask_tensor_id)); + GST_OBJECT_UNLOCK (self); + break; + case PROP_LOGITS_TENSOR_NAME: + GST_OBJECT_LOCK (self); + g_value_set_string 
(value, g_quark_to_string (self->logits_tensor_id));
 +      GST_OBJECT_UNLOCK (self);
 +      break;
 +    default:
 +      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 +      break;
 +  }
 +}
 +
 +/* gst_fastsam_tensor_decoder_get_tensor_meta
 + * @buf:in: buffer
 + * @mask_tensor:out: Mask tensor
 + * @logits_tensor:out: Logits tensor
 + * @return: TRUE if @buf has mask and logits tensors attached to it.
 + * Retrieve the FastSAM masks and logits tensors from the buffer.
 + */
 +static gboolean
 +gst_fastsam_tensor_decoder_get_tensor_meta (GstFastSAMTensorDecoder * self,
 +    GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor)
 +{
 +  GstTensorMeta *tensor_meta;
 +  gint mask_tensor_idx, logits_tensor_idx;
 +
 +  g_return_val_if_fail (mask_tensor != NULL, FALSE);
 +  g_return_val_if_fail (logits_tensor != NULL, FALSE);
 +
 +  *mask_tensor = NULL;
 +  *logits_tensor = NULL;
 +
 +  /* Retrieve the TensorMeta attached to the buffer. */
 +  tensor_meta = gst_buffer_get_tensor_meta (buf);
 +  if (!tensor_meta) {
 +    GST_LOG_OBJECT (self, "No tensor meta");
 +    return FALSE;
 +  }
 +
 +  GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);
 +
 +  /* Retrieve the index of the tensor that has a tensor-id matching
 +   * GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID in the GstTensorMeta. */
 +  mask_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
 +      GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID);
 +
 +  /* Retrieve the index of the tensor that has a tensor-id matching
 +   * GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */
 +  logits_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
 +      GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID);
 +
 +  if (mask_tensor_idx >= 0 && logits_tensor_idx >= 0) {
 +    GST_LOG_OBJECT (self, "Masks tensor idx: %d", mask_tensor_idx);
 +    GST_LOG_OBJECT (self, "Logits tensor idx: %d", logits_tensor_idx);
 +
 +    *mask_tensor = tensor_meta->tensors[mask_tensor_idx];
 +    *logits_tensor = tensor_meta->tensors[logits_tensor_idx];
 +
 +    return TRUE;
 +  } else {
 +    GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping");
 +  }
 +
 +  return FALSE;
 +}
 +
 +/* gst_fastsam_tensor_decoder_set_caps:
 + *
 + * Callback invoked when caps negotiation has completed. We use it here to
 + * retrieve the video resolution. See GstBaseTransform for more details.
 + */
 +static gboolean
 +gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
 +    GstCaps * outcaps)
 +{
 +  GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
 +
 +  if (!gst_video_info_from_caps (&self->video_info, incaps)) {
 +    GST_ERROR_OBJECT (self, "Failed to parse caps");
 +    return FALSE;
 +  }
 +
 +  if (gst_base_transform_is_passthrough (trans)) {
 +    GST_ERROR_OBJECT (self, "Failed. Can't handle passthrough");
 +    return FALSE;
 +  }
 +
 +  /* The masks need to be cropped to fit the aspect ratio of the image. */
 +  /* TODO: We're reconstructing the transformation that was done on the
 +   * original image, based on the assumption that the complete image,
 +   * without deformation, was analyzed. This assumption is not always true
 +   * and we should try to find a way to convey this transformation
 +   * information and retrieve it from here, to know the transformation that
 +   * needs to be done on the mask. */
 +
 +  if (self->video_info.width > self->video_info.height) {
 +    self->bb2mask_gain = ((gfloat) self->mask_w) / self->video_info.width;
 +    self->mask_roi.x = 0;
 +    self->mask_roi.w = self->mask_w;
 +    self->mask_roi.h = ((gfloat) self->bb2mask_gain) * self->video_info.height;
 +    self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2;
 +  } else {
 +    self->bb2mask_gain = ((gfloat) self->mask_h) / self->video_info.height;
 +    self->mask_roi.y = 0;
 +    self->mask_roi.h = self->mask_h;
 +    self->mask_roi.w = self->bb2mask_gain * self->video_info.width;
 +    self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2;
 +  }
 +
 +  if (self->mask_pool == NULL) {
 +    GstVideoInfo minfo;
 +    GstCaps *caps;
 +    gst_video_info_init (&minfo);
 +    gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, 256, 256);
 +    caps = gst_video_info_to_caps (&minfo);
 +    self->mask_pool = gst_video_buffer_pool_new ();
 +    GstStructure *config = gst_buffer_pool_get_config (self->mask_pool);
 +    gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0);
 +    gst_buffer_pool_config_add_option (config,
 +        GST_BUFFER_POOL_OPTION_VIDEO_META);
 +    g_return_val_if_fail (gst_buffer_pool_set_config (self->mask_pool, config),
 +        FALSE);
 +    g_return_val_if_fail (gst_buffer_pool_set_active (self->mask_pool, TRUE),
 +        FALSE);
 +    gst_caps_unref (caps);
 +  }
 +
 +  return TRUE;
 +}
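 +
 +/* Worked example of the ROI math above (illustrative numbers, not from the
 + * source): for a 1920x1080 landscape input and a 256x256 mask,
 + * bb2mask_gain = 256 / 1920 = 0.1333. The valid mask region is then
 + * 256 x (0.1333 * 1080) = 256x144 pixels, vertically centered with
 + * mask_roi.y = (256 - 144) / 2 = 56, which mirrors the letterboxing
 + * (add-borders=1) applied by videoconvertscale before inference. */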
 +
 +/* gst_fastsam_tensor_decoder_transform_ip:
 + * @trans: Instance
 + * @buf:inout: Buffer containing media and where tensors can be attached
 + * @return: Flow errors
 + * Decode the FastSAM tensors, post-process them and store the decoded
 + * information in an analytics-meta that is attached to the buffer before
 + * it is pushed downstream.
 + */
 +static GstFlowReturn
 +gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans,
 +    GstBuffer * buf)
 +{
 +  GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
 +  GstTensor *masks_tensor, *logits_tensor;
 +  GstAnalyticsRelationMeta *rmeta;
 +  gsize mask_w, mask_h;
 +
 +  if (!gst_fastsam_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor,
 +          &logits_tensor))
 +    return GST_FLOW_OK;
 +
 +  if (masks_tensor->num_dims < 3) {
 +    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
 +        ("Masks tensor must have at least 3 dimensions, "
 +            "but only has %zu", masks_tensor->num_dims));
 +    return GST_FLOW_ERROR;
 +  }
 +
 +  if (logits_tensor->num_dims != 4) {
 +    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
 +        ("Logits tensor must have 4 dimensions but has %zu",
 +            logits_tensor->num_dims));
 +    return GST_FLOW_ERROR;
 +  }
 +
 +  mask_w = logits_tensor->dims[2];
 +  mask_h = logits_tensor->dims[3];
 +
 +  /* The masks need to be cropped to fit the aspect ratio of the image. */
 +  /* TODO: We're reconstructing the transformation that was done on the
 +   * original image, based on the assumption that the complete image,
 +   * without deformation, was analyzed. This assumption is not always true
 +   * and we should try to find a way to convey this transformation
 +   * information and retrieve it from here, to know the transformation that
 +   * needs to be done on the mask. */
 +
 +  if (self->mask_w != mask_w || self->mask_h != mask_h) {
 +    self->mask_w = mask_w;
 +    self->mask_h = mask_h;
 +    self->mask_length = mask_w * mask_h;
 +
 +    if (self->video_info.width > self->video_info.height) {
 +      self->bb2mask_gain = ((gfloat) self->mask_w) / self->video_info.width;
 +      self->mask_roi.x = 0;
 +      self->mask_roi.w = self->mask_w;
 +      self->mask_roi.h =
 +          ((gfloat) self->bb2mask_gain) * self->video_info.height;
 +      self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2;
 +    } else {
 +      self->bb2mask_gain = ((gfloat) self->mask_h) / self->video_info.height;
 +      self->mask_roi.y = 0;
 +      self->mask_roi.h = self->mask_h;
 +      self->mask_roi.w = self->bb2mask_gain * self->video_info.width;
 +      self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2;
 +    }
 +
 +    if (self->mask_pool) {
 +      gst_buffer_pool_set_active (self->mask_pool, FALSE);
 +      g_clear_object (&self->mask_pool);
 +    }
 +  }
 +
 +  if (self->mask_pool == NULL) {
 +    GstVideoInfo minfo;
 +    GstCaps *caps;
 +    gst_video_info_init (&minfo);
 +    gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, self->mask_w,
 +        self->mask_h);
 +    caps = gst_video_info_to_caps (&minfo);
 +    self->mask_pool = gst_video_buffer_pool_new ();
 +
 +    GstStructure *config = gst_buffer_pool_get_config (self->mask_pool);
 +    gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0);
 +    gst_buffer_pool_config_add_option (config,
 +        GST_BUFFER_POOL_OPTION_VIDEO_META);
 +    gst_buffer_pool_set_config (self->mask_pool, config);
 +    gst_buffer_pool_set_active (self->mask_pool, TRUE);
 +    gst_caps_unref (caps);
 +  }
 +
 +  static GstAnalyticsRelationMetaInitParams rmeta_init_params = {
 +    .initial_buf_size = 1024,
 +    .initial_relation_order = 10
 +  };
 +
 +  /* Retrieve or attach an analytics-relation-meta to the buffer.
 +   * An analytics-relation-meta is a container that can receive multiple
 +   * analytics-meta, like OD and Segmentation. The following call retrieves
 +   * the analytics-relation-meta if it exists, or creates one if it does
 +   * not. */
 +  rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params);
 +  g_return_val_if_fail (rmeta != NULL, GST_FLOW_ERROR);
 +
 +  /* Decode masks_tensor and attach the information in a structured way
 +   * to rmeta.
 +   * TODO: I think we need to send both the masks and logits tensors
 +   * to gst_fastsam_tensor_decoder_decode_masks_f32 since both are
 +   * required simultaneously to extract the segmentation. If this is the case
 +   * we probably should rename gst_fastsam_tensor_decoder_decode_masks_f32 to
 +   * gst_fastsam_tensor_decoder_decode_f32. */
 +  gst_fastsam_tensor_decoder_decode_masks_f32 (self, masks_tensor,
 +      logits_tensor, rmeta);
 +
 +  return GST_FLOW_OK;
 +}
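 +
 +/* Shape reminder (typical FastSAM/YOLOv8-seg export; the actual dims come
 + * from the model, the numbers below are only an assumed example): the masks
 + * tensor is [1 x 37 x 8400], where 37 = 4 bbox fields + 1 confidence +
 + * 32 mask coefficients and 8400 is the number of candidates, and the logits
 + * tensor is [1 x 32 x 256 x 256], i.e. 32 mask prototypes of 256x256
 + * pixels. Each final mask is a linear combination of the 32 prototypes
 + * weighted by the candidate's 32 coefficients. */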
 +
 +/* Evaluate if there's an intersection between segments s1 and s2. */
 +static guint
 +linear_intersection (guint s1_min, guint s1_max, guint s2_min, guint s2_max)
 +{
 +  guint tmp;
 +  if (s1_max > s2_min && s2_max > s1_min) {
 +    if (s1_min > s2_min) {
 +      tmp = (s2_max > s1_max) ? s1_max : s2_max;
 +      return tmp - s1_min;
 +    } else {
 +      tmp = (s1_max > s2_max) ? s2_max : s1_max;
 +      return tmp - s2_min;
 +    }
 +  }
 +  return 0;
 +}
 +
 +static gfloat
 +iou (guint bb1_x, guint bb1_y, guint bb1_w, guint bb1_h,
 +    guint bb2_x, guint bb2_y, guint bb2_w, guint bb2_h)
 +{
 +  /* Rationale: a linear intersection is much faster to calculate than a
 +   * 2D intersection. We project the two bounding boxes considered for
 +   * intersection onto one axis and verify if the segments they create
 +   * intersect. If they don't, the bounding boxes can't intersect in 2D and
 +   * we don't need to verify if they intersect on the other dimension. If
 +   * they intersect on the first dimension, we verify if they intersect on
 +   * the other dimension. Again, if they don't intersect, the bounding boxes
 +   * can't intersect in a 2D space. If they intersect on both axes, we
 +   * calculate the IoU. */
 +  const guint x_intersection =
 +      linear_intersection (bb1_x, bb1_x + bb1_w, bb2_x, bb2_x + bb2_w);
 +  if (x_intersection > 0) {
 +    const guint y_intersection = linear_intersection (bb1_y, bb1_y + bb1_h,
 +        bb2_y, bb2_y + bb2_h);
 +    if (y_intersection > 0) {
 +      const guint bb1_area = bb1_w * bb1_h;
 +      const guint bb2_area = bb2_w * bb2_h;
 +      const guint intersect_area = x_intersection * y_intersection;
 +      const guint union_area = bb1_area + bb2_area - intersect_area;
 +      return union_area == 0 ? 0.0f : ((gfloat) intersect_area) / union_area;
 +    }
 +  }
 +
 +  return 0.0f;
 +}
 +
 +/* Extract a bounding box from tensor data. The tensor stores the box center
 + * and size; convert to top-left corner plus size, rounding w/h to the
 + * nearest integer. */
 +static void
 +gst_fastsam_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
 +    BBox * bbox)
 +{
 +  gfloat w = *(candidate + offset[2]);
 +  gfloat h = *(candidate + offset[3]);
 +  bbox->x = *(candidate + offset[0]) - (w / 2);
 +  bbox->y = *(candidate + offset[1]) - (h / 2);
 +  bbox->w = w + 0.5;
 +  bbox->h = h + 0.5;
 +}
 +
 +/* Calculate the IoU between the bounding boxes of candidates c1 and c2. */
 +static gfloat
 +gst_fastsam_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
 +    BBox * bb1, BBox * bb2)
 +{
 +  gst_fastsam_tensor_decoder_convert_bbox (c1, offset, bb1);
 +  gst_fastsam_tensor_decoder_convert_bbox (c2, offset, bb2);
 +  return iou (bb1->x, bb1->y, bb1->w, bb1->h, bb2->x, bb2->y, bb2->w, bb2->h);
 +}
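 +
 +/* Worked example (illustrative numbers): two 10x10 boxes at (0,0) and (5,5)
 + * project onto x as [0,10] and [5,15], overlapping by 5; same on y. The
 + * intersection is 5 * 5 = 25, the union is 100 + 100 - 25 = 175, so
 + * IoU = 25 / 175 ~= 0.14, below the default 0.7 threshold: both boxes
 + * would be kept as distinct detections. */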
 +
 +/* Compare c1 and c2.
 + * Utility function for sorting candidates, in descending order, based on
 + * the field identified by offset. */
 +static gint
 +gst_fastsam_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
 +    gpointer offset)
 +{
 +  const gfloat *c1_confi =
 +      (*((const gfloat **) c1) + GPOINTER_TO_SIZE (offset));
 +  const gfloat *c2_confi =
 +      (*((const gfloat **) c2) + GPOINTER_TO_SIZE (offset));
 +  return *c1_confi < *c2_confi ? 1 : *c1_confi > *c2_confi ? -1 : 0;
 +}
 +
 +static void
 +gst_fastsam_tensor_decoder_debug_print_candidate (gpointer candidate_,
 +    gpointer data)
 +{
 +  DebugCandidates *ctx = data;
 +  const gfloat *candidate = candidate_;
 +
 +  for (gsize i = ctx->start; i < ctx->fields + ctx->start; i++) {
 +    GST_TRACE_OBJECT (ctx->self, "Field %lu: %f", i,
 +        *(candidate + (i * ctx->offset)));
 +  }
 +}
 +
 +static float
 +sigmoid (float x)
 +{
 +  /* For positive x, compute with exp(-x) to avoid overflow. */
 +  if (x > 0) {
 +    double exp_neg_x = exp (-x);
 +    return 1.0 / (1.0 + exp_neg_x);
 +  }
 +  /* For negative x, the equivalent form with exp(x) avoids overflow and
 +   * improves numerical stability. */
 +  else {
 +    double exp_x = exp (x);
 +    return exp_x / (1.0 + exp_x);
 +  }
 +}
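 +
 +/* The two branches above are algebraically the same function:
 + * sigmoid(x) = 1 / (1 + e^(-x)) = e^x / (1 + e^x). Evaluating the form
 + * whose exponential has a non-positive argument keeps exp() in [0, 1],
 + * so neither branch can overflow, whatever the magnitude of x. */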
 +
 +static gboolean
 +gst_fastsam_tensor_decoder_decode_valid_bb (GstFastSAMTensorDecoder * self,
 +    gfloat x, gfloat y, gfloat w, gfloat h)
 +{
 +  if (x > (GST_VIDEO_INFO_WIDTH (&self->video_info)))
 +    return FALSE;
 +  if (y > (GST_VIDEO_INFO_HEIGHT (&self->video_info)))
 +    return FALSE;
 +  if (x < -(gfloat) (GST_VIDEO_INFO_WIDTH (&self->video_info) / 2.0))
 +    return FALSE;
 +  if (y < -(gfloat) (GST_VIDEO_INFO_HEIGHT (&self->video_info) / 2.0))
 +    return FALSE;
 +  if (w <= 0)
 +    return FALSE;
 +  if (h <= 0)
 +    return FALSE;
 +  if (w > (GST_VIDEO_INFO_WIDTH (&self->video_info)))
 +    return FALSE;
 +  if (h > (GST_VIDEO_INFO_HEIGHT (&self->video_info)))
 +    return FALSE;
 +
 +  return TRUE;
 +}
 +
 +static void
 +gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
 +    GstTensor * masks_tensor, GstTensor * logits_tensor,
 +    GstAnalyticsRelationMeta * rmeta)
 +{
 +  /*guint batch_size = masks_tensor->dims[0]; */
 +  /*guint num_masks = masks_tensor->dims[1]; */
 +  GstMapInfo map_info_masks, map_info_logits, out_mask_info;
 +  gfloat *candidate, **candidates, iou, *data_logits;
 +  gboolean rv, keep;
 +  gsize offset, x_offset, y_offset, w_offset, h_offset, c_offset, offsets[4];
 +  gsize m0_offset;
 +  GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;
 +  BBox bb1, bb2, bb_mask;
 +  GstAnalyticsODMtd od_mtd;
 +  GstAnalyticsSegmentationMtd seg_mtd;
 +  guint8 *mask_data;
 +
 +  /* Map the masks tensor memory in READ mode. */
 +  rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
 +  g_assert (rv);
 +
 +  /* Map the logits tensor memory in READ mode. */
 +  rv = gst_buffer_map (logits_tensor->data, &map_info_logits, GST_MAP_READ);
 +  g_assert (rv);
 +  data_logits = (gfloat *) map_info_logits.data;
 +
 +  GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims);
 +
 +  /* Trace masks tensor dimensions */
 +  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
 +    for (gsize i = 0; i < masks_tensor->num_dims; i++) {
 +      GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i,
 +          masks_tensor->dims[i]);
 +    }
 +  }
 +
 +  /* Allocate arrays to store selected candidates. */
 +  if (sel_candidates == NULL) {
 +    /* The number of candidates can be large; keep the arrays around to
 +     * avoid frequent allocations. */
 +    sel_candidates = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
 +    self->sel_candidates = sel_candidates;
 +    selected = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
 +    self->selected = selected;
 +  } else {
 +    /* Reset lengths when we re-use the arrays. */
 +    sel_candidates->len = 0;
 +    selected->len = 0;
 +  }
 +
 +  /* masks_tensor->dims[2] contains the number of candidates. Let's call the
 +   * number of candidates C. We store this value in offset, as we use it to
 +   * calculate the offset of the candidate fields. map_info_masks.data
 +   * points at the masks tensor data, but the candidate data is organized in
 +   * planes: the bbox X coordinate fields of candidates 0 to C start at the
 +   * beginning of the tensor data and are contiguous in memory, followed by
 +   * all the candidates' Y fields, followed by the W fields, ..., followed
 +   * by the class confidence levels, ..., followed by all the candidates'
 +   * mask0 coefficients, ..., up to mask31. Below we pre-calculate each
 +   * field offset relative to the candidate pointer (pointer to field X),
 +   * which allows us to easily access each candidate's fields. */
 +  offset = masks_tensor->dims[2];
 +  x_offset = 0;
 +  y_offset = offset;
 +  w_offset = 2 * offset;
 +  h_offset = 3 * offset;
 +  c_offset = 4 * offset;
 +  m0_offset = 5 * offset;
 +  offsets[0] = x_offset;
 +  offsets[1] = y_offset;
 +  offsets[2] = w_offset;
 +  offsets[3] = h_offset;
 +
 +#define MASK_X(candidate, index) candidate[m0_offset + (index * offset)]
 +#define BB_X(candidate) candidate[x_offset]
 +#define BB_Y(candidate) candidate[y_offset]
 +#define BB_W(candidate) candidate[w_offset]
 +#define BB_H(candidate) candidate[h_offset]
 +
 +  candidate = (gfloat *) map_info_masks.data;
 +  for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
 +    /* FastSAM only has one class, but the confidence level is still used
 +     * to evaluate the relevance of the candidate. Here we filter candidates
 +     * based on their class confidence level. */
 +    if (candidate[c_offset] > self->cls_confi_thresh &&
 +        gst_fastsam_tensor_decoder_decode_valid_bb (self,
 +            BB_X (candidate), BB_Y (candidate), BB_W (candidate),
 +            BB_H (candidate))) {
 +      g_ptr_array_add (sel_candidates, candidate);
 +      GST_TRACE_OBJECT (self,
 +          "%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
 +          c_idx,
 +          candidate[x_offset],
 +          candidate[y_offset],
 +          candidate[w_offset],
 +          candidate[h_offset],
 +          candidate[w_offset] * candidate[h_offset], candidate[c_offset]);
 +    }
 +
 +    /* Pointer arithmetic: move to the next candidate. Incrementing the
 +     * candidate pointer by one lands on the field X of the next candidate,
 +     * since the X fields of all candidates are contiguous. */
 +    candidate += 1;
 +  }
 +
 +  GST_LOG_OBJECT (self, "Selected candidates count: %u", sel_candidates->len);
 +
 +  /* We sort the remaining candidates because the next selection phase has a
 +   * maximum, and we want to make sure we consider the candidates with the
 +   * highest class confidence level first, before potentially reaching that
 +   * maximum. */
 +  g_ptr_array_sort_with_data (sel_candidates,
 +      gst_fastsam_tensor_decoder_sort_candidates, GSIZE_TO_POINTER (c_offset));
 +
 +  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
 +    /* For debugging purposes only. Prints candidates before NMS. */
 +    DebugCandidates ctx;
 +    ctx.start = 0;
 +    ctx.fields = 5;
 +    ctx.offset = offset;
 +    ctx.self = self;
 +    g_ptr_array_foreach (sel_candidates,
 +        gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
 +  }
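 +
 +  /* Layout illustration (not from the source; assuming C = 8400 candidates):
 +   * with the planar layout described above, field f of candidate i lives at
 +   * data[f * C + i]. For example, the confidence of candidate 2 is
 +   * data[4 * 8400 + 2] and its first mask coefficient is
 +   * data[5 * 8400 + 2]; a candidate pointer &data[i] therefore reaches any
 +   * of its fields by adding a multiple of C, which is exactly what the
 +   * macros above do. */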
 +
 +  GstBuffer *mask_buf;
 +  guint region_ids[2] = { 0, 0 };
 +
 +  /* Algorithm in part inspired by OpenCV NMSBoxes. */
 +  candidates = (gfloat **) sel_candidates->pdata;
 +  for (gsize c = 0; c < sel_candidates->len; c++) {
 +    keep = TRUE;
 +
 +    /* We only want to do an NMS using IoU between the candidates we've
 +     * decided to keep and the new one we're considering: the selected array
 +     * contains the candidates we decided to keep, and candidates[c] is the
 +     * candidate we're considering to keep or reject. */
 +    for (gsize s = 0; s < selected->len && keep; s++) {
 +      iou = gst_fastsam_tensor_decoder_iou (candidates[c], selected->pdata[s],
 +          offsets, &bb1, &bb2);
 +      keep = iou <= self->iou_thresh;
 +    }
 +
 +    if (keep) {
 +      candidate = sel_candidates->pdata[c];
 +      if (selected->len == 0) {
 +        /* The first bounding box always gets in, as there are no other
 +         * bboxes to filter it against based on IoU. */
 +        gst_fastsam_tensor_decoder_convert_bbox (candidate, offsets, &bb1);
 +      }
 +
 +      g_ptr_array_add (selected, candidate);
 +      region_ids[1] = selected->len;
 +
 +      /* We add the analytics-objectdetection-meta to the buffer. Since
 +       * there's only one class, the class confidence level is set to -1.0
 +       * as it's deemed not important. */
 +      gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID,
 +          bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd);
 +
 +      bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
 +      bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;
 +      bb_mask.w = self->bb2mask_gain * bb1.w;
 +      bb_mask.h = self->bb2mask_gain * bb1.h;
 +
 +      mask_buf = NULL;
 +      g_assert (gst_buffer_pool_acquire_buffer (self->mask_pool,
 +              &mask_buf, NULL) == GST_FLOW_OK);
 +      g_assert (GST_IS_BUFFER (mask_buf));
 +      GstVideoMeta *vmeta = gst_buffer_get_video_meta (mask_buf);
 +      g_assert (vmeta != NULL);
 +      vmeta->width = bb_mask.w;
 +      vmeta->height = bb_mask.h;
 +
 +      gst_buffer_map (mask_buf, &out_mask_info, GST_MAP_READWRITE);
 +      mask_data = (guint8 *) out_mask_info.data;
 +
 +#define MX_MAX (bb_mask.x + bb_mask.w)
 +#define MY_MAX (bb_mask.y + bb_mask.h)
 +
 +      for (gint my = bb_mask.y, i = 0, j; my < MY_MAX; my++) {
 +        for (gint mx = bb_mask.x; mx < MX_MAX; mx++, i++) {
 +          float sum = 0.0f;
 +          j = my * self->mask_w + mx;
 +          for (gint k = 0; k < 32; ++k) {
 +            GST_TRACE_OBJECT (self, "protos data at (%d, %d) is %f", j, k,
 +                data_logits[k * self->mask_length + j]);
 +            sum +=
 +                MASK_X (candidate, k) * data_logits[k * self->mask_length + j];
 +          }
 +          mask_data[i] = sigmoid (sum) > 0.5 ? selected->len : 0;
 +        }
 +      }
 +
 +      gst_analytics_relation_meta_add_segmentation_mtd (rmeta, mask_buf,
 +          GST_SEGMENTATION_TYPE_INSTANCE, 1, region_ids, bb1.x, bb1.y, bb1.w,
 +          bb1.h, &seg_mtd);
 +
 +      gst_analytics_relation_meta_set_relation (rmeta,
 +          GST_ANALYTICS_REL_TYPE_RELATE_TO, seg_mtd.id, od_mtd.id);
 +
 +      gst_analytics_relation_meta_set_relation (rmeta,
 +          GST_ANALYTICS_REL_TYPE_RELATE_TO, od_mtd.id, seg_mtd.id);
 +
 +      gst_buffer_unmap (mask_buf, &out_mask_info);
 +
 +      /* If the maximum number of selected candidates is reached, exit the
 +       * selection process. */
 +      if (selected->len >= self->max_detection) {
 +        break;
 +      }
 +    }
 +  }
 +
 +  GST_LOG_OBJECT (self, "Selected count: %u", selected->len);
 +
 +  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
 +    DebugCandidates ctx;
 +    /* For debugging purposes only. Prints candidates after NMS. */
 +    ctx.start = 0;
 +    ctx.fields = 5;
 +    ctx.offset = offset;
 +    ctx.self = self;
 +    g_ptr_array_foreach (selected,
 +        gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
 +  }
 +
 +  /* Unmap the tensor memories. */
 +  gst_buffer_unmap (masks_tensor->data, &map_info_masks);
 +  gst_buffer_unmap (logits_tensor->data, &map_info_logits);
 +}
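 +
 +/* Sketch (not part of this patch): how a downstream element or pad probe
 + * could read back the metas attached above. This assumes the GStreamer 1.24
 + * analytics API; the iteration helpers may differ in other versions. */
 +#if 0
 +static void
 +inspect_fastsam_results (GstBuffer * buf)
 +{
 +  GstAnalyticsRelationMeta *rmeta =
 +      gst_buffer_get_analytics_relation_meta (buf);
 +  gpointer state = NULL;
 +  GstAnalyticsODMtd od_mtd;
 +
 +  if (rmeta == NULL)
 +    return;
 +
 +  /* Iterate all object-detection mtd attached by the decoder. */
 +  while (gst_analytics_relation_meta_iterate (rmeta, &state,
 +          gst_analytics_od_mtd_get_mtd_type (), &od_mtd)) {
 +    gint x, y, w, h;
 +    gfloat conf;
 +    if (gst_analytics_od_mtd_get_location (&od_mtd, &x, &y, &w, &h, &conf))
 +      g_print ("object at %dx%d+%d+%d\n", w, h, x, y);
 +  }
 +}
 +#endif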
diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.h b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.h
new file mode 100644
index 0000000000..429e46231a
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.h
@@ -0,0 +1,101 @@
+/*
+ * GStreamer gstreamer-fastsamtensordecoder
+ * Copyright (C) 2024 Collabora Ltd
+ * Authors: Daniel Morin
+ *    Vineet Suryan
+ *
+ * gstfastsamtensordecoder.h
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+
+#ifndef __GST_FASTSAM_TENSOR_DECODER_H__
+#define __GST_FASTSAM_TENSOR_DECODER_H__
+
+#include <gst/gst.h>
+#include <gst/video/video.h>
+#include <gst/base/gstbasetransform.h>
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_FASTSAM_TENSOR_DECODER (gst_fastsam_tensor_decoder_get_type ())
+G_DECLARE_FINAL_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder,
+    GST, FASTSAM_TENSOR_DECODER, GstBaseTransform)
+
+typedef struct _BBox
+{
+  gint x;
+  gint y;
+  guint w;
+  guint h;
+} BBox;
+
+struct _GstFastSAMTensorDecoder
+{
+  GstBaseTransform basetransform;
+  /* Box confidence threshold */
+  gfloat box_confi_thresh;
+  /* Class confidence threshold */
+  gfloat cls_confi_thresh;
+  /* Intersection-over-Union threshold */
+  gfloat iou_thresh;
+  /* Maximum number of detections/masks */
+  gsize max_detection;
+  /* Video Info */
+  GstVideoInfo video_info;
+
+  /* Candidates with a class confidence level above the threshold. */
+  GPtrArray *sel_candidates;
+
+  /* Final selected candidates that respect the class confidence level,
+   * NMS and the maximum detection count. */
+  GPtrArray *selected;
+
+  /* Tensor-id identifying mask tensors out of the FastSAM inference
+   * process. */
+  GQuark mask_tensor_id;
+
+  /* Tensor-id identifying logits tensors out of the FastSAM inference
+   * process. */
+  GQuark logits_tensor_id;
+
+  /* Region of the mask that contains valid segmentation information */
+  BBox mask_roi;
+
+  /* Scaling factor to convert bounding-box coordinates to mask coordinates */
+  gfloat bb2mask_gain;
+
+  /* Mask width */
+  guint mask_w;
+
+  /* Mask height */
+  guint mask_h;
+
+  /* Mask length (width * height) */
+  gsize mask_length;
+
+  /* BufferPool for masks */
+  GstBufferPool *mask_pool;
+};
+
+struct _GstFastSAMTensorDecoderClass
+{
+  GstBaseTransformClass parent_class;
+};
+
+GST_ELEMENT_REGISTER_DECLARE (fastsam_tensor_decoder)
+
+G_END_DECLS
+#endif /* __GST_FASTSAM_TENSOR_DECODER_H__ */
diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gsttensordecoders.c b/subprojects/gst-plugins-bad/gst/tensordecoders/gsttensordecoders.c
index 258afe84cb..ac0cb59fe9 100644
--- a/subprojects/gst-plugins-bad/gst/tensordecoders/gsttensordecoders.c
+++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gsttensordecoders.c
@@ -25,6 +25,7 @@
 #endif
 
 #include "gstssdobjectdetector.h"
+#include "gstfastsamtensordecoder.h"
 
 /**
  * SECTION:plugin-tensordecoders
@@ -38,6 +39,7 @@ plugin_init (GstPlugin * plugin)
 {
   gboolean ret = FALSE;
   ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
+  ret |= GST_ELEMENT_REGISTER (fastsam_tensor_decoder, plugin);
 
   return ret;
 }
diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/meson.build b/subprojects/gst-plugins-bad/gst/tensordecoders/meson.build
index 1c1940a442..9370ce6180 100644
--- a/subprojects/gst-plugins-bad/gst/tensordecoders/meson.build
+++ b/subprojects/gst-plugins-bad/gst/tensordecoders/meson.build
@@ -1,6 +1,7 @@
 tensordecoders_sources = [
   'gsttensordecoders.c',
-  'gstssdobjectdetector.c'
+  'gstssdobjectdetector.c',
+  'gstfastsamtensordecoder.c'
 ]
 
 tensordecoders_headers = [