gst-analytics : Adapt and Rename fastsamtensordecoder to yolo based.

YOLOv8 models have the same tensor output format as FastSAM, so for better
generalization rename fastsamtensordecoder to yolotensordecoder. This
also requires code adaptation to support YOLO-based models.
This commit is contained in:
Santosh Mahto 2025-01-28 20:21:24 +05:30 committed by Elias Rosendahl
parent 244dd01b22
commit 62eeb7e008
4 changed files with 166 additions and 125 deletions

View File

@ -25,7 +25,7 @@
#endif
#include "gstssdobjectdetector.h"
#include "gstfastsamtensordecoder.h"
#include "gstyolotensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@ -39,7 +39,7 @@ plugin_init (GstPlugin * plugin)
{
gboolean ret = FALSE;
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (fastsam_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_tensor_decoder, plugin);
return ret;
}

View File

@ -1,10 +1,11 @@
/*
* GStreamer gstreamer-fastsamtensordecoder
* GStreamer gstreamer-yolotensordecoder
* Copyright (C) 2024 Collabora Ltd.
* Authors: Daniel Morin <daniel.morin@collabora.com>
* Vineet Suryan <vineet.suryan@collabora.com>
* Santosh Mahto <santosh.mahto@collabora.com>
*
* gstfastsamtensordecoder.c
* gstyolotensordecoder.c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@ -23,8 +24,8 @@
*/
/**
* SECTION:element-fastsamtensordecoder.c
* @short_description: Decode tensors from a FastSAM detection and segmentation
* SECTION:element-yolotensordecoder.c
* @short_description: Decode tensors from a FastSAM or YOLOv8 detection and segmentation
* neural network.
*
*
@ -37,11 +38,11 @@
* Test image file, model file and labels file can be found here :
* https://gitlab.collabora.com/gstreamer/onnx-models
*
* GST_DEBUG=fastsamtensordecoder \
* GST_DEBUG=yolotensordecoder \
* gst-launch-1.0 multifilesrc location=strawberry_crops.jpg ! decodebin \
* ! videoconvertscale add-borders=1 ! onnxinference execution-provider=cpu
* model-file=segmentation.onnx input-image-format=chw input-tensor-offset=0 \
* input-tensor-scale=255.0 ! fastsamtensordecoder \
* input-tensor-scale=255.0 ! yolotensordecoder \
* class-confidence-threshold=0.8 iou-threshold=0.7 max-detections=100
* ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF
* draw-labels=true ! segmentationoverlay hint-maximum-segment-type=50 \
@ -53,25 +54,25 @@
#include "config.h"
#endif
#include "gstfastsamtensordecoder.h"
#include "gstyolotensordecoder.h"
#include <gst/analytics/analytics.h>
#include <math.h>
#define GST_MODEL_FASTSAM_SEGMENTATION_MASK \
"Gst.Model.FastSAM.Segmentation.Masks"
#define GST_MODEL_FASTSAM_SEGMENTATION_LOGITS \
"Gst.Model.FastSAM.Segmentation.Logits"
#define GST_MODEL_YOLO_SEGMENTATION_MASK \
"Gst.Model.Yolo.Segmentation.Masks"
#define GST_MODEL_YOLO_SEGMENTATION_LOGITS \
"Gst.Model.Yolo.Segmentation.Logits"
GST_DEBUG_CATEGORY_STATIC (fastsam_tensor_decoder_debug);
#define GST_CAT_DEFAULT fastsam_tensor_decoder_debug
GST_DEBUG_CATEGORY_STATIC (yolo_tensor_decoder_debug);
#define GST_CAT_DEFAULT yolo_tensor_decoder_debug
GST_ELEMENT_REGISTER_DEFINE (fastsam_tensor_decoder, "fastsamtensordecoder",
GST_RANK_PRIMARY, GST_TYPE_FASTSAM_TENSOR_DECODER);
GST_ELEMENT_REGISTER_DEFINE (yolo_tensor_decoder, "yolotensordecoder",
GST_RANK_PRIMARY, GST_TYPE_YOLO_TENSOR_DECODER);
/* GstFastSAMTensorDecoder properties, see properties description in
* gst_fastsam_tensor_decoder_class_init for more details. */
/* GstYoloTensorDecoder properties, see properties description in
* gst_yolo_tensor_decoder_class_init for more details. */
enum
{
PROP_0,
@ -86,12 +87,20 @@ enum
/* For debug purpose */
typedef struct _DebugCandidates
{
GstFastSAMTensorDecoder *self;
GstYoloTensorDecoder *self;
gsize fields; /* Fields count to debug */
gsize offset; /* Fields offset */
gsize start; /* First field index to debug */
} DebugCandidates;
/* Specify the range of confidence levels in the tensor output */
typedef struct _ConfidenceRange
{
gsize start; /* Start index of confidence level */
gsize end; /* End index of confidence level */
gsize step; /* Step size of next confidence level index */
} ConfidenceRange;
/* Default properties value */
static const gfloat DEFAULT_BOX_CONFI_THRESH = 0.4f;
static const gfloat DEFAULT_CLS_CONFI_THRESH = 0.4f;
@ -107,12 +116,12 @@ GQuark OOI_CLASS_ID;
* we compare on the quark (hash value). Since tensor-id never change we
* just calculate the hash once during initialization and store the value in
* these variables. */
GQuark GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID;
GQuark GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID;
GQuark GST_MODEL_YOLO_SEGMENTATION_MASKS_ID;
GQuark GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID;
/* GStreamer element srcpad template. Template of a srcpad that can receive
* any raw video. */
static GstStaticPadTemplate gst_fastsam_tensor_decoder_src_template =
static GstStaticPadTemplate gst_yolo_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
@ -120,47 +129,47 @@ GST_STATIC_PAD_TEMPLATE ("src",
/* GStreamer element sinkpad template. Template of a sinkpad that can receive
* any raw video. */
static GstStaticPadTemplate gst_fastsam_tensor_decoder_sink_template =
static GstStaticPadTemplate gst_yolo_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("video/x-raw"));
/* Prototypes */
static void gst_fastsam_tensor_decoder_set_property (GObject * object,
static void gst_yolo_tensor_decoder_set_property (GObject * object,
guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_fastsam_tensor_decoder_get_property (GObject * object,
static void gst_yolo_tensor_decoder_get_property (GObject * object,
guint prop_id, GValue * value, GParamSpec * pspec);
static gboolean gst_fastsam_tensor_decoder_stop (GstBaseTransform * trans);
static gboolean gst_yolo_tensor_decoder_stop (GstBaseTransform * trans);
static GstFlowReturn gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform *
static GstFlowReturn gst_yolo_tensor_decoder_transform_ip (GstBaseTransform *
trans, GstBuffer * buf);
static gboolean gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans,
static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans,
GstCaps * incaps, GstCaps * outcaps);
static void gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder
static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
GstAnalyticsRelationMeta * rmeta);
G_DEFINE_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder,
G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
GST_TYPE_BASE_TRANSFORM);
static void
gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass)
gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
{
GObjectClass *gobject_class = (GObjectClass *) klass;
GstElementClass *element_class = (GstElementClass *) klass;
GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;
/* Define GstFastSAMTensorDecoder debug category. */
GST_DEBUG_CATEGORY_INIT (fastsam_tensor_decoder_debug, "fastsamtensordecoder",
0, "Tensor decoder for FastSAM segmentation N.N.");
/* Define GstYoloTensorDecoder debug category. */
GST_DEBUG_CATEGORY_INIT (yolo_tensor_decoder_debug, "yolotensordecoder",
0, "Tensor decoder for Yolo segmentation N.N.");
/* Set GObject vmethod to get and set property */
gobject_class->set_property = gst_fastsam_tensor_decoder_set_property;
gobject_class->get_property = gst_fastsam_tensor_decoder_get_property;
gobject_class->set_property = gst_yolo_tensor_decoder_set_property;
gobject_class->get_property = gst_yolo_tensor_decoder_get_property;
/* Define GstFastSAMTensorDecoder properties using GObject properties
/* Define GstYoloTensorDecoder properties using GObject properties
* interface.*/
g_object_class_install_property (G_OBJECT_CLASS (klass),
PROP_BOX_CONFI_THRESH,
@ -201,8 +210,8 @@ gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass)
PROP_MASK_TENSOR_NAME,
g_param_spec_string ("tensors-name-masks",
"Mask tensors name",
"Name that identify FastSAM mask tensors.",
GST_MODEL_FASTSAM_SEGMENTATION_MASK,
"Name that identify Yolo mask tensors.",
GST_MODEL_YOLO_SEGMENTATION_MASK,
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
G_PARAM_STATIC_STRINGS)));
@ -210,22 +219,22 @@ gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass)
PROP_LOGITS_TENSOR_NAME,
g_param_spec_string ("tensors-name-logits",
"Logits tensors name",
"Name that identify FastSAM logits tensors.",
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS,
"Name that identify Yolo logits tensors.",
GST_MODEL_YOLO_SEGMENTATION_LOGITS,
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
G_PARAM_STATIC_STRINGS)));
/* Element description. */
gst_element_class_set_static_metadata (element_class, "fastsamtensordecoder",
gst_element_class_set_static_metadata (element_class, "yolotensordecoder",
"TensorDecoder/Video",
"Decode tensors output from the inference of FastSAM model (segmentation)"
" on video frames. The original repository of the FastSAM is located at"
" https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a"
" strawberry segmentation model based on FastSAM architecture in Onnx "
"Decode tensors output from the inference of Yolo or FastSAM model (segmentation)"
" on video frames. The original repository of the Yolo is located at"
" https://github.com/ultralytics/ultralytics. For easy experimentation a"
" object segmentation model based on Yolo architecture in Onnx "
" format can be found at https://col.la/gstonnxmodelseg . This model "
"already has tensors name embedded matching default "
"values of tensors-masks-name and tensors-logits-name properties. It's "
"also possible to embed tensor-ids into any model based on FastSAM "
"also possible to embed tensor-ids into any model based on Yolo "
"architecture to allow this tensor-decoder to decode tensors. This "
"process is described in the Readme of this repository: "
"https://col.la/gstonnxmodels",
@ -233,45 +242,45 @@ gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass)
/* Add pads to element base on pad template defined earlier */
gst_element_class_add_pad_template (element_class,
gst_static_pad_template_get (&gst_fastsam_tensor_decoder_src_template));
gst_static_pad_template_get (&gst_yolo_tensor_decoder_src_template));
gst_element_class_add_pad_template (element_class,
gst_static_pad_template_get (&gst_fastsam_tensor_decoder_sink_template));
gst_static_pad_template_get (&gst_yolo_tensor_decoder_sink_template));
/* Set GstBaseTransform vmethod transform_ip. This method is called
* by the srcpad when it receives a buffer. ip stands for in-place, meaning the
* buffer remains unchanged by the element. Tensor-decoder only monitors
* the buffer it receives for a meta attached to the buffer that is a GstTensorMeta
* and has a tensor-id can be handled by GstFastSAMTensorDecoder. */
* and has a tensor-id can be handled by GstYoloTensorDecoder. */
basetransform_class->transform_ip =
GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_transform_ip);
GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_transform_ip);
/* Set GstBaseTransform set_caps vmethod. This will be called once the
* capability negotiation has been completed. We will be able to extract
* resolution from this callback. */
basetransform_class->set_caps =
GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_set_caps);
GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_set_caps);
/* Set GObject vmethod finalize */
basetransform_class->stop = gst_fastsam_tensor_decoder_stop;
basetransform_class->stop = gst_yolo_tensor_decoder_stop;
/* Calculate the class id placeholder (also a quark) that will be set on all
* OD analytics-meta. */
OOI_CLASS_ID = g_quark_from_static_string ("FastSAM-None");
OOI_CLASS_ID = g_quark_from_static_string ("Yolo-None");
/* Calculate the FastSAM Mask tensor-id */
GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID =
g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_MASK);
/* Calculate the Yolo Mask tensor-id */
GST_MODEL_YOLO_SEGMENTATION_MASKS_ID =
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_MASK);
/* Calculate the FastSAM Logits tensor-id */
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID =
g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_LOGITS);
/* Calculate the Yolo Logits tensor-id */
GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID =
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
}
static void
gst_fastsam_tensor_decoder_init (GstFastSAMTensorDecoder * self)
gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self)
{
/* GstFastSAMTensorDecoder instance initialization */
/* GstYoloTensorDecoder instance initialization */
self->box_confi_thresh = DEFAULT_BOX_CONFI_THRESH;
self->cls_confi_thresh = DEFAULT_CLS_CONFI_THRESH;
self->iou_thresh = DEFAULT_IOU_THRESH;
@ -287,9 +296,9 @@ gst_fastsam_tensor_decoder_init (GstFastSAMTensorDecoder * self)
}
static gboolean
gst_fastsam_tensor_decoder_stop (GstBaseTransform * trans)
gst_yolo_tensor_decoder_stop (GstBaseTransform * trans)
{
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
self->mask_w = 0;
self->mask_h = 0;
@ -306,10 +315,10 @@ gst_fastsam_tensor_decoder_stop (GstBaseTransform * trans)
}
static void
gst_fastsam_tensor_decoder_set_property (GObject * object, guint prop_id,
gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
const GValue * value, GParamSpec * pspec)
{
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object);
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
switch (prop_id) {
case PROP_BOX_CONFI_THRESH:
@ -349,10 +358,10 @@ gst_fastsam_tensor_decoder_set_property (GObject * object, guint prop_id,
}
static void
gst_fastsam_tensor_decoder_get_property (GObject * object, guint prop_id,
gst_yolo_tensor_decoder_get_property (GObject * object, guint prop_id,
GValue * value, GParamSpec * pspec)
{
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object);
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
switch (prop_id) {
case PROP_BOX_CONFI_THRESH:
@ -383,15 +392,15 @@ gst_fastsam_tensor_decoder_get_property (GObject * object, guint prop_id,
}
}
/* gst_fastsam_tensor_decoder_get_tensor_meta
/* gst_yolo_tensor_decoder_get_tensor_meta
* @buf:in: buffer
* @mask_tensor:out: Mask tensor
* @logits_tensor:out: Logits tensor
* @return: TRUE if buf has mask and logits tensor attach to it.
* Retrieve FastSAM masks and logits tensors from buffer.
* Retrieve Yolo masks and logits tensors from buffer.
*/
static gboolean
gst_fastsam_tensor_decoder_get_tensor_meta (GstFastSAMTensorDecoder * self,
gst_yolo_tensor_decoder_get_tensor_meta (GstYoloTensorDecoder * self,
GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor)
{
GstTensorMeta *tensor_meta;
@ -413,14 +422,14 @@ gst_fastsam_tensor_decoder_get_tensor_meta (GstFastSAMTensorDecoder * self,
GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);
/* Retrieve the index of the tensor that has a tensor-id matching
* GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID in the GstTensorMeta. */
* GST_MODEL_YOLO_SEGMENTATION_MASKS_ID in the GstTensorMeta. */
mask_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID);
GST_MODEL_YOLO_SEGMENTATION_MASKS_ID);
/* Retrieve the index of the tensor that has a tensor-id matching
* GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */
* GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */
logits_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID);
GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID);
if (mask_tensor_idx >= 0 && logits_tensor_idx >= 0) {
GST_LOG_OBJECT (self, "Masks tensor id: %d", mask_tensor_idx);
@ -437,16 +446,16 @@ gst_fastsam_tensor_decoder_get_tensor_meta (GstFastSAMTensorDecoder * self,
return FALSE;
}
/* gst_fastsam_tensor_decoder_set_caps:
/* gst_yolo_tensor_decoder_set_caps:
*
* Callback on caps negotiation completed. We use it here to retrieve
* video resolution. See GstBaseTransform for more details.
*/
static gboolean
gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
GstCaps * outcaps)
{
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
if (!gst_video_info_from_caps (&self->video_info, incaps)) {
GST_ERROR_OBJECT (self, "Failed to parse caps");
@ -461,24 +470,24 @@ gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
return TRUE;
}
/* gst_fastsam_tensor_decoder_transform_ip:
/* gst_yolo_tensor_decoder_transform_ip:
* @trans: Instance
* @buf:inout: Buffer containing media and where tensors can be attached
* @return: Flow errors
* Decode FastSAM tensors, post-process tensors and store decoded information
* Decode Yolo tensors, post-process tensors and store decoded information
* into an analytics-meta that is attached to the buffer before been pushed
* downstream.
*/
static GstFlowReturn
gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans,
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans,
GstBuffer * buf)
{
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
GstTensor *masks_tensor, *logits_tensor;
GstAnalyticsRelationMeta *rmeta;
gsize mask_w, mask_h;
if (!gst_fastsam_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor,
if (!gst_yolo_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor,
&logits_tensor))
return GST_FLOW_OK;
@ -567,7 +576,7 @@ gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans,
/* Decode masks_tensor and attach the information in a structured way
* to rmeta. */
gst_fastsam_tensor_decoder_decode_masks_f32 (self, masks_tensor,
gst_yolo_tensor_decoder_decode_masks_f32 (self, masks_tensor,
logits_tensor, rmeta);
return GST_FLOW_OK;
@ -621,7 +630,7 @@ iou (guint bb1_x, guint bb1_y, guint bb1_w, guint bb1_h,
/* Extract bounding box from tensor data */
static void
gst_fastsam_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
gst_yolo_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
BBox * bbox)
{
gfloat w = *(candidate + offset[2]);
@ -635,31 +644,48 @@ gst_fastsam_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
/* Calculate iou between boundingbox of candidate c1 and c2
*/
static gfloat
gst_fastsam_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
BBox * bb1, BBox * bb2)
{
gst_fastsam_tensor_decoder_convert_bbox (c1, offset, bb1);
gst_fastsam_tensor_decoder_convert_bbox (c2, offset, bb2);
gst_yolo_tensor_decoder_convert_bbox (c1, offset, bb1);
gst_yolo_tensor_decoder_convert_bbox (c2, offset, bb2);
return iou (bb1->x, bb1->y, bb1->w, bb1->h, bb2->x, bb2->y, bb2->w, bb2->h);
}
/* Utility function to find the maximum confidence value across the classes
* specified by range.
*/
static gfloat
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer range)
{
ConfidenceRange c_range = *((ConfidenceRange *) range);
gfloat max_val = 0.0;
for (gsize i = c_range.start; i <= c_range.end; i += c_range.step) {
if (*(c + i) > max_val)
max_val = *(c + i);
}
return max_val;
}
/* Compare c1 and c2
* Utility function for sorting candidates based on their maximum class
* confidence within the given confidence range.
*/
static gint
gst_fastsam_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
gpointer offset)
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range)
{
const gfloat *c1_confi =
(*((const gfloat **) c1) + GPOINTER_TO_SIZE (offset));
const gfloat *c2_confi =
(*((const gfloat **) c2) + GPOINTER_TO_SIZE (offset));
return *c1_confi < *c2_confi ? 1 : *c1_confi > *c2_confi ? -1 : 0;
const gfloat *candidate1 = *((gfloat **) c1);
const gfloat *candidate2 = *((gfloat **) c2);
const gfloat max_c1_confi =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate1, range);
const gfloat max_c2_confi =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate2, range);
return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
}
static void
gst_fastsam_tensor_decoder_debug_print_candidate (gpointer candidate_,
gst_yolo_tensor_decoder_debug_print_candidate (gpointer candidate_,
gpointer data)
{
DebugCandidates *ctx = data;
@ -687,9 +713,10 @@ sigmoid (float x)
}
static gboolean
gst_fastsam_tensor_decoder_decode_valid_bb (GstFastSAMTensorDecoder * self,
gst_yolo_tensor_decoder_decode_valid_bb (GstYoloTensorDecoder * self,
gfloat x, gfloat y, gfloat w, gfloat h)
{
if (x > (GST_VIDEO_INFO_WIDTH (&self->video_info)))
return FALSE;
if (y > (GST_VIDEO_INFO_HEIGHT (&self->video_info)))
@ -711,7 +738,7 @@ gst_fastsam_tensor_decoder_decode_valid_bb (GstFastSAMTensorDecoder * self,
}
static void
gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
GstTensor * masks_tensor, GstTensor * logits_tensor,
GstAnalyticsRelationMeta * rmeta)
{
@ -720,13 +747,14 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
gfloat *candidate, **candidates, iou, *data_logits;
gboolean rv, keep;
gsize offset, x_offset, y_offset, w_offset, h_offset, c_offset, offsets[4];
gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
gsize m0_offset;
GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;
BBox bb1, bb2, bb_mask;
GstAnalyticsODMtd od_mtd;
GstAnalyticsSegmentationMtd seg_mtd;
guint8 *mask_data;
ConfidenceRange c_range;
/* Retrieve memory at index 0 and map it in READ mode */
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
@ -747,6 +775,14 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
}
}
/* Trace logits tensor dimensions (the loop below iterates logits_tensor) */
if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
for (gsize i = 0; i < logits_tensor->num_dims; i++) {
GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i,
logits_tensor->dims[i]);
}
}
/* Allocated array to store selected candidates */
if (sel_candidates == NULL) {
/* Number of candidates can be large, keep the array to avoid frequent
@ -778,8 +814,10 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
y_offset = offset;
w_offset = 2 * offset;
h_offset = 3 * offset;
c_offset = 4 * offset;
m0_offset = 5 * offset;
c_range.start = 4 * offset;
c_range.end = (masks_tensor->dims[1] - 32 - 1) * offset;
c_range.step = masks_tensor->dims[2];
m0_offset = c_range.end + offset;
offsets[0] = x_offset;
offsets[1] = y_offset;
offsets[2] = w_offset;
@ -793,11 +831,13 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
candidate = (gfloat *) map_info_masks.data;;
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
/* FastSAM only has one class, but this confidence level is still used
/* Yolo models have multiple classes, so the maximum confidence level across
* all classes is used to evaluate the relevance of the candidate. Here we
* filter candidates based on their class confidence level. */
if (candidate[c_offset] > self->cls_confi_thresh &&
gst_fastsam_tensor_decoder_decode_valid_bb (self,
gfloat max_confidence =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range);
if (max_confidence > self->cls_confi_thresh
&& gst_yolo_tensor_decoder_decode_valid_bb (self,
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
BB_H (candidate))) {
g_ptr_array_add (sel_candidates, candidate);
@ -808,7 +848,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
candidate[y_offset],
candidate[w_offset],
candidate[h_offset],
candidate[w_offset] * candidate[h_offset], candidate[c_offset]);
candidate[w_offset] * candidate[h_offset], max_confidence);
}
/* Pointer arithmetic, going to the next candidate. This is the candidate
@ -824,7 +864,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
* with the highest class confidence level before potentially reaching the
* maximum.*/
g_ptr_array_sort_with_data (sel_candidates,
gst_fastsam_tensor_decoder_sort_candidates, GSIZE_TO_POINTER (c_offset));
gst_yolo_tensor_decoder_sort_candidates, &c_range);
if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
/* For debug purpose only. Prints candidates before NMS */
@ -834,7 +874,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
ctx.offset = offset;
ctx.self = self;
g_ptr_array_foreach (sel_candidates,
gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
gst_yolo_tensor_decoder_debug_print_candidate, &ctx);
}
GstBuffer *mask_buf;
@ -850,7 +890,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
* the candidates we decided to keep and candidates[c] is the candidate
* we're considering to keep or reject */
for (gsize s = 0; s < selected->len && keep; s++) {
iou = gst_fastsam_tensor_decoder_iou (candidates[c], selected->pdata[s],
iou = gst_yolo_tensor_decoder_iou (candidates[c], selected->pdata[s],
offsets, &bb1, &bb2);
keep = iou <= self->iou_thresh;
}
@ -860,7 +900,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
if (selected->len == 0) {
/* The first bounding-box always get in as there's no others bbox
* to filter on based on IoU */
gst_fastsam_tensor_decoder_convert_bbox (candidate, offsets, &bb1);
gst_yolo_tensor_decoder_convert_bbox (candidate, offsets, &bb1);
}
g_ptr_array_add (selected, candidate);
@ -936,7 +976,7 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
ctx.offset = offset;
ctx.self = self;
g_ptr_array_foreach (selected,
gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
gst_yolo_tensor_decoder_debug_print_candidate, &ctx);
}
/* We unmap the memory */

View File

@ -1,10 +1,11 @@
/*
* GStreamer gstreamer-fastsamtensordecoder
* GStreamer gstreamer-yolotensordecoder
* Copyright (C) 2024 Collabora Ltd
* Authors: Daniel Morin <daniel.morin@collabora.com>
* Vineet Suryan <vineet.suryan@collabora.com>
* Santosh Mahto <santosh.mahto@collabora.com>
*
* gstfastsamtensordecoder.h
* gstyolotensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@ -23,8 +24,8 @@
*/
#ifndef __GST_FASTSAM_TENSOR_DECODER_H__
#define __GST_FASTSAM_TENSOR_DECODER_H__
#ifndef __GST_YOLO_TENSOR_DECODER_H__
#define __GST_YOLO_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/video/video.h>
@ -32,9 +33,9 @@
G_BEGIN_DECLS
#define GST_TYPE_FASTSAM_TENSOR_DECODER (gst_fastsam_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder,
GST, FASTSAM_TENSOR_DECODER, GstBaseTransform)
#define GST_TYPE_YOLO_TENSOR_DECODER (gst_yolo_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
GST, YOLO_TENSOR_DECODER, GstBaseTransform)
typedef struct _BBox
{
@ -44,7 +45,7 @@ typedef struct _BBox
guint h;
} BBox;
struct _GstFastSAMTensorDecoder
struct _GstYoloTensorDecoder
{
GstBaseTransform basetransform;
/* Box confidence threshold */
@ -65,10 +66,10 @@ struct _GstFastSAMTensorDecoder
* NMS and maximum detection. */
GPtrArray *selected;
/* Tensor-id identifying mask tensors out of FastSAM inference process. */
/* Tensor-id identifying mask tensors out of yolo inference process. */
GQuark mask_tensor_id;
/* Tensor-id identifying logits tensors out of FastSAM inference process. */
/* Tensor-id identifying logits tensors out of yolo inference process. */
GQuark logits_tensor_id;
/* Region of the mask that contain valid segmentation information */
@ -90,12 +91,12 @@ struct _GstFastSAMTensorDecoder
GstBufferPool *mask_pool;
};
struct _GstFastSAMTensorDecoderClass
struct _GstYoloTensorDecoderClass
{
GstBaseTransformClass parent_class;
};
GST_ELEMENT_REGISTER_DECLARE (fastsam_tensor_decoder)
GST_ELEMENT_REGISTER_DECLARE (yolo_tensor_decoder)
G_END_DECLS
#endif /* __GST_FASTSAM_TENSOR_DECODER_H__ */
#endif /* __GST_YOLO_TENSOR_DECODER_H__ */

View File

@ -1,7 +1,7 @@
tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c',
'gstfastsamtensordecoder.c'
'gstyolotensordecoder.c'
]
tensordecoders_headers = [