fastsamtensordecoder: Add FastSAM tensor decoder
Co-authored-by: Vineet Suryan <vineet.suryan@collabora.com>
This commit is contained in:
parent
6db576f033
commit
b7f964929c
@ -248041,6 +248041,114 @@
|
||||
"tensordecoders": {
|
||||
"description": "Tensor decoders elements",
|
||||
"elements": {
|
||||
"fastsamtensordecoder": {
|
||||
"author": "Daniel Morin <daniel.morin@collabora.com>",
|
||||
"description": "Decode tensors output from the inference of FastSAM model (segmentation) on video frames. The original repository of the FastSAM is located at https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a strawberry segmentation model based on FastSAM architecture in Onnx format can be found at https://col.la/gstonnxmodelseg . This model already has tensors name embedded matching default values of tensors-name-masks and tensors-name-logits properties. It's also possible to embed tensor-ids into any model based on FastSAM architecture to allow this tensor-decoder to decode tensors. This process is described in the Readme of this repository: https://col.la/gstonnxmodels",
|
||||
"hierarchy": [
|
||||
"GstFastSAMTensorDecoder",
|
||||
"GstBaseTransform",
|
||||
"GstElement",
|
||||
"GstObject",
|
||||
"GInitiallyUnowned",
|
||||
"GObject"
|
||||
],
|
||||
"klass": "TensorDecoder/Video",
|
||||
"pad-templates": {
|
||||
"sink": {
|
||||
"caps": "video/x-raw:\n",
|
||||
"direction": "sink",
|
||||
"presence": "always"
|
||||
},
|
||||
"src": {
|
||||
"caps": "video/x-raw:\n",
|
||||
"direction": "src",
|
||||
"presence": "always"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"box-confidence-threshold": {
|
||||
"blurb": "Boxes with a location confidence level inferior to this threshold will be excluded",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "0.4",
|
||||
"max": "1",
|
||||
"min": "0",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "gfloat",
|
||||
"writable": true
|
||||
},
|
||||
"class-confidence-threshold": {
|
||||
"blurb": "Classes with a confidence level inferior to this threshold will be excluded",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "0.4",
|
||||
"max": "1",
|
||||
"min": "0",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "gfloat",
|
||||
"writable": true
|
||||
},
|
||||
"iou-threshold": {
|
||||
"blurb": "Maximum intersection-over-union between bounding boxes to consider them distinct.",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "0.7",
|
||||
"max": "1",
|
||||
"min": "0",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "gfloat",
|
||||
"writable": true
|
||||
},
|
||||
"max-detections": {
|
||||
"blurb": "Maximum object/masks detections.",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "100",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"tensors-name-logits": {
|
||||
"blurb": "Name that identify FastSAM logits tensors.",
|
||||
"conditionally-available": false,
|
||||
"construct": true,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "Gst.Model.FastSAM.Segmentation.Logits",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
},
|
||||
"tensors-name-masks": {
|
||||
"blurb": "Name that identify FastSAM mask tensors.",
|
||||
"conditionally-available": false,
|
||||
"construct": true,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "Gst.Model.FastSAM.Segmentation.Masks",
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "gchararray",
|
||||
"writable": true
|
||||
}
|
||||
},
|
||||
"rank": "primary"
|
||||
},
|
||||
"ssdobjectdetector": {
|
||||
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
|
||||
"description": "Apply tensor output from inference to detect objects in video frames",
|
||||
|
@ -0,0 +1,990 @@
|
||||
/*
|
||||
* GStreamer gstreamer-fastsamtensordecoder
|
||||
* Copyright (C) 2024 Collabora Ltd.
|
||||
* Authors: Daniel Morin <daniel.morin@collabora.com>
|
||||
* Vineet Suryan <vineet.suryan@collabora.com>
|
||||
*
|
||||
* gstfastsamtensordecoder.c
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
 * SECTION:element-fastsamtensordecoder
|
||||
* @short_description: Decode tensors from a FastSAM detection and segmentation
|
||||
* neural network.
|
||||
*
|
||||
*
|
||||
* This element can parse per-buffer inference tensors meta data generated by an upstream
|
||||
* inference element
|
||||
*
|
||||
*
|
||||
* ## Example launch command:
|
||||
*
|
||||
* Test image file, model file and labels file can be found here :
|
||||
* https://gitlab.collabora.com/gstreamer/onnx-models
|
||||
*
|
||||
* GST_DEBUG=fastsamtensordecoder \
|
||||
* gst-launch-1.0 multifilesrc location=strawberry_crops.jpg ! decodebin \
|
||||
* ! videoconvertscale add-borders=1 ! onnxinference execution-provider=cpu
|
||||
* model-file=segmentation.onnx input-image-format=chw input-tensor-offset=0 \
|
||||
* input-tensor-scale=255.0 ! fastsamtensordecoder \
|
||||
* class-confidence-threshold=0.8 iou-threshold=0.7 max-detections=100
|
||||
* ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF
|
||||
* draw-labels=true ! segmentationoverlay hint-maximum-segment-type=50 \
|
||||
* ! videoconvert ! ximagesink
|
||||
*
|
||||
*/
|
||||
|
||||
/* Fix: was "HAVE_CONFI_H" (typo), so config.h was never included. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstfastsamtensordecoder.h"

#include <gst/analytics/analytics.h>

#include <math.h>
#include <string.h>             /* memset() used in instance init */
|
||||
|
||||
#define GST_MODEL_FASTSAM_SEGMENTATION_MASK \
|
||||
"Gst.Model.FastSAM.Segmentation.Masks"
|
||||
#define GST_MODEL_FASTSAM_SEGMENTATION_LOGITS \
|
||||
"Gst.Model.FastSAM.Segmentation.Logits"
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (fastsam_tensor_decoder_debug);
|
||||
#define GST_CAT_DEFAULT fastsam_tensor_decoder_debug
|
||||
|
||||
GST_ELEMENT_REGISTER_DEFINE (fastsam_tensor_decoder, "fastsamtensordecoder",
|
||||
GST_RANK_PRIMARY, GST_TYPE_FASTSAM_TENSOR_DECODER);
|
||||
|
||||
/* GstFastSAMTensorDecoder properties, see properties description in
|
||||
* gst_fastsam_tensor_decoder_class_init for more details. */
|
||||
enum
|
||||
{
|
||||
PROP_0,
|
||||
PROP_BOX_CONFI_THRESH,
|
||||
PROP_CLS_CONFI_THRESH,
|
||||
PROP_IOU_THRESH,
|
||||
PROP_MAX_DETECTION,
|
||||
PROP_MASK_TENSOR_NAME,
|
||||
PROP_LOGITS_TENSOR_NAME
|
||||
};
|
||||
|
||||
/* For debug purpose */
|
||||
typedef struct _DebugCandidates
|
||||
{
|
||||
GstFastSAMTensorDecoder *self;
|
||||
gsize fields; /* Fields count do debug */
|
||||
gsize offset; /* Fields offset */
|
||||
gsize start; /* First field index to debug */
|
||||
} DebugCandidates;
|
||||
|
||||
/* Default properties value */
|
||||
static const gfloat DEFAULT_BOX_CONFI_THRESH = 0.4f;
|
||||
static const gfloat DEFAULT_CLS_CONFI_THRESH = 0.4f;
|
||||
static const gfloat DEFAULT_IOU_THRESH = 0.7f;
|
||||
static const gsize DEFAULT_MAX_DETECTION = 100;
|
||||
|
||||
/* Global variable storing class for OD. Generally OD has class
|
||||
* and we need to provide one but this class is just a placeholder.*/
|
||||
GQuark OOI_CLASS_ID;
|
||||
|
||||
/* Tensor-ids are defined by a string that is converted to a quark,
|
||||
* which is just an integer value using a hash function. For efficiency
|
||||
* we compare on the quark (hash value). Since tensor-id never change we
|
||||
* just calculate the hash once during initialization and store the value in
|
||||
* these variables. */
|
||||
GQuark GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID;
|
||||
GQuark GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID;
|
||||
|
||||
/* GStreamer element srcpad template. Template of a srcpad that can receive
|
||||
* any raw video. */
|
||||
static GstStaticPadTemplate gst_fastsam_tensor_decoder_src_template =
|
||||
GST_STATIC_PAD_TEMPLATE ("src",
|
||||
GST_PAD_SRC,
|
||||
GST_PAD_ALWAYS,
|
||||
GST_STATIC_CAPS ("video/x-raw"));
|
||||
|
||||
/* GStreamer element sinkpad template. Template of a sinkpad that can receive
|
||||
* any raw video. */
|
||||
static GstStaticPadTemplate gst_fastsam_tensor_decoder_sink_template =
|
||||
GST_STATIC_PAD_TEMPLATE ("sink",
|
||||
GST_PAD_SINK,
|
||||
GST_PAD_ALWAYS,
|
||||
GST_STATIC_CAPS ("video/x-raw"));
|
||||
|
||||
/* Prototypes */
|
||||
static void gst_fastsam_tensor_decoder_set_property (GObject * object,
|
||||
guint prop_id, const GValue * value, GParamSpec * pspec);
|
||||
static void gst_fastsam_tensor_decoder_get_property (GObject * object,
|
||||
guint prop_id, GValue * value, GParamSpec * pspec);
|
||||
|
||||
static void gst_fastsam_tensor_decoder_finalize (GObject * object);
|
||||
|
||||
static GstFlowReturn gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform *
|
||||
trans, GstBuffer * buf);
|
||||
static gboolean gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans,
|
||||
GstCaps * incaps, GstCaps * outcaps);
|
||||
static void gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder
|
||||
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
|
||||
GstAnalyticsRelationMeta * rmeta);
|
||||
|
||||
G_DEFINE_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder,
|
||||
GST_TYPE_BASE_TRANSFORM);
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass)
|
||||
{
|
||||
GObjectClass *gobject_class = (GObjectClass *) klass;
|
||||
GstElementClass *element_class = (GstElementClass *) klass;
|
||||
GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;
|
||||
|
||||
/* Define GstFastSAMTensorDecoder debug category. */
|
||||
GST_DEBUG_CATEGORY_INIT (fastsam_tensor_decoder_debug, "fastsamtensordecoder",
|
||||
0, "Tensor decoder for FastSAM segmentation N.N.");
|
||||
|
||||
/* Set GObject vmethod to get and set property */
|
||||
gobject_class->set_property = gst_fastsam_tensor_decoder_set_property;
|
||||
gobject_class->get_property = gst_fastsam_tensor_decoder_get_property;
|
||||
|
||||
/* Set GObject vmethod finalize */
|
||||
gobject_class->finalize = gst_fastsam_tensor_decoder_finalize;
|
||||
|
||||
/* Define GstFastSAMTensorDecoder properties using GObject properties
|
||||
* interface.*/
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_BOX_CONFI_THRESH,
|
||||
g_param_spec_float ("box-confidence-threshold",
|
||||
"Box location confidence threshold",
|
||||
"Boxes with a location confidence level inferior to this threshold "
|
||||
"will be excluded",
|
||||
0.0, 1.0, DEFAULT_BOX_CONFI_THRESH,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_CLS_CONFI_THRESH,
|
||||
g_param_spec_float ("class-confidence-threshold",
|
||||
"Class confidence threshold",
|
||||
"Classes with a confidence level inferior to this threshold "
|
||||
"will be excluded",
|
||||
0.0, 1.0, DEFAULT_CLS_CONFI_THRESH,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_IOU_THRESH,
|
||||
g_param_spec_float ("iou-threshold",
|
||||
"Maximum IOU threshold",
|
||||
"Maximum intersection-over-union between bounding boxes to "
|
||||
"consider them distinct.",
|
||||
0.0, 1.0, DEFAULT_IOU_THRESH,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_MAX_DETECTION,
|
||||
g_param_spec_uint ("max-detections",
|
||||
"Maximum object/masks detections.",
|
||||
"Maximum object/masks detections.",
|
||||
0, G_MAXUINT, DEFAULT_MAX_DETECTION,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_MASK_TENSOR_NAME,
|
||||
g_param_spec_string ("tensors-name-masks",
|
||||
"Mask tensors name",
|
||||
"Name that identify FastSAM mask tensors.",
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_MASK,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
|
||||
G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass),
|
||||
PROP_LOGITS_TENSOR_NAME,
|
||||
g_param_spec_string ("tensors-name-logits",
|
||||
"Logits tensors name",
|
||||
"Name that identify FastSAM logits tensors.",
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
|
||||
G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
/* Element description. */
|
||||
gst_element_class_set_static_metadata (element_class, "fastsamtensordecoder",
|
||||
"TensorDecoder/Video",
|
||||
"Decode tensors output from the inference of FastSAM model (segmentation)"
|
||||
" on video frames. The original repository of the FastSAM is located at"
|
||||
" https://github.com/CASIA-IVA-Lab/FastSAM. For easy experimentation a"
|
||||
" strawberry segmentation model based on FastSAM architecture in Onnx "
|
||||
" format can be found at https://col.la/gstonnxmodelseg . This model "
|
||||
"already has tensors name embedded matching default "
|
||||
"values of tensors-masks-name and tensors-logits-name properties. It's "
|
||||
"also possible to embed tensor-ids into any model based on FastSAM "
|
||||
"architecture to allow this tensor-decoder to decode tensors. This "
|
||||
"process is described in the Readme of this repository: "
|
||||
"https://col.la/gstonnxmodels",
|
||||
"Daniel Morin <daniel.morin@collabora.com>");
|
||||
|
||||
/* Add pads to element base on pad template defined earlier */
|
||||
gst_element_class_add_pad_template (element_class,
|
||||
gst_static_pad_template_get (&gst_fastsam_tensor_decoder_src_template));
|
||||
gst_element_class_add_pad_template (element_class,
|
||||
gst_static_pad_template_get (&gst_fastsam_tensor_decoder_sink_template));
|
||||
|
||||
/* Set GstBaseTransform vmethod transform_ip. This methode is called
|
||||
* by the srcpad when it receive buffer. ip stand for in-place meaning the
|
||||
* buffer remain unchanged by the element. Tensor-decoder only monitor
|
||||
* buffer it receive for a meta attach to the buffer that is a GstTensorMeta
|
||||
* and has a tensor-id can be handled by GstFastSAMTensorDecoder. */
|
||||
basetransform_class->transform_ip =
|
||||
GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_transform_ip);
|
||||
|
||||
/* Set GstBaseTransform set_caps vmethod. This will be called once the
|
||||
* capability negotiation has been completed. We will be able to extract
|
||||
* resolution from this callback. */
|
||||
basetransform_class->set_caps =
|
||||
GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_set_caps);
|
||||
|
||||
/* Calculate the class id placeholder (also a quark) that will be set on all
|
||||
* OD analytics-meta. */
|
||||
OOI_CLASS_ID = g_quark_from_static_string ("FastSAM-None");
|
||||
|
||||
/* Calculate the FastSAM Mask tensor-id */
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID =
|
||||
g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_MASK);
|
||||
|
||||
/* Calculate the FastSAM Logits tensor-id */
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID =
|
||||
g_quark_from_static_string (GST_MODEL_FASTSAM_SEGMENTATION_LOGITS);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_init (GstFastSAMTensorDecoder * self)
|
||||
{
|
||||
/* GstFastSAMTensorDecoder instance initialization */
|
||||
self->box_confi_thresh = DEFAULT_BOX_CONFI_THRESH;
|
||||
self->cls_confi_thresh = DEFAULT_CLS_CONFI_THRESH;
|
||||
self->iou_thresh = DEFAULT_IOU_THRESH;
|
||||
self->max_detection = DEFAULT_MAX_DETECTION;
|
||||
self->sel_candidates = NULL;
|
||||
self->selected = NULL;
|
||||
self->mask_w = 256;
|
||||
self->mask_h = 256;
|
||||
self->mask_length = self->mask_w * self->mask_h;
|
||||
memset (&self->mask_roi, 0, sizeof (BBox));
|
||||
self->mask_pool = NULL;
|
||||
gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_finalize (GObject * object)
|
||||
{
|
||||
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object);
|
||||
|
||||
if (self->sel_candidates) {
|
||||
g_ptr_array_unref (g_steal_pointer (&self->sel_candidates));
|
||||
}
|
||||
|
||||
if (self->selected) {
|
||||
g_ptr_array_unref (g_steal_pointer (&self->selected));
|
||||
}
|
||||
|
||||
if (self->mask_pool) {
|
||||
gst_object_unref (self->mask_pool);
|
||||
}
|
||||
|
||||
G_OBJECT_CLASS (gst_fastsam_tensor_decoder_parent_class)->finalize (object);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_set_property (GObject * object, guint prop_id,
|
||||
const GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object);
|
||||
|
||||
switch (prop_id) {
|
||||
case PROP_BOX_CONFI_THRESH:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->box_confi_thresh = g_value_get_float (value);
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_CLS_CONFI_THRESH:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->cls_confi_thresh = g_value_get_float (value);
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_IOU_THRESH:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->iou_thresh = g_value_get_float (value);
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_MAX_DETECTION:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->max_detection = g_value_get_uint (value);
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_MASK_TENSOR_NAME:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->mask_tensor_id = g_quark_from_string (g_value_get_string (value));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_LOGITS_TENSOR_NAME:
|
||||
GST_OBJECT_LOCK (self);
|
||||
self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_get_property (GObject * object, guint prop_id,
|
||||
GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object);
|
||||
|
||||
switch (prop_id) {
|
||||
case PROP_BOX_CONFI_THRESH:
|
||||
g_value_set_float (value, self->box_confi_thresh);
|
||||
break;
|
||||
case PROP_CLS_CONFI_THRESH:
|
||||
g_value_set_float (value, self->cls_confi_thresh);
|
||||
break;
|
||||
case PROP_IOU_THRESH:
|
||||
g_value_set_float (value, self->iou_thresh);
|
||||
break;
|
||||
case PROP_MAX_DETECTION:
|
||||
g_value_set_uint (value, self->max_detection);
|
||||
break;
|
||||
case PROP_MASK_TENSOR_NAME:
|
||||
GST_OBJECT_LOCK (self);
|
||||
g_value_set_string (value, g_quark_to_string (self->mask_tensor_id));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_LOGITS_TENSOR_NAME:
|
||||
GST_OBJECT_LOCK (self);
|
||||
g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* gst_fastsam_tensor_decoder_get_tensor_meta
|
||||
* @buf:in: buffer
|
||||
* @mask_tensor:out: Mask tensor
|
||||
* @logits_tensor:out: Logits tensor
|
||||
* @return: TRUE if buf has mask and logits tensor attach to it.
|
||||
* Retrieve FastSAM masks and logits tensors from buffer.
|
||||
*/
|
||||
static gboolean
|
||||
gst_fastsam_tensor_decoder_get_tensor_meta (GstFastSAMTensorDecoder * self,
|
||||
GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor)
|
||||
{
|
||||
GstTensorMeta *tensor_meta;
|
||||
gint mask_tensor_idx, logits_tensor_idx;
|
||||
|
||||
g_return_val_if_fail (mask_tensor != NULL, FALSE);
|
||||
g_return_val_if_fail (logits_tensor != NULL, FALSE);
|
||||
|
||||
*mask_tensor = NULL;
|
||||
*logits_tensor = NULL;
|
||||
|
||||
/* Retrieve all TensorMeta attach the buffer */
|
||||
tensor_meta = gst_buffer_get_tensor_meta (buf);
|
||||
if (!tensor_meta) {
|
||||
GST_LOG_OBJECT (self, "No tensor meta");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);
|
||||
|
||||
/* Retrieve the index of the tensor that has a tensor-id matching
|
||||
* GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID in the GstTensorMeta. */
|
||||
mask_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_MASKS_ID);
|
||||
|
||||
/* Retrieve the index of the tensor that has a tensor-id matching
|
||||
* GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */
|
||||
logits_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
|
||||
GST_MODEL_FASTSAM_SEGMENTATION_LOGITS_ID);
|
||||
|
||||
if (mask_tensor_idx >= 0 && logits_tensor_idx >= 0) {
|
||||
GST_LOG_OBJECT (self, "Masks tensor id: %d", mask_tensor_idx);
|
||||
GST_LOG_OBJECT (self, "Masks tensor id: %d", logits_tensor_idx);
|
||||
|
||||
*mask_tensor = tensor_meta->tensors[mask_tensor_idx];
|
||||
*logits_tensor = tensor_meta->tensors[logits_tensor_idx];
|
||||
|
||||
return TRUE;
|
||||
} else {
|
||||
GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping");
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* gst_fastsam_tensor_decoder_set_caps:
|
||||
*
|
||||
* Callback on caps negociation completed. We use it here to retrieve
|
||||
* video resolution. See GstBaseTransform for more details.
|
||||
*/
|
||||
static gboolean
|
||||
gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
|
||||
GstCaps * outcaps)
|
||||
{
|
||||
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
|
||||
|
||||
if (!gst_video_info_from_caps (&self->video_info, incaps)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to parse caps");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (gst_base_transform_is_passthrough (trans)) {
|
||||
GST_ERROR_OBJECT (self, "Failed. Can't handle passthrough");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* The masks need to be cropped to fit the SAR of the image. */
|
||||
/* TODO: We're reconstructing the transformation that was done on the
|
||||
* original image based on the assumption that the complete image without
|
||||
* deformation would be analyzed. This assumption is not alway true and
|
||||
* we should try to find a way to convey this transformation information
|
||||
* and retrieve from here to know the transformation that need to be done
|
||||
* on the mask.*/
|
||||
|
||||
if (self->video_info.width > self->video_info.height) {
|
||||
self->bb2mask_gain = ((gfloat) self->mask_w) / self->video_info.width;
|
||||
self->mask_roi.x = 0;
|
||||
self->mask_roi.w = self->mask_w;
|
||||
self->mask_roi.h = ((gfloat) self->bb2mask_gain) * self->video_info.height;
|
||||
self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2;
|
||||
} else {
|
||||
self->bb2mask_gain = ((gfloat) self->mask_h) / self->video_info.height;
|
||||
self->mask_roi.y = 0;
|
||||
self->mask_roi.h = self->mask_h;
|
||||
self->mask_roi.w = self->bb2mask_gain * self->video_info.width;
|
||||
self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2;
|
||||
}
|
||||
|
||||
if (self->mask_pool == NULL) {
|
||||
GstVideoInfo minfo;
|
||||
GstCaps *caps;
|
||||
gst_video_info_init (&minfo);
|
||||
gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, 256, 256);
|
||||
caps = gst_video_info_to_caps (&minfo);
|
||||
self->mask_pool = gst_video_buffer_pool_new ();
|
||||
GstStructure *config = gst_buffer_pool_get_config (self->mask_pool);
|
||||
gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0);
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
g_return_val_if_fail (gst_buffer_pool_set_config (self->mask_pool, config),
|
||||
FALSE);
|
||||
g_return_val_if_fail (gst_buffer_pool_set_active (self->mask_pool, TRUE),
|
||||
FALSE);
|
||||
gst_caps_unref (caps);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* gst_fastsam_tensor_decoder_transform_ip:
|
||||
* @trans: Instance
|
||||
* @buf:inout: Buffer containing media and where tensors can be attached
|
||||
* @return: Flow errors
|
||||
* Decode FastSAM tensors, post-process tensors and store decoded information
|
||||
* into an analytics-meta that is attached to the buffer before been pushed
|
||||
* downstream.
|
||||
*/
|
||||
static GstFlowReturn
|
||||
gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans,
|
||||
GstBuffer * buf)
|
||||
{
|
||||
GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans);
|
||||
GstTensor *masks_tensor, *logits_tensor;
|
||||
GstAnalyticsRelationMeta *rmeta;
|
||||
gsize mask_w, mask_h;
|
||||
|
||||
if (!gst_fastsam_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor,
|
||||
&logits_tensor))
|
||||
return GST_FLOW_OK;
|
||||
|
||||
if (masks_tensor->num_dims < 3) {
|
||||
GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
|
||||
("Masks tensor must have at least 3 dimensions,"
|
||||
"but only has %zu", masks_tensor->num_dims));
|
||||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
if (logits_tensor->num_dims != 4) {
|
||||
GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
|
||||
("Logits tensor must have 4 dimensions but has %zu",
|
||||
masks_tensor->num_dims));
|
||||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
mask_w = logits_tensor->dims[2];
|
||||
mask_h = logits_tensor->dims[3];
|
||||
|
||||
/* The masks need to be cropped to fit the SAR of the image. */
|
||||
/* TODO: We're reconstructing the transformation that was done on the
|
||||
* original image based on the assumption that the complete image without
|
||||
* deformation would be analyzed. This assumption is not alway true and
|
||||
* we should try to find a way to convey this transformation information
|
||||
* and retrieve from here to know the transformation that need to be done
|
||||
* on the mask.*/
|
||||
|
||||
if (self->mask_w != mask_w || self->mask_h != mask_h) {
|
||||
self->mask_w = mask_w;
|
||||
self->mask_h = mask_h;
|
||||
self->mask_length = mask_w * mask_h;
|
||||
|
||||
if (self->video_info.width > self->video_info.height) {
|
||||
self->bb2mask_gain = ((gfloat) self->mask_w) / self->video_info.width;
|
||||
self->mask_roi.x = 0;
|
||||
self->mask_roi.w = self->mask_w;
|
||||
self->mask_roi.h =
|
||||
((gfloat) self->bb2mask_gain) * self->video_info.height;
|
||||
self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2;
|
||||
} else {
|
||||
self->bb2mask_gain = ((gfloat) self->mask_h) / self->video_info.height;
|
||||
self->mask_roi.y = 0;
|
||||
self->mask_roi.h = self->mask_h;
|
||||
self->mask_roi.w = self->bb2mask_gain * self->video_info.width;
|
||||
self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2;
|
||||
}
|
||||
|
||||
if (self->mask_pool) {
|
||||
gst_buffer_pool_set_active (self->mask_pool, FALSE);
|
||||
g_clear_object (&self->mask_pool);
|
||||
}
|
||||
}
|
||||
|
||||
if (self->mask_pool == NULL) {
|
||||
GstVideoInfo minfo;
|
||||
GstCaps *caps;
|
||||
gst_video_info_init (&minfo);
|
||||
gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, self->mask_w,
|
||||
self->mask_h);
|
||||
caps = gst_video_info_to_caps (&minfo);;
|
||||
self->mask_pool = gst_video_buffer_pool_new ();
|
||||
|
||||
GstStructure *config = gst_buffer_pool_get_config (self->mask_pool);
|
||||
gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0);
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_set_config (self->mask_pool, config);
|
||||
gst_buffer_pool_set_active (self->mask_pool, TRUE);
|
||||
gst_caps_unref (caps);
|
||||
}
|
||||
|
||||
|
||||
static GstAnalyticsRelationMetaInitParams rmeta_init_params = {
|
||||
.initial_buf_size = 1024,
|
||||
.initial_relation_order = 10
|
||||
};
|
||||
|
||||
/* Retrieve or attach an analytics-relation-meta to the buffer.
|
||||
* Analytics-relation-meta are container that can reveive multiple
|
||||
* analytics-meta, like OD and Segmentation. The following call will only
|
||||
* retrieve an analytics-relation-meta if it exist or create one if it
|
||||
* does not exist. */
|
||||
rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params);
|
||||
g_return_val_if_fail (rmeta != NULL, GST_FLOW_ERROR);
|
||||
|
||||
/* Decode masks_tensor and attach the information in a structured way
|
||||
* to rmeta.
|
||||
* TODO: I think we need to send both tensors masks and logits
|
||||
* to gst_fastsam_tensor_decoder_decode_masks_f32 since both are
|
||||
* required simultanously to extract the segmentation. If this is the case
|
||||
* we probably should rename gst_fastsam_tensor_decoder_decode_masks_f32 to
|
||||
* gst_fastsam_tensor_decoder_decode_f32. */
|
||||
gst_fastsam_tensor_decoder_decode_masks_f32 (self, masks_tensor,
|
||||
logits_tensor, rmeta);
|
||||
|
||||
return GST_FLOW_OK;
|
||||
}
|
||||
|
||||
/* Evaluate if there's an intersection between segement s1 and s2 */
|
||||
static guint
|
||||
linear_intersection (guint s1_min, guint s1_max, guint s2_min, guint s2_max)
|
||||
{
|
||||
guint tmp;
|
||||
if (s1_max > s2_min && s2_max > s1_min) {
|
||||
if (s1_min > s2_min) {
|
||||
tmp = (s2_max > s1_max) ? s1_max : s2_max;
|
||||
return tmp - s1_min;
|
||||
} else {
|
||||
tmp = (s1_max > s2_max) ? s2_max : s1_max;
|
||||
return tmp - s2_min;
|
||||
}
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
/* Intersection-over-Union of two axis-aligned boxes given as (x, y, w, h).
 * Returns 0.0f when the boxes do not overlap.
 *
 * Rationale: a linear (1D) intersection is much cheaper than a full 2D
 * test. Project both boxes on the X axis first; if those segments do not
 * overlap, the boxes cannot intersect in 2D and we are done. Only when the
 * X projections overlap do we test the Y projections, and only when both
 * overlap do we compute the actual IoU. */
static gfloat
iou (guint bb1_x, guint bb1_y, guint bb1_w, guint bb1_h,
    guint bb2_x, guint bb2_y, guint bb2_w, guint bb2_h)
{
  const guint overlap_x =
      linear_intersection (bb1_x, bb1_x + bb1_w, bb2_x, bb2_x + bb2_w);

  if (overlap_x == 0)
    return 0.0f;

  const guint overlap_y =
      linear_intersection (bb1_y, bb1_y + bb1_h, bb2_y, bb2_y + bb2_h);

  if (overlap_y == 0)
    return 0.0f;

  const guint area1 = bb1_w * bb1_h;
  const guint area2 = bb2_w * bb2_h;
  const guint inter = overlap_x * overlap_y;
  const guint total = area1 + area2 - inter;

  return total == 0 ? 0.0f : ((gfloat) inter) / total;
}
|
||||
|
||||
/* Convert a candidate's centre-based box (cx, cy, w, h) into a top-left
 * based BBox. offset[] holds the per-field positions into the planar
 * candidate data: [0]=x, [1]=y, [2]=w, [3]=h. Width and height get +0.5
 * so the implicit float-to-integer conversion rounds to nearest. */
static void
gst_fastsam_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
    BBox * bbox)
{
  const gfloat width = candidate[offset[2]];
  const gfloat height = candidate[offset[3]];

  bbox->x = candidate[offset[0]] - (width / 2);
  bbox->y = candidate[offset[1]] - (height / 2);
  bbox->w = width + 0.5;
  bbox->h = height + 0.5;
}
|
||||
|
||||
/* Calculate iou between boundingbox of candidate c1 and c2
|
||||
*/
|
||||
static gfloat
|
||||
gst_fastsam_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
|
||||
BBox * bb1, BBox * bb2)
|
||||
{
|
||||
gst_fastsam_tensor_decoder_convert_bbox (c1, offset, bb1);
|
||||
gst_fastsam_tensor_decoder_convert_bbox (c2, offset, bb2);
|
||||
return iou (bb1->x, bb1->y, bb1->w, bb1->h, bb2->x, bb2->y, bb2->w, bb2->h);
|
||||
}
|
||||
|
||||
/* GCompareDataFunc for sorting candidate pointers in descending order of
 * the field located `offset` floats past each candidate pointer (in
 * practice the class-confidence field). c1/c2 point at GPtrArray
 * elements, i.e. they are gfloat ** in disguise. */
static gint
gst_fastsam_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
    gpointer offset)
{
  const gsize field = GPOINTER_TO_SIZE (offset);
  const gfloat a = (*((const gfloat **) c1))[field];
  const gfloat b = (*((const gfloat **) c2))[field];

  if (a < b)
    return 1;
  if (a > b)
    return -1;
  return 0;
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_debug_print_candidate (gpointer candidate_,
|
||||
gpointer data)
|
||||
{
|
||||
DebugCandidates *ctx = data;
|
||||
const gfloat *candidate = candidate_;
|
||||
|
||||
for (gsize i = ctx->start; i < ctx->fields + ctx->start; i++) {
|
||||
GST_TRACE_OBJECT (ctx->self, "Field %lu: %f", i,
|
||||
*(candidate + (i * ctx->offset)));
|
||||
}
|
||||
}
|
||||
|
||||
static float
|
||||
sigmoid (float x)
|
||||
{
|
||||
/* Check for positive overflow */
|
||||
if (x > 0) {
|
||||
double exp_neg_x = exp (-x);
|
||||
return 1.0 / (1.0 + exp_neg_x);
|
||||
}
|
||||
/* Check for negative overflow and improve stability for negative x */
|
||||
else {
|
||||
double exp_x = exp (x);
|
||||
return exp_x / (1.0 + exp_x);
|
||||
}
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_fastsam_tensor_decoder_decode_valid_bb (GstFastSAMTensorDecoder * self,
|
||||
gfloat x, gfloat y, gfloat w, gfloat h)
|
||||
{
|
||||
if (x > (GST_VIDEO_INFO_WIDTH (&self->video_info)))
|
||||
return FALSE;
|
||||
if (y > (GST_VIDEO_INFO_HEIGHT (&self->video_info)))
|
||||
return FALSE;
|
||||
if (x < -(gfloat) (GST_VIDEO_INFO_WIDTH (&self->video_info) / 2.0))
|
||||
return FALSE;
|
||||
if (y < -(gfloat) (GST_VIDEO_INFO_HEIGHT (&self->video_info) / 2.0))
|
||||
return FALSE;
|
||||
if (w <= 0)
|
||||
return FALSE;
|
||||
if (h <= 0)
|
||||
return FALSE;
|
||||
if (w > (GST_VIDEO_INFO_WIDTH (&self->video_info)))
|
||||
return FALSE;
|
||||
if (h > (GST_VIDEO_INFO_HEIGHT (&self->video_info)))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self,
|
||||
GstTensor * masks_tensor, GstTensor * logits_tensor,
|
||||
GstAnalyticsRelationMeta * rmeta)
|
||||
{
|
||||
/*guint batch_size = masks_tensor->dims[0]; */
|
||||
/*guint num_masks = masks_tensor->dims[1]; */
|
||||
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
|
||||
gfloat *candidate, **candidates, iou, *data_logits;
|
||||
gboolean rv, keep;
|
||||
gsize offset, x_offset, y_offset, w_offset, h_offset, c_offset, offsets[4];
|
||||
gsize m0_offset;
|
||||
GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;
|
||||
BBox bb1, bb2, bb_mask;
|
||||
GstAnalyticsODMtd od_mtd;
|
||||
GstAnalyticsSegmentationMtd seg_mtd;
|
||||
guint8 *mask_data;
|
||||
|
||||
/* Retrieve memory at index 0 and map it in READ mode */
|
||||
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
|
||||
g_assert (rv);
|
||||
|
||||
/* Retrieve memory at index 0 from logits_tensor in READ mode */
|
||||
rv = gst_buffer_map (logits_tensor->data, &map_info_logits, GST_MAP_READ);
|
||||
g_assert (rv);
|
||||
data_logits = (gfloat *) map_info_logits.data;
|
||||
|
||||
GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims);
|
||||
|
||||
/* Trace masks tensor dimensions */
|
||||
if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
|
||||
for (gsize i = 0; i < masks_tensor->num_dims; i++) {
|
||||
GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i,
|
||||
masks_tensor->dims[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocated array to store selected candidates */
|
||||
if (sel_candidates == NULL) {
|
||||
/* Number of candidates can be large, keep the array to avoid frequent
|
||||
* allocation */
|
||||
sel_candidates = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
|
||||
self->sel_candidates = sel_candidates;
|
||||
selected = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
|
||||
self->selected = selected;
|
||||
} else {
|
||||
/* Reset lengths when we re-use arrays */
|
||||
sel_candidates->len = 0;
|
||||
selected->len = 0;
|
||||
}
|
||||
|
||||
/* masks_tensor->dims[2] contain the number of candidates. Let's call the
|
||||
* number of candidates C. We store this value in offset as we use it
|
||||
* calculate the offset of candidate fields. The variable #data_masks above point
|
||||
* at the masks tensor data, but candidates data is organize like a plane.
|
||||
* Candidates bbox X coord fields from 0 to C start at the begining of the
|
||||
* tensor data and are continguous in memory, followed by all candidates
|
||||
* field Y, followed by field W, ... followed by field class confidence level,
|
||||
* ..., followed by all candidates mask0, ..., followed by all candidates
|
||||
* mask31. Bellow we pre-calculate each field offset relative to the
|
||||
* candidate pointer (pointer to field X), which will allow us to easily
|
||||
* access each candiates field.
|
||||
* */
|
||||
offset = masks_tensor->dims[2];
|
||||
x_offset = 0;
|
||||
y_offset = offset;
|
||||
w_offset = 2 * offset;
|
||||
h_offset = 3 * offset;
|
||||
c_offset = 4 * offset;
|
||||
m0_offset = 5 * offset;
|
||||
offsets[0] = x_offset;
|
||||
offsets[1] = y_offset;
|
||||
offsets[2] = w_offset;
|
||||
offsets[3] = h_offset;
|
||||
|
||||
#define MASK_X(candidate, index) candidate[m0_offset + (index * offset)]
|
||||
#define BB_X(candidate) candidate[x_offset]
|
||||
#define BB_Y(candidate) candidate[y_offset]
|
||||
#define BB_W(candidate) candidate[w_offset]
|
||||
#define BB_H(candidate) candidate[h_offset]
|
||||
|
||||
candidate = (gfloat *) map_info_masks.data;;
|
||||
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
|
||||
/* FastSAM only has one class, but this confidence level is still used
|
||||
* to evaluate the relevance of the candidate. Here we filter candidates
|
||||
* based on their class confidence level.*/
|
||||
if (candidate[c_offset] > self->cls_confi_thresh &&
|
||||
gst_fastsam_tensor_decoder_decode_valid_bb (self,
|
||||
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
|
||||
BB_H (candidate))) {
|
||||
g_ptr_array_add (sel_candidates, candidate);
|
||||
GST_TRACE_OBJECT (self,
|
||||
"%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
|
||||
c_idx,
|
||||
candidate[x_offset],
|
||||
candidate[y_offset],
|
||||
candidate[w_offset],
|
||||
candidate[h_offset],
|
||||
candidate[w_offset] * candidate[h_offset], candidate[c_offset]);
|
||||
}
|
||||
|
||||
/* Pointer arithmetic, going to the next candidate. This is the candidate
|
||||
* pointer that is now incremented to the next candidate which is also
|
||||
* the field X of the next candidate.*/
|
||||
candidate += 1;
|
||||
}
|
||||
|
||||
GST_LOG_OBJECT (self, "Selected candidates count: %u", sel_candidates->len);
|
||||
|
||||
/* We sort the remaining candidates because, in the next selection phase we
|
||||
* have a maximum and we want to make sure that considered only the candidates
|
||||
* with the highest class confidence level before potentially reaching the
|
||||
* maximum.*/
|
||||
g_ptr_array_sort_with_data (sel_candidates,
|
||||
gst_fastsam_tensor_decoder_sort_candidates, GSIZE_TO_POINTER (c_offset));
|
||||
|
||||
if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
|
||||
/* For debug purpose only. Prints candidates before NMS */
|
||||
DebugCandidates ctx;
|
||||
ctx.start = 0;
|
||||
ctx.fields = 5;
|
||||
ctx.offset = offset;
|
||||
ctx.self = self;
|
||||
g_ptr_array_foreach (sel_candidates,
|
||||
gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
|
||||
}
|
||||
|
||||
GstBuffer *mask_buf;
|
||||
guint region_ids[2] = { 0, 0 };
|
||||
|
||||
/* Algorithm in part inspired by OpenCV NMSBoxes */
|
||||
candidates = (gfloat **) sel_candidates->pdata;
|
||||
for (gsize c = 0; c < sel_candidates->len; c++) {
|
||||
keep = TRUE;
|
||||
|
||||
/* We only want to a NMS using IoU between candidates we've decided to
|
||||
* keep and the new one we considering to keep. selected array contain
|
||||
* the candidates we decided to keep and candidates[c] is the candidate
|
||||
* we're considering to keep or reject */
|
||||
for (gsize s = 0; s < selected->len && keep; s++) {
|
||||
iou = gst_fastsam_tensor_decoder_iou (candidates[c], selected->pdata[s],
|
||||
offsets, &bb1, &bb2);
|
||||
keep = iou <= self->iou_thresh;
|
||||
}
|
||||
|
||||
if (keep) {
|
||||
candidate = sel_candidates->pdata[c];
|
||||
if (selected->len == 0) {
|
||||
/* The first bounding-box always get in as there's no others bbox
|
||||
* to filter on based on IoU */
|
||||
gst_fastsam_tensor_decoder_convert_bbox (candidate, offsets, &bb1);
|
||||
}
|
||||
|
||||
g_ptr_array_add (selected, candidate);
|
||||
region_ids[1] = selected->len;
|
||||
|
||||
/* We add the analytics-objectdetection-meta to the buffer. Since
|
||||
* there's only one class the class confidence level is set to -1.0
|
||||
* as it's deemed not important. */
|
||||
gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID,
|
||||
bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd);
|
||||
|
||||
bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
|
||||
bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;
|
||||
bb_mask.w = self->bb2mask_gain * bb1.w;
|
||||
bb_mask.h = self->bb2mask_gain * bb1.h;
|
||||
|
||||
mask_buf = NULL;
|
||||
g_assert (gst_buffer_pool_acquire_buffer (self->mask_pool,
|
||||
&mask_buf, NULL) == GST_FLOW_OK);
|
||||
g_assert (GST_IS_BUFFER (mask_buf));
|
||||
GstVideoMeta *vmeta = gst_buffer_get_video_meta (mask_buf);
|
||||
g_assert (vmeta != NULL);
|
||||
vmeta->width = bb_mask.w;
|
||||
vmeta->height = bb_mask.h;
|
||||
|
||||
gst_buffer_map (mask_buf, &out_mask_info, GST_MAP_READWRITE);
|
||||
mask_data = (guint8 *) out_mask_info.data;
|
||||
|
||||
#define MX_MAX (bb_mask.x + bb_mask.w)
|
||||
#define MY_MAX (bb_mask.y + bb_mask.h)
|
||||
|
||||
for (gint my = bb_mask.y, i = 0, j; my < MY_MAX; my++) {
|
||||
for (gint mx = bb_mask.x; mx < MX_MAX; mx++, i++) {
|
||||
float sum = 0.0f;
|
||||
j = my * self->mask_w + mx;
|
||||
for (gint k = 0; k < 32; ++k) {
|
||||
GST_TRACE_OBJECT (self, "protos data at (%d, %d) is %f", j, k,
|
||||
data_logits[k * self->mask_length + j]);
|
||||
sum +=
|
||||
MASK_X (candidate, k) * data_logits[k * self->mask_length + j];
|
||||
}
|
||||
mask_data[i] = sigmoid (sum) > 0.5 ? selected->len : 0;
|
||||
}
|
||||
}
|
||||
|
||||
gst_analytics_relation_meta_add_segmentation_mtd (rmeta, mask_buf,
|
||||
GST_SEGMENTATION_TYPE_INSTANCE, 1, region_ids, bb1.x, bb1.y, bb1.w,
|
||||
bb1.h, &seg_mtd);
|
||||
|
||||
gst_analytics_relation_meta_set_relation (rmeta,
|
||||
GST_ANALYTICS_REL_TYPE_RELATE_TO, seg_mtd.id, od_mtd.id);
|
||||
|
||||
gst_analytics_relation_meta_set_relation (rmeta,
|
||||
GST_ANALYTICS_REL_TYPE_RELATE_TO, od_mtd.id, seg_mtd.id);
|
||||
|
||||
gst_buffer_unmap (mask_buf, &out_mask_info);
|
||||
|
||||
/* If the maximum number of candidate selected is reached exit the
|
||||
* selection process. */
|
||||
if (selected->len >= self->max_detection) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GST_LOG_OBJECT (self, "Selected count: %u", selected->len);
|
||||
|
||||
if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
|
||||
DebugCandidates ctx;
|
||||
/* For debug purpose only. Prints candidates after NMS */
|
||||
ctx.start = 0;
|
||||
ctx.fields = 5;
|
||||
ctx.offset = offset;
|
||||
ctx.self = self;
|
||||
g_ptr_array_foreach (selected,
|
||||
gst_fastsam_tensor_decoder_debug_print_candidate, &ctx);
|
||||
}
|
||||
|
||||
/* We unmap the memory */
|
||||
gst_buffer_unmap (masks_tensor->data, &map_info_masks);
|
||||
gst_buffer_unmap (logits_tensor->data, &map_info_logits);
|
||||
}
|
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* GStreamer gstreamer-fastsamtensordecoder
|
||||
* Copyright (C) 2024 Collabora Ltd
|
||||
* Authors: Daniel Morin <daniel.morin@collabora.com>
|
||||
* Vineet Suryan <vineet.suryan@collabora.com>
|
||||
*
|
||||
* gstfastsamtensordecoder.h
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __GST_FASTSAM_TENSOR_DECODER_H__
|
||||
#define __GST_FASTSAM_TENSOR_DECODER_H__
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/video/video.h>
|
||||
#include <gst/base/base.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
#define GST_TYPE_FASTSAM_TENSOR_DECODER (gst_fastsam_tensor_decoder_get_type ())
|
||||
G_DECLARE_FINAL_TYPE (GstFastSAMTensorDecoder, gst_fastsam_tensor_decoder,
|
||||
GST, FASTSAM_TENSOR_DECODER, GstBaseTransform)
|
||||
|
||||
/* Axis-aligned bounding box. x/y are the top-left corner and are signed
 * because a decoded box may extend past the left/top frame edge (see
 * gst_fastsam_tensor_decoder_decode_valid_bb); w/h are unsigned sizes. */
typedef struct _BBox
{
  gint x;
  gint y;
  guint w;
  guint h;
} BBox;
|
||||
|
||||
/* Instance structure of the FastSAM tensor decoder element. */
struct _GstFastSAMTensorDecoder
{
  GstBaseTransform basetransform;
  /* Box confidence threshold */
  gfloat box_confi_thresh;
  /* Class confidence threshold */
  gfloat cls_confi_thresh;
  /* Intersection-of-Union threshold */
  gfloat iou_thresh;
  /* Maximum detection/mask */
  gsize max_detection;
  /* Video Info */
  GstVideoInfo video_info;

  /* Candidates with a class confidence level above threshold. */
  GPtrArray *sel_candidates;

  /* Final candidates selected that respect class confidence level,
   * NMS and maximum detection. */
  GPtrArray *selected;

  /* Tensor-id identifying mask tensors out of FastSAM inference process. */
  GQuark mask_tensor_id;

  /* Tensor-id identifying logits tensors out of FastSAM inference process. */
  GQuark logits_tensor_id;

  /* Region of the mask that contain valid segmentation information */
  BBox mask_roi;

  /* Scaling factor to convert bounding-box coordinates to mask coordinates */
  gfloat bb2mask_gain;

  /* Mask width */
  guint mask_w;

  /* Mask height */
  guint mask_h;

  /* Mask length: used as the stride between consecutive prototype planes
   * in the logits tensor (presumably mask_w * mask_h -- TODO confirm) */
  gsize mask_length;

  /* BufferPool for mask */
  GstBufferPool *mask_pool;
};
|
||||
|
||||
/* NOTE(review): G_DECLARE_FINAL_TYPE already emits the class structure
 * for final types, so a hand-written class struct is normally not needed
 * alongside it -- confirm this definition is intentional and actually
 * referenced. */
struct _GstFastSAMTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};
|
||||
|
||||
GST_ELEMENT_REGISTER_DECLARE (fastsam_tensor_decoder)
|
||||
|
||||
G_END_DECLS
|
||||
#endif /* __GST_FASTSAM_TENSOR_DECODER_H__ */
|
@ -25,6 +25,7 @@
|
||||
#endif
|
||||
|
||||
#include "gstssdobjectdetector.h"
|
||||
#include "gstfastsamtensordecoder.h"
|
||||
|
||||
/**
|
||||
* SECTION:plugin-tensordecoders
|
||||
@ -38,6 +39,7 @@ plugin_init (GstPlugin * plugin)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
|
||||
ret |= GST_ELEMENT_REGISTER (fastsam_tensor_decoder, plugin);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
tensordecoders_sources = [
|
||||
'gsttensordecoders.c',
|
||||
'gstssdobjectdetector.c'
|
||||
'gstssdobjectdetector.c',
|
||||
'gstfastsamtensordecoder.c'
|
||||
]
|
||||
|
||||
tensordecoders_headers = [
|
||||
|
Loading…
x
Reference in New Issue
Block a user