/* * GStreamer gstreamer-yolotensordecoder * Copyright (C) 2024 Collabora Ltd. * Authors: Daniel Morin * Vineet Suryan * Santosh Mahto * * gstyolotensordecoder.c * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /** * SECTION:element-yolotensordecoder.c * @short_description: Decode tensors from a FastSAM or YOLOv8 detection and segmentation * neural network. * * * This element can parse per-buffer inference tensors meta data generated by an upstream * inference element * * * ## Example launch command: * * Test image file, model file and labels file can be found here : * https://gitlab.collabora.com/gstreamer/onnx-models * * GST_DEBUG=yolotensordecoder \ * gst-launch-1.0 multifilesrc location=strawberry_crops.jpg ! decodebin \ * ! videoconvertscale add-borders=1 ! onnxinference execution-provider=cpu * model-file=segmentation.onnx input-image-format=chw input-tensor-offset=0 \ * input-tensor-scale=255.0 ! yolotensordecoder \ * class-confidence-threshold=0.8 iou-threshold=0.7 max-detections=100 \ * label-file=coco_labels.txt ! objectdetectionoverlay \ * object-detection-outline-color=0xFF0000FF draw-labels=true ! \ * segmentationoverlay hint-maximum-segment-type=50 ! videoconvert ! ximagesink * */ #ifdef HAVE_CONFI_H #include "config.h" #endif #include "gstyolotensordecoder.h" #include #include #include #define GST_MODEL_YOLO_DETECTION_MASK \ "Gst.Model.Yolo.Segmentation.Masks" #define GST_MODEL_YOLO_SEGMENTATION_LOGITS \ "Gst.Model.Yolo.Segmentation.Logits" #define YOLO_MASKS_WEIGHT_SIZE 32 GST_DEBUG_CATEGORY_STATIC (yolo_tensor_decoder_debug); #define GST_CAT_DEFAULT yolo_tensor_decoder_debug GST_ELEMENT_REGISTER_DEFINE (yolo_seg_tensor_decoder, "yolosegv8tensordecoder", GST_RANK_PRIMARY, GST_TYPE_YOLO_SEG_TENSOR_DECODER); GST_ELEMENT_REGISTER_DEFINE (yolo_od_tensor_decoder, "yoloodv5tensordecoder", GST_RANK_PRIMARY, GST_TYPE_YOLO_OD_TENSOR_DECODER); /* GstYoloTensorDecoder properties, see properties description in * gst_yolo_tensor_decoder_class_init for more details. */ enum { PROP_0, PROP_BOX_CONFI_THRESH, PROP_CLS_CONFI_THRESH, PROP_IOU_THRESH, PROP_MAX_DETECTION, PROP_MASK_TENSOR_NAME, PROP_LOGITS_TENSOR_NAME, PROP_LABEL_FILE }; /* For debug purpose */ typedef struct _DebugCandidates { gpointer self; gsize fields; /* Fields count do debug */ gsize offset; /* Fields offset */ gsize start; /* First field index to debug */ } DebugCandidates; /* Specify the range of confidence level in tensor output*/ typedef struct _ConfidenceRange { gsize start; /* Start index of confidence level */ gsize end; /* End index of confidence level */ gsize step; /* Step size of next confidence level index */ } ConfidenceRange; /* Default properties value */ static const gfloat DEFAULT_BOX_CONFI_THRESH = 0.4f; static const gfloat DEFAULT_CLS_CONFI_THRESH = 0.4f; static const gfloat DEFAULT_IOU_THRESH = 0.7f; static const gsize DEFAULT_MAX_DETECTION = 100; /* Global variable storing class for OD. Generally OD has class * and we need to provide one but this class is just a placeholder.*/ GQuark OOI_CLASS_ID; /* To tensor-id are defined by a string that is converted to quark * which is just an integer value using a hash function. For efficiency * we compare on the quark (hash value). Since tensor-id never change we * just calculate the hash once during initialization and store the value in * these variables. */ GQuark GST_MODEL_YOLO_DETECTION_MASKS_ID; GQuark GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID; /* GStreamer element srcpad template. Template of a srcpad that can receive * any raw video. */ static GstStaticPadTemplate gst_yolo_tensor_decoder_src_template = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-raw")); /* GStreamer element sinkpad template. Template of a sinkpad that can receive * any raw video. */ static GstStaticPadTemplate gst_yolo_tensor_decoder_sink_template = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-raw")); /*Common Prototypes */ static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps); static gboolean gst_yolo_tensor_decoder_get_tensor_meta (GstYoloOdTensorDecoder * self, GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor); /* GstYoloOdTensorDecoder Prototypes */ static void gst_yolo_od_tensor_decoder_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec); static void gst_yolo_od_tensor_decoder_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static gboolean gst_yolo_od_tensor_decoder_stop (GstBaseTransform * trans); static GstFlowReturn gst_yolo_od_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf); static void gst_yolo_od_tensor_decoder_finalize (GObject * object); static void gst_yolo_od_tensor_decoder_decode_masks_f32 (GstYoloOdTensorDecoder * self, GstTensor * masks_tensor, GstAnalyticsRelationMeta * rmeta); /* GstYoloSegTensorDecoder Prototypes */ static void gst_yolo_seg_tensor_decoder_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec); static void gst_yolo_seg_tensor_decoder_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static gboolean gst_yolo_seg_tensor_decoder_stop (GstBaseTransform * trans); static GstFlowReturn gst_yolo_seg_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf); static void gst_yolo_seg_tensor_decoder_finalize (GObject * object); static void gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (GstYoloSegTensorDecoder * self, GstTensor * masks_tensor, GstTensor * logits_tensor, GstAnalyticsRelationMeta * rmeta); G_DEFINE_TYPE (GstYoloOdTensorDecoder, gst_yolo_od_tensor_decoder, GST_TYPE_BASE_TRANSFORM); G_DEFINE_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder, GST_TYPE_YOLO_OD_TENSOR_DECODER); static GArray * read_labels (const char *labels_file) { GArray *array; GFile *file = g_file_new_for_path (labels_file); GFileInputStream *file_stream; GDataInputStream *data_stream; GError *error = NULL; gchar *line; file_stream = g_file_read (file, NULL, &error); g_object_unref (file); if (!file_stream) { GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message); g_clear_error (&error); return NULL; } data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream)); g_object_unref (file_stream); array = g_array_new (FALSE, FALSE, sizeof (GQuark)); while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL, &error))) { GQuark label = g_quark_from_string (line); g_array_append_val (array, label); g_free (line); } g_object_unref (data_stream); if (error) { GST_WARNING ("Could not open file %s: %s", labels_file, error->message); g_array_free (array, TRUE); g_clear_error (&error); return NULL; } if (array->len == 0) { g_array_free (array, TRUE); return NULL; } return array; } static gboolean gst_yolo_tensor_decoder_get_tensor_meta (GstYoloOdTensorDecoder * self, GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor) { GstTensorMeta *tensor_meta; gint mask_tensor_idx, logits_tensor_idx; g_return_val_if_fail (mask_tensor != NULL || logits_tensor != NULL, FALSE); /* Retrieve all TensorMeta attach the buffer */ tensor_meta = gst_buffer_get_tensor_meta (buf); if (!tensor_meta) { GST_LOG_OBJECT (self, "No tensor meta"); return FALSE; } GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors); if (mask_tensor) { *mask_tensor = NULL; /* Retrieve the index of the tensor that has a tensor-id matching * GST_MODEL_YOLO_SEGMENTATION_MASKS_ID in the GstTensorMeta. */ mask_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta, GST_MODEL_YOLO_DETECTION_MASKS_ID); if (mask_tensor_idx >= 0) { GST_LOG_OBJECT (self, "Masks tensor id: %d", mask_tensor_idx); *mask_tensor = tensor_meta->tensors[mask_tensor_idx]; } if (!*mask_tensor) { GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping"); return FALSE; } } if (logits_tensor) { *logits_tensor = NULL; /* Retrieve the index of the tensor that has a tensor-id matching * GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */ logits_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta, GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID); if (logits_tensor_idx >= 0) { GST_LOG_OBJECT (self, "Masks tensor id: %d", logits_tensor_idx); *logits_tensor = tensor_meta->tensors[logits_tensor_idx]; } if (!*logits_tensor) { GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping"); return FALSE; } } return TRUE; } static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans); if (!gst_video_info_from_caps (&self->video_info, incaps)) { GST_ERROR_OBJECT (self, "Failed to parse caps"); return FALSE; } if (gst_base_transform_is_passthrough (trans)) { GST_ERROR_OBJECT (self, "Failed. Can't handle passthrough"); return FALSE; } return TRUE; } static void gst_yolo_od_tensor_decoder_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object); const gchar *filename; switch (prop_id) { case PROP_BOX_CONFI_THRESH: GST_OBJECT_LOCK (self); self->box_confi_thresh = g_value_get_float (value); GST_OBJECT_UNLOCK (self); break; case PROP_CLS_CONFI_THRESH: GST_OBJECT_LOCK (self); self->cls_confi_thresh = g_value_get_float (value); GST_OBJECT_UNLOCK (self); break; case PROP_IOU_THRESH: GST_OBJECT_LOCK (self); self->iou_thresh = g_value_get_float (value); GST_OBJECT_UNLOCK (self); break; case PROP_MAX_DETECTION: GST_OBJECT_LOCK (self); self->max_detection = g_value_get_uint (value); GST_OBJECT_UNLOCK (self); break; case PROP_MASK_TENSOR_NAME: GST_OBJECT_LOCK (self); self->mask_tensor_id = g_quark_from_string (g_value_get_string (value)); GST_OBJECT_UNLOCK (self); break; case PROP_LABEL_FILE: { GArray *labels; filename = g_value_get_string (value); labels = read_labels (filename); if (labels) { g_free (self->label_file); self->label_file = g_strdup (filename); g_clear_pointer (&self->labels, g_array_unref); self->labels = labels; } else { GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename); } break; } default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_yolo_od_tensor_decoder_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object); switch (prop_id) { case PROP_BOX_CONFI_THRESH: g_value_set_float (value, self->box_confi_thresh); break; case PROP_CLS_CONFI_THRESH: g_value_set_float (value, self->cls_confi_thresh); break; case PROP_IOU_THRESH: g_value_set_float (value, self->iou_thresh); break; case PROP_MAX_DETECTION: g_value_set_uint (value, self->max_detection); break; case PROP_MASK_TENSOR_NAME: GST_OBJECT_LOCK (self); g_value_set_string (value, g_quark_to_string (self->mask_tensor_id)); GST_OBJECT_UNLOCK (self); break; case PROP_LABEL_FILE: g_value_set_string (value, self->label_file); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static gboolean gst_yolo_od_tensor_decoder_stop (GstBaseTransform * trans) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans); g_clear_pointer (&self->sel_candidates, g_ptr_array_unref); g_clear_pointer (&self->selected, g_ptr_array_unref); g_clear_pointer (&self->od_mtds, g_array_unref); if (self->candidate_offsets) g_hash_table_destroy (self->candidate_offsets); return TRUE; } static void gst_yolo_od_tensor_decoder_class_init (GstYoloOdTensorDecoderClass * klass) { GObjectClass *gobject_class = (GObjectClass *) klass; GstElementClass *element_class = (GstElementClass *) klass; GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass; /* Define GstYoloTensorDecoder debug category. */ GST_DEBUG_CATEGORY_INIT (yolo_tensor_decoder_debug, "yolotensordecoder", 0, "Tensor decoder for Yolo detection N.N."); /* Set GObject vmethod to get and set property */ gobject_class->set_property = gst_yolo_od_tensor_decoder_set_property; gobject_class->get_property = gst_yolo_od_tensor_decoder_get_property; /* Define GstYoloTensorDecoder properties using GObject properties * interface.*/ /** * GstYoloTensorDecoder:box-confidence-threshold * * Threshold on boxes location confidence level * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_BOX_CONFI_THRESH, g_param_spec_float ("box-confidence-threshold", "Box location confidence threshold", "Boxes with a location confidence level inferior to this threshold " "will be excluded", 0.0, 1.0, DEFAULT_BOX_CONFI_THRESH, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstYoloTensorDecoder:class-confidence-threshold * * Threshold on object class confidence level * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_CLS_CONFI_THRESH, g_param_spec_float ("class-confidence-threshold", "Class confidence threshold", "Classes with a confidence level inferior to this threshold " "will be excluded", 0.0, 1.0, DEFAULT_CLS_CONFI_THRESH, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstYoloTensorDecoder:class-confidence-threshold * * Threshold on maximum intersection-over-union between bounding boxes to * consider them distinct. * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_IOU_THRESH, g_param_spec_float ("iou-threshold", "Maximum IOU threshold", "Maximum intersection-over-union between bounding boxes to " "consider them distinct.", 0.0, 1.0, DEFAULT_IOU_THRESH, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstYoloTensorDecoder:max-detections * * Threshold on maximum object/masks detections * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MAX_DETECTION, g_param_spec_uint ("max-detections", "Maximum object/masks detections.", "Maximum object/masks detections.", 0, G_MAXUINT, DEFAULT_MAX_DETECTION, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstYoloTensorDecoder:tensors-name-masks * * Overwrite mask tensors name * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MASK_TENSOR_NAME, g_param_spec_string ("tensors-name-masks", "Mask tensors name", "Name that identify Yolo mask tensors.", GST_MODEL_YOLO_DETECTION_MASK, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS))); /** * GstYoloTensorDecoder:label-file * * Label file * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE, g_param_spec_string ("label-file", "Label file", "Label file", NULL, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /* Element description. */ gst_element_class_set_static_metadata (element_class, "yoloodv5tensordecoder", "TensorDecoder/Video", "Decode tensors output from the inference of Yolo or FastSAM model (Detection)" " on video frames. The original repository of the Yolo is located at" " https://github.com/ultralytics/ultralytics. For easy experimentation a" " strawberry segmentation model based on Yolo architecture in Onnx " " format can be found at https://col.la/gstonnxmodelseg . This model " "already has tensors name embedded matching default " "values of tensors-masks-name and tensors-logits-name properties. It's " "also possible to embed tensor-ids into any model based on Yolo " "architecture to allow this tensor-decoder to decode tensors. This " "process is described in the Readme of this repository: " "https://col.la/gstonnxmodels", "Daniel Morin "); /* Add pads to element base on pad template defined earlier */ gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_yolo_tensor_decoder_src_template)); gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_yolo_tensor_decoder_sink_template)); /* Set GstBaseTransform vmethod transform_ip. This methode is called * by the srcpad when it receive buffer. ip stand for in-place meaning the * buffer remain unchanged by the element. Tensor-decoder only monitor * buffer it receive for a meta attach to the buffer that is a GstTensorMeta * and has a tensor-id can be handled by GstYoloTensorDecoder. */ basetransform_class->transform_ip = GST_DEBUG_FUNCPTR (gst_yolo_od_tensor_decoder_transform_ip); /* Set GstBaseTransform set_caps vmethod. This will be called once the * capability negotiation has been completed. We will be able to extract * resolution from this callback. */ basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_set_caps); /* Set GObject vmethod finalize */ basetransform_class->stop = gst_yolo_od_tensor_decoder_stop; gobject_class->finalize = gst_yolo_od_tensor_decoder_finalize; /* Calculate the class id placeholder (also a quark) that will be set on all * OD analytics-meta. */ OOI_CLASS_ID = g_quark_from_static_string ("Yolo-None"); /* Calculate the Yolo Mask tensor-id */ GST_MODEL_YOLO_DETECTION_MASKS_ID = g_quark_from_static_string (GST_MODEL_YOLO_DETECTION_MASK); } static void gst_yolo_od_tensor_decoder_init (GstYoloOdTensorDecoder * self) { /* GstYoloTensorDecoder instance initialization */ self->box_confi_thresh = DEFAULT_BOX_CONFI_THRESH; self->cls_confi_thresh = DEFAULT_CLS_CONFI_THRESH; self->iou_thresh = DEFAULT_IOU_THRESH; self->max_detection = DEFAULT_MAX_DETECTION; self->sel_candidates = NULL; self->selected = NULL; self->od_mtds = NULL; self->candidate_offsets = NULL; gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE); } static GstFlowReturn gst_yolo_od_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans); GstTensor *masks_tensor; GstAnalyticsRelationMeta *rmeta; if (!gst_yolo_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor, NULL)) { return GST_FLOW_OK; } if (masks_tensor->num_dims != 3) { GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL), ("Masks tensor must have 3 dimensions but has %zu", masks_tensor->num_dims)); return GST_FLOW_ERROR; } static GstAnalyticsRelationMetaInitParams rmeta_init_params = { .initial_buf_size = 1024, .initial_relation_order = 10 }; /* Retrieve or attach an analytics-relation-meta to the buffer. * Analytics-relation-meta are container that can reveive multiple * analytics-meta, like OD and Segmentation. The following call will only * retrieve an analytics-relation-meta if it exist or create one if it * does not exist. */ rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params); g_assert (rmeta != NULL); /* Decode masks_tensor and attach the information in a structured way * to rmeta. */ gst_yolo_od_tensor_decoder_decode_masks_f32 (self, masks_tensor, rmeta); return GST_FLOW_OK; } static void gst_yolo_od_tensor_decoder_finalize (GObject * object) { GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object); g_free (self->label_file); g_clear_pointer (&self->labels, g_array_unref); G_OBJECT_CLASS (gst_yolo_od_tensor_decoder_parent_class)->finalize (object); } /* Evaluate if there's an intersection between segement s1 and s2 */ static guint linear_intersection (guint s1_min, guint s1_max, guint s2_min, guint s2_max) { guint tmp; if (s1_max > s2_min && s2_max > s1_min) { if (s1_min > s2_min) { tmp = (s2_max > s1_max) ? s1_max : s2_max; return tmp - s1_min; } else { tmp = (s1_max > s2_max) ? s2_max : s1_max; return tmp - s2_min; } } return 0.0f; } static gfloat iou (guint bb1_x, guint bb1_y, guint bb1_w, guint bb1_h, guint bb2_x, guint bb2_y, guint bb2_w, guint bb2_h) { /* Rational: linear intersection is much faster to calculate then * 2d intersection. We project the two bounding boxes considered for * intersection on one axis and verify if the segments the create intersect. * If they don't, the bounding boxes can't intersect in 2d and we don't * need to verify if they intersect on the other dimension. If they * intersect on the first dimension we verify if they intersec on the other * dimension. Again if the don't intersect the bounding boxes can't intersect * on in a 2D space. If they intersected on both axis we calculate the IoU.*/ const guint x_intersection = linear_intersection (bb1_x, bb1_x + bb1_w, bb2_x, bb2_x + bb2_w); if (x_intersection > 0) { const guint y_intersection = linear_intersection (bb1_y, bb1_y + bb1_h, bb2_y, bb2_y + bb2_h); if (y_intersection > 0) { const guint bb1_area = bb1_w * bb1_h; const guint bb2_area = bb2_w * bb2_h; const guint intersect_area = x_intersection * y_intersection; const guint union_area = bb1_area + bb2_area - intersect_area; return union_area == 0 ? 0.0f : ((gfloat) intersect_area) / union_area; } } return 0.0f; } /* Extract bounding box from tensor data */ static void gst_yolo_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset, BBox * bbox) { gfloat w = *(candidate + offset[2]); gfloat h = *(candidate + offset[3]); bbox->x = *(candidate + offset[0]) - (w / 2); bbox->y = *(candidate + offset[1]) - (h / 2); bbox->w = w + 0.5; bbox->h = h + 0.5; } /* Calculate iou between boundingbox of candidate c1 and c2 */ static gfloat gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset, BBox * bb1, BBox * bb2) { gst_yolo_tensor_decoder_convert_bbox (c1, offset, bb1); gst_yolo_tensor_decoder_convert_bbox (c2, offset, bb2); return iou (bb1->x, bb1->y, bb1->w, bb1->h, bb2->x, bb2->y, bb2->w, bb2->h); } /* Utility function to find maxmum confidence value across classes * specified by range. */ static gfloat gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c, const ConfidenceRange * c_range, gsize * max_class_ofs) { gfloat max_val = 0.0; for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) { if (*(c + i) > max_val) { max_val = *(c + i); *max_class_ofs = i; } } return max_val; } /* Compare c1 and c2 * Utility function for sorting candiates based on the a field identified * by offset. */ static gint gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range) { ConfidenceRange *c_range = (ConfidenceRange *) range; const gfloat *candidate1 = *((gfloat **) c1); const gfloat *candidate2 = *((gfloat **) c2); gfloat max_c1_confi; gfloat max_c2_confi; gsize offset; if (candidate1[c_range->start] <= -1.0) { offset = (gsize) (-candidate1[c_range->start]); max_c1_confi = candidate1[offset]; } else { max_c1_confi = candidate1[c_range->start]; } if (candidate2[c_range->start] <= -1.0) { offset = (gsize) (-candidate2[c_range->start]); max_c2_confi = candidate2[offset]; } else { max_c2_confi = candidate2[c_range->start]; } return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0; } static void gst_yolo_tensor_decoder_debug_print_candidate (gpointer candidate_, gpointer data) { DebugCandidates *ctx = data; const gfloat *candidate = candidate_; for (gsize i = ctx->start; i < ctx->fields + ctx->start; i++) { GST_TRACE_OBJECT (ctx->self, "Field %lu: %f", i, *(candidate + (i * ctx->offset))); } } static float sigmoid (float x) { /* Check for positive overflow */ if (x > 0) { double exp_neg_x = exp (-x); return 1.0 / (1.0 + exp_neg_x); } /* Check for negative overflow and improve stability for negative x */ else { double exp_x = exp (x); return exp_x / (1.0 + exp_x); } } static gboolean gst_yolo_tensor_decoder_decode_valid_bb (GstYoloOdTensorDecoder * self, gfloat x, gfloat y, gfloat w, gfloat h) { GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (self); if (x > (GST_VIDEO_INFO_WIDTH (&parent->video_info))) return FALSE; if (y > (GST_VIDEO_INFO_HEIGHT (&parent->video_info))) return FALSE; if (x < -(gfloat) (GST_VIDEO_INFO_WIDTH (&parent->video_info) / 2.0)) return FALSE; if (y < -(gfloat) (GST_VIDEO_INFO_HEIGHT (&parent->video_info) / 2.0)) return FALSE; if (w <= 0) return FALSE; if (h <= 0) return FALSE; if (w > (GST_VIDEO_INFO_WIDTH (&parent->video_info))) return FALSE; if (h > (GST_VIDEO_INFO_HEIGHT (&parent->video_info))) return FALSE; return TRUE; } static void gst_yolo_od_tensor_decoder_decode_masks_f32 (GstYoloOdTensorDecoder * self, GstTensor * masks_tensor, GstAnalyticsRelationMeta * rmeta) { GstMapInfo map_info_masks; gfloat *candidate, **candidates, iou, confid = -1.0; gboolean rv, keep; gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4]; GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected; BBox bb1, bb2; GstAnalyticsODMtd od_mtd; ConfidenceRange c_range; gsize max_class_offset = 0, class_index; GQuark class_quark = OOI_CLASS_ID; /* Retrieve memory at index 0 and map it in READWRITE mode */ masks_tensor->data = gst_buffer_make_writable (masks_tensor->data); rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE); g_assert (rv); GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims); /* Trace masks tensor dimensions */ if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) { for (gsize i = 0; i < masks_tensor->num_dims; i++) { GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i, masks_tensor->dims[i]); } } /* Allocated array to store selected candidates */ if (sel_candidates == NULL) { /* Number of candidates can be large, keep the array to avoid frequent * allocation */ sel_candidates = g_ptr_array_new_full (masks_tensor->dims[2], NULL); self->sel_candidates = sel_candidates; selected = g_ptr_array_new_full (masks_tensor->dims[2], NULL); self->selected = selected; self->od_mtds = g_array_new (FALSE, FALSE, sizeof (GstAnalyticsODMtd)); self->candidate_offsets = g_hash_table_new (g_direct_hash, g_direct_equal); } else { /* Reset lengths when we re-use arrays */ g_ptr_array_set_size (sel_candidates, 0); g_ptr_array_set_size (selected, 0); g_array_set_size (self->od_mtds, 0); g_hash_table_remove_all (self->candidate_offsets); } /* masks_tensor->dims[2] contain the number of candidates. Let's call the * number of candidates C. We store this value in offset as we use it * calculate the offset of candidate fields. The variable #data_masks above point * at the masks tensor data, but candidates data is organize like a plane. * Candidates bbox X coord fields from 0 to C start at the begining of the * tensor data and are continguous in memory, followed by all candidates * field Y, followed by field W, ... followed by field class confidence level, * ..., followed by all candidates mask0, ..., followed by all candidates * mask31. Bellow we pre-calculate each field offset relative to the * candidate pointer (pointer to field X), which will allow us to easily * access each candiates field. * */ offset = masks_tensor->dims[2]; x_offset = 0; y_offset = offset; w_offset = 2 * offset; h_offset = 3 * offset; /* Start index of label confidence level */ c_range.start = 4 * offset; /* Last index of label confidence level */ c_range.end = (masks_tensor->dims[1] - YOLO_MASKS_WEIGHT_SIZE - 1) * offset; /* Step between class confidence level */ c_range.step = offset; offsets[0] = x_offset; offsets[1] = y_offset; offsets[2] = w_offset; offsets[3] = h_offset; #define BB_X(candidate) candidate[x_offset] #define BB_Y(candidate) candidate[y_offset] #define BB_W(candidate) candidate[w_offset] #define BB_H(candidate) candidate[h_offset] candidate = (gfloat *) map_info_masks.data; for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) { /* Yolo have multiple class, so maximum confidence level across all class is used * to evaluate the relevance of the candidate. Here we filter candidates * based on their class confidence level.*/ gfloat max_confidence = gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range, &max_class_offset); if (max_confidence > self->cls_confi_thresh && gst_yolo_tensor_decoder_decode_valid_bb (self, BB_X (candidate), BB_Y (candidate), BB_W (candidate), BB_H (candidate))) { /* We need a way to keep track of the class with maximum confidence. At * this level we're operating on a large number of candidate. Candidates * will be sorted and filtered later one. Here we use an inplace method * to store the offset of the class with highest confidence level. If * the class with highest confidence level is the first one we keep it's * value as-is, otherwise we overwrite the first class confidence level * with the value of the -offset of the class with maximum confidence. */ if (max_class_offset != c_range.start) { candidate[c_range.start] = -(float) (max_class_offset); } g_ptr_array_add (sel_candidates, candidate); GST_TRACE_OBJECT (self, "%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f", c_idx, candidate[x_offset], candidate[y_offset], candidate[w_offset], candidate[h_offset], candidate[w_offset] * candidate[h_offset], max_confidence); } /* Pointer arithmetic, going to the next candidate. This is the candidate * pointer that is now incremented to the next candidate which is also * the field X of the next candidate.*/ candidate += 1; } GST_LOG_OBJECT (self, "Selected candidates count: %u", sel_candidates->len); /* We sort the remaining candidates because, in the next selection phase we * have a maximum and we want to make sure that considered only the candidates * with the highest class confidence level before potentially reaching the * maximum.*/ g_ptr_array_sort_with_data (sel_candidates, gst_yolo_tensor_decoder_sort_candidates, &c_range); if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) { /* For debug purpose only. Prints candidates before NMS */ DebugCandidates ctx; ctx.start = 0; ctx.fields = 5; ctx.offset = offset; ctx.self = self; g_ptr_array_foreach (sel_candidates, gst_yolo_tensor_decoder_debug_print_candidate, &ctx); } /* Algorithm in part inspired by OpenCV NMSBoxes */ candidates = (gfloat **) sel_candidates->pdata; for (gsize c = 0; c < sel_candidates->len; c++) { keep = TRUE; /* We only want to a NMS using IoU between candidates we've decided to * keep and the new one we considering to keep. selected array contain * the candidates we decided to keep and candidates[c] is the candidate * we're considering to keep or reject */ for (gsize s = 0; s < selected->len && keep; s++) { iou = gst_yolo_tensor_decoder_iou (candidates[c], selected->pdata[s], offsets, &bb1, &bb2); keep = iou <= self->iou_thresh; } if (keep) { candidate = sel_candidates->pdata[c]; if (selected->len == 0) { /* The first bounding-box always get in as there's no others bbox * to filter on based on IoU */ gst_yolo_tensor_decoder_convert_bbox (candidate, offsets, &bb1); } g_ptr_array_add (selected, candidate); if (self->labels) { if (candidate[c_range.start] <= -1.0) { /* Max class is not the first one and `candidate[c_range.start]` * contain -offset to the class with maximum confidence */ max_class_offset = (gsize) (-candidate[c_range.start]); confid = candidate[max_class_offset]; /* Set overwritten confidence to 0 to avoir incorrect interpreation */ candidate[c_range.start] = 0.0; class_index = (max_class_offset - c_range.start) / c_range.step; } else { confid = candidate[c_range.start]; class_index = 0; } if (class_index < self->labels->len) class_quark = g_array_index (self->labels, GQuark, class_index); } /* We add the analytics-objectdetection-meta to the buffer. Since * there's only one class the class confidence level is set to -1.0 * as it's deemed not important. */ gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark, bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd); g_array_append_val (self->od_mtds, od_mtd); gsize offset_pos = candidate - (gfloat *) map_info_masks.data; g_hash_table_insert (self->candidate_offsets, GUINT_TO_POINTER (od_mtd.id), GSIZE_TO_POINTER (offset_pos)); /* If the maximum number of candidate selected is reached exit the * selection process. */ if (selected->len >= self->max_detection) { break; } } } GST_LOG_OBJECT (self, "Selected count: %u", selected->len); if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) { DebugCandidates ctx; /* For debug purpose only. Prints candidates after NMS */ ctx.start = 0; ctx.fields = 5; ctx.offset = offset; ctx.self = self; g_ptr_array_foreach (selected, gst_yolo_tensor_decoder_debug_print_candidate, &ctx); } /* We unmap the memory */ gst_buffer_unmap (masks_tensor->data, &map_info_masks); } /* Yolo segmentation tensor decoder */ static void gst_yolo_seg_tensor_decoder_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (object); switch (prop_id) { case PROP_LOGITS_TENSOR_NAME: self->logits_tensor_id = g_quark_from_string (g_value_get_string (value)); break; default: gst_yolo_od_tensor_decoder_set_property (object, prop_id, value, pspec); break; } } static void gst_yolo_seg_tensor_decoder_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (object); switch (prop_id) { case PROP_LOGITS_TENSOR_NAME: g_value_set_string (value, g_quark_to_string (self->logits_tensor_id)); break; default: G_OBJECT_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->get_property (object, prop_id, value, pspec); break; } } static gboolean gst_yolo_seg_tensor_decoder_stop (GstBaseTransform * trans) { GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (trans); self->mask_w = 0; self->mask_h = 0; self->mask_length = 0; if (self->mask_pool) gst_buffer_pool_set_active (self->mask_pool, FALSE); g_clear_object (&self->mask_pool); GST_BASE_TRANSFORM_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->stop (trans); return TRUE; } static void gst_yolo_seg_tensor_decoder_finalize (GObject * object) { G_OBJECT_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->finalize (object); } static void gst_yolo_seg_tensor_decoder_class_init (GstYoloSegTensorDecoderClass * klass) { GObjectClass *gobject_class = (GObjectClass *) klass; GstElementClass *element_class = (GstElementClass *) klass; GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass; /* Set GObject vmethod to get and set property */ gobject_class->set_property = gst_yolo_seg_tensor_decoder_set_property; gobject_class->get_property = gst_yolo_seg_tensor_decoder_get_property; g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LOGITS_TENSOR_NAME, g_param_spec_string ("tensors-name-logits", "Logits tensors name", "Name that identify Yolo logits tensors.", GST_MODEL_YOLO_SEGMENTATION_LOGITS, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS))); /* Element description. */ gst_element_class_set_static_metadata (element_class, "yolosegv8tensordecoder", "TensorDecoder/Video", "Decode tensors output from the inference of Yolo or FastSAM model (segmentation)" " on video frames. The original repository of the Yolo is located at" " https://github.com/ultralytics/ultralytics. For easy experimentation a" " object segmentation model based on Yolo architecture in Onnx " " format can be found at https://col.la/gstonnxmodelseg . This model " "already has tensors name embedded matching default " "values of tensors-masks-name and tensors-logits-name properties. It's " "also possible to embed tensor-ids into any model based on Yolo " "architecture to allow this tensor-decoder to decode tensors. This " "process is described in the Readme of this repository: " "https://col.la/gstonnxmodels", "Daniel Morin "); /* Add pads to element base on pad template defined earlier */ gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_yolo_tensor_decoder_src_template)); gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_yolo_tensor_decoder_sink_template)); /* Set GstBaseTransform vmethod transform_ip. This methode is called * by the srcpad when it receive buffer. ip stand for in-place meaning the * buffer remain unchanged by the element. Tensor-decoder only monitor * buffer it receive for a meta attach to the buffer that is a GstTensorMeta * and has a tensor-id can be handled by GstYoloTensorDecoder. */ basetransform_class->transform_ip = GST_DEBUG_FUNCPTR (gst_yolo_seg_tensor_decoder_transform_ip); /* Set GstBaseTransform set_caps vmethod. This will be called once the * capability negotiation has been completed. We will be able to extract * resolution from this callback. */ basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_set_caps); /* Set GstBaseTransform stop vmethod. This will be called when the element * is set to NULL state. */ basetransform_class->stop = gst_yolo_seg_tensor_decoder_stop; /* Set GObject vmethod finalize */ gobject_class->finalize = gst_yolo_seg_tensor_decoder_finalize; /* Calculate the class id placeholder (also a quark) that will be set on all * OD analytics-meta. */ OOI_CLASS_ID = g_quark_from_static_string ("Yolo-None"); /* Calculate the Yolo Logits tensor-id */ GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID = g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS); } static void gst_yolo_seg_tensor_decoder_init (GstYoloSegTensorDecoder * self) { /* GstYoloSegTensorDecoder instance initialization */ self->mask_w = 0; self->mask_h = 0; self->mask_length = 0; self->mask_pool = NULL; memset (&self->mask_roi, 0, sizeof (BBox)); gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE); } /* gst_yolo_seg_tensor_decoder_transform_ip: * @trans: Instance * @buf:inout: Buffer containing media and where tensors can be attached * @return: Flow errors * Decode Yolo tensors, post-process tensors and store decoded information * into an analytics-meta that is attached to the buffer before been pushed * downstream. */ static GstFlowReturn gst_yolo_seg_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf) { GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (trans); GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (trans); GstTensor *masks_tensor, *logits_tensor; GstAnalyticsRelationMeta *rmeta; gsize mask_w, mask_h; if (!gst_yolo_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor, &logits_tensor)) return GST_FLOW_OK; if (logits_tensor->num_dims != 4) { GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL), ("Logits tensor must have 4 dimensions but has %zu", masks_tensor->num_dims)); return GST_FLOW_ERROR; } mask_w = logits_tensor->dims[2]; mask_h = logits_tensor->dims[3]; /* The masks need to be cropped to fit the SAR of the image. */ /* TODO: We're reconstructing the transformation that was done on the * original image based on the assumption that the complete image without * deformation would be analyzed. This assumption is not alway true and * we should try to find a way to convey this transformation information * and retrieve from here to know the transformation that need to be done * on the mask.*/ if (self->mask_w != mask_w || self->mask_h != mask_h) { self->mask_w = mask_w; self->mask_h = mask_h; self->mask_length = mask_w * mask_h; if (parent->video_info.width > parent->video_info.height) { self->bb2mask_gain = ((gfloat) self->mask_w) / parent->video_info.width; self->mask_roi.x = 0; self->mask_roi.w = self->mask_w; self->mask_roi.h = ((gfloat) self->bb2mask_gain) * parent->video_info.height; self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2; } else { self->bb2mask_gain = ((gfloat) self->mask_h) / parent->video_info.height; self->mask_roi.y = 0; self->mask_roi.h = self->mask_h; self->mask_roi.w = self->bb2mask_gain * parent->video_info.width; self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2; } if (self->mask_pool) { gst_buffer_pool_set_active (self->mask_pool, FALSE); g_clear_object (&self->mask_pool); } } if (self->mask_pool == NULL) { GstVideoInfo minfo; GstCaps *caps; gst_video_info_init (&minfo); gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, self->mask_w, self->mask_h); caps = gst_video_info_to_caps (&minfo);; self->mask_pool = gst_video_buffer_pool_new (); GstStructure *config = gst_buffer_pool_get_config (self->mask_pool); gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_set_config (self->mask_pool, config); gst_buffer_pool_set_active (self->mask_pool, TRUE); gst_caps_unref (caps); } static GstAnalyticsRelationMetaInitParams rmeta_init_params = { .initial_buf_size = 1024, .initial_relation_order = 10 }; /* Retrieve or attach an analytics-relation-meta to the buffer. * Analytics-relation-meta are container that can reveive multiple * analytics-meta, like OD and Segmentation. The following call will only * retrieve an analytics-relation-meta if it exist or create one if it * does not exist. */ rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params); g_assert (rmeta != NULL); /* Decode masks_tensor and attach the information in a structured way * to rmeta. */ gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (self, masks_tensor, logits_tensor, rmeta); return GST_FLOW_OK; } static void gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (GstYoloSegTensorDecoder * self, GstTensor * masks_tensor, GstTensor * logits_tensor, GstAnalyticsRelationMeta * rmeta) { GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (self); GstMapInfo map_info_logits, out_mask_info, map_info_masks; GstAnalyticsSegmentationMtd seg_mtd; guint8 *mask_data; GstFlowReturn flowret; BBox bb_mask; gfloat *candidate, *data_logits; guint rv = 0; guint region_ids[2] = { 0, 0 }; GstBuffer *mask_buf; gsize offset, m0_offset; gst_yolo_od_tensor_decoder_decode_masks_f32 (parent, masks_tensor, rmeta); /* Retrieve memory at index 0 from logits_tensor in READ mode */ rv = gst_buffer_map (logits_tensor->data, &map_info_logits, GST_MAP_READ); g_assert (rv); data_logits = (gfloat *) map_info_logits.data; /* Trace masks tensor dimensions */ if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) { for (gsize i = 0; i < logits_tensor->num_dims; i++) { GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i, logits_tensor->dims[i]); } } GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims); offset = masks_tensor->dims[2]; /* masks count in mask_tensors will be 32 */ m0_offset = (masks_tensor->dims[1] - YOLO_MASKS_WEIGHT_SIZE) * offset; #define MASK_X(candidate, index) candidate[m0_offset + (index * offset)] masks_tensor->data = gst_buffer_make_writable (masks_tensor->data); rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ); g_assert (rv); gfloat *mask_tensor_data = (gfloat *) map_info_masks.data; for (gsize c = 0; c < parent->od_mtds->len; c++) { BBox bb; GstAnalyticsODMtd od_mtd; od_mtd = g_array_index (parent->od_mtds, GstAnalyticsODMtd, c); candidate = mask_tensor_data + GPOINTER_TO_SIZE (g_hash_table_lookup (parent->candidate_offsets, GUINT_TO_POINTER (od_mtd.id))); gst_analytics_od_mtd_get_location (&od_mtd, &bb.x, &bb.y, (gint *) & bb.w, (gint *) & bb.h, NULL); bb_mask.x = self->bb2mask_gain * bb.x + self->mask_roi.x; bb_mask.y = self->bb2mask_gain * bb.y + self->mask_roi.y; bb_mask.w = self->bb2mask_gain * bb.w; bb_mask.h = self->bb2mask_gain * bb.h; mask_buf = NULL; flowret = gst_buffer_pool_acquire_buffer (self->mask_pool, &mask_buf, NULL); g_assert (flowret == GST_FLOW_OK); gst_buffer_map (mask_buf, &out_mask_info, GST_MAP_READWRITE); mask_data = (guint8 *) out_mask_info.data; GstVideoMeta *vmeta = gst_buffer_get_video_meta (mask_buf); g_assert (vmeta != NULL); vmeta->width = bb_mask.w; vmeta->height = bb_mask.h; #define MX_MAX (bb_mask.x + bb_mask.w) #define MY_MAX (bb_mask.y + bb_mask.h) for (gint my = bb_mask.y, i = 0, j; my < MY_MAX; my++) { for (gint mx = bb_mask.x; mx < MX_MAX; mx++, i++) { float sum = 0.0f; j = my * self->mask_w + mx; for (gsize k = 0; k < logits_tensor->dims[1]; ++k) { GST_TRACE_OBJECT (self, "protos data at (%d, %zu) is %f", j, k, data_logits[k * self->mask_length + j]); sum += MASK_X (candidate, k) * data_logits[k * self->mask_length + j]; } mask_data[i] = sigmoid (sum) > 0.5 ? c + 1 : 0; } } gst_analytics_relation_meta_add_segmentation_mtd (rmeta, mask_buf, GST_SEGMENTATION_TYPE_INSTANCE, 1, region_ids, bb.x, bb.y, bb.w, bb.h, &seg_mtd); gst_buffer_unmap (mask_buf, &out_mask_info); } gst_buffer_unmap (logits_tensor->data, &map_info_logits); gst_buffer_unmap (masks_tensor->data, &map_info_masks); }