/*
 * GStreamer gstreamer-yolotensordecoder
 * Copyright (C) 2024 Collabora Ltd.
 *  Authors: Daniel Morin <daniel.morin@collabora.com>
 *           Vineet Suryan <vineet.suryan@collabora.com>
 *           Santosh Mahto <santosh.mahto@collabora.com>
 *
 * gstyolotensordecoder.c
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/**
 * SECTION:element-yolotensordecoder.c
 * @short_description: Decode tensors from a FastSAM or YOLOv8 detection and segmentation
 * neural network.
 *
 *
 * This element can parse per-buffer inference tensors meta data generated by an upstream
 * inference element
 *
 *
 * ## Example launch command:
 *
 * Test image file, model file and labels file can be found here :
 * https://gitlab.collabora.com/gstreamer/onnx-models
 *
 * GST_DEBUG=yolotensordecoder \
 * gst-launch-1.0 multifilesrc location=strawberry_crops.jpg ! decodebin \
 *  ! videoconvertscale add-borders=1 ! onnxinference execution-provider=cpu
 *  model-file=segmentation.onnx input-image-format=chw input-tensor-offset=0 \
 *  input-tensor-scale=255.0 ! yolotensordecoder \
 *  class-confidence-threshold=0.8 iou-threshold=0.7 max-detections=100 \
 *  label-file=coco_labels.txt ! objectdetectionoverlay  \
 *  object-detection-outline-color=0xFF0000FF draw-labels=true ! \
 *  segmentationoverlay hint-maximum-segment-type=50 ! videoconvert ! ximagesink
 *
 */

#ifdef HAVE_CONFI_H
#include "config.h"
#endif

#include "gstyolotensordecoder.h"

#include <gst/analytics/analytics.h>
#include <gio/gio.h>

#include <math.h>

#define GST_MODEL_YOLO_DETECTION_MASK \
  "Gst.Model.Yolo.Segmentation.Masks"
#define GST_MODEL_YOLO_SEGMENTATION_LOGITS \
  "Gst.Model.Yolo.Segmentation.Logits"

#define YOLO_MASKS_WEIGHT_SIZE 32
GST_DEBUG_CATEGORY_STATIC (yolo_tensor_decoder_debug);
#define GST_CAT_DEFAULT yolo_tensor_decoder_debug

GST_ELEMENT_REGISTER_DEFINE (yolo_seg_tensor_decoder, "yolosegv8tensordecoder",
    GST_RANK_PRIMARY, GST_TYPE_YOLO_SEG_TENSOR_DECODER);
GST_ELEMENT_REGISTER_DEFINE (yolo_od_tensor_decoder, "yoloodv5tensordecoder",
    GST_RANK_PRIMARY, GST_TYPE_YOLO_OD_TENSOR_DECODER);

/* GstYoloTensorDecoder properties, see properties description in
 * gst_yolo_tensor_decoder_class_init for more details. */
enum
{
  PROP_0,
  PROP_BOX_CONFI_THRESH,
  PROP_CLS_CONFI_THRESH,
  PROP_IOU_THRESH,
  PROP_MAX_DETECTION,
  PROP_MASK_TENSOR_NAME,
  PROP_LOGITS_TENSOR_NAME,
  PROP_LABEL_FILE
};

/* For debug purpose */
typedef struct _DebugCandidates
{
  gpointer self;
  gsize fields;                 /* Fields count do debug */
  gsize offset;                 /* Fields offset */
  gsize start;                  /* First field index to debug */
} DebugCandidates;

/* Specify the range of confidence level in tensor output*/
typedef struct _ConfidenceRange
{
  gsize start;                  /* Start index of confidence level */
  gsize end;                    /* End index of confidence level */
  gsize step;                   /* Step size of next confidence level index */
} ConfidenceRange;

/* Default properties value */
static const gfloat DEFAULT_BOX_CONFI_THRESH = 0.4f;
static const gfloat DEFAULT_CLS_CONFI_THRESH = 0.4f;
static const gfloat DEFAULT_IOU_THRESH = 0.7f;
static const gsize DEFAULT_MAX_DETECTION = 100;

/* Global variable storing class for OD. Generally OD has class
 * and we need to provide one but this class is just a placeholder.*/
GQuark OOI_CLASS_ID;

/* To tensor-id are defined by a string that is converted to quark
 * which is just an integer value using a hash function. For efficiency
 * we compare on the quark (hash value). Since tensor-id never change we
 * just calculate the hash once during initialization and store the value in
 * these variables. */
GQuark GST_MODEL_YOLO_DETECTION_MASKS_ID;
GQuark GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID;

/* GStreamer element srcpad template. Template of a srcpad that can receive
 * any raw video. */
static GstStaticPadTemplate gst_yolo_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw"));

/* GStreamer element sinkpad template. Template of a sinkpad that can receive
 * any raw video. */
static GstStaticPadTemplate gst_yolo_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw"));

/*Common Prototypes */

static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps);
static gboolean gst_yolo_tensor_decoder_get_tensor_meta (GstYoloOdTensorDecoder
    * self, GstBuffer * buf, GstTensor ** mask_tensor,
    GstTensor ** logits_tensor);
/* GstYoloOdTensorDecoder Prototypes */
static void gst_yolo_od_tensor_decoder_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_yolo_od_tensor_decoder_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);
static gboolean gst_yolo_od_tensor_decoder_stop (GstBaseTransform * trans);
static GstFlowReturn gst_yolo_od_tensor_decoder_transform_ip (GstBaseTransform *
    trans, GstBuffer * buf);
static void gst_yolo_od_tensor_decoder_finalize (GObject * object);
static void gst_yolo_od_tensor_decoder_decode_masks_f32 (GstYoloOdTensorDecoder
    * self, GstTensor * masks_tensor, GstAnalyticsRelationMeta * rmeta);
/* GstYoloSegTensorDecoder Prototypes */
static void gst_yolo_seg_tensor_decoder_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_yolo_seg_tensor_decoder_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);
static gboolean gst_yolo_seg_tensor_decoder_stop (GstBaseTransform * trans);
static GstFlowReturn gst_yolo_seg_tensor_decoder_transform_ip (GstBaseTransform
    * trans, GstBuffer * buf);
static void gst_yolo_seg_tensor_decoder_finalize (GObject * object);
static void
gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (GstYoloSegTensorDecoder *
    self, GstTensor * masks_tensor, GstTensor * logits_tensor,
    GstAnalyticsRelationMeta * rmeta);

G_DEFINE_TYPE (GstYoloOdTensorDecoder, gst_yolo_od_tensor_decoder,
    GST_TYPE_BASE_TRANSFORM);
G_DEFINE_TYPE (GstYoloSegTensorDecoder, gst_yolo_seg_tensor_decoder,
    GST_TYPE_YOLO_OD_TENSOR_DECODER);

static GArray *
read_labels (const char *labels_file)
{
  GArray *array;
  GFile *file = g_file_new_for_path (labels_file);
  GFileInputStream *file_stream;
  GDataInputStream *data_stream;
  GError *error = NULL;
  gchar *line;

  file_stream = g_file_read (file, NULL, &error);
  g_object_unref (file);
  if (!file_stream) {
    GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message);
    g_clear_error (&error);
    return NULL;
  }

  data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream));
  g_object_unref (file_stream);

  array = g_array_new (FALSE, FALSE, sizeof (GQuark));

  while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL,
              &error))) {
    GQuark label = g_quark_from_string (line);
    g_array_append_val (array, label);
    g_free (line);
  }

  g_object_unref (data_stream);

  if (error) {
    GST_WARNING ("Could not open file %s: %s", labels_file, error->message);
    g_array_free (array, TRUE);
    g_clear_error (&error);
    return NULL;
  }

  if (array->len == 0) {
    g_array_free (array, TRUE);
    return NULL;
  }

  return array;
}

static gboolean
gst_yolo_tensor_decoder_get_tensor_meta (GstYoloOdTensorDecoder * self,
    GstBuffer * buf, GstTensor ** mask_tensor, GstTensor ** logits_tensor)
{
  GstTensorMeta *tensor_meta;
  gint mask_tensor_idx, logits_tensor_idx;

  g_return_val_if_fail (mask_tensor != NULL || logits_tensor != NULL, FALSE);

  /* Retrieve all TensorMeta attach the buffer */
  tensor_meta = gst_buffer_get_tensor_meta (buf);
  if (!tensor_meta) {
    GST_LOG_OBJECT (self, "No tensor meta");
    return FALSE;
  }

  GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);

  if (mask_tensor) {
    *mask_tensor = NULL;
    /* Retrieve the index of the tensor that has a tensor-id matching
     * GST_MODEL_YOLO_SEGMENTATION_MASKS_ID in the GstTensorMeta. */
    mask_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
        GST_MODEL_YOLO_DETECTION_MASKS_ID);
    if (mask_tensor_idx >= 0) {
      GST_LOG_OBJECT (self, "Masks tensor id: %d", mask_tensor_idx);
      *mask_tensor = tensor_meta->tensors[mask_tensor_idx];
    }
    if (!*mask_tensor) {
      GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping");
      return FALSE;
    }
  }

  if (logits_tensor) {
    *logits_tensor = NULL;
    /* Retrieve the index of the tensor that has a tensor-id matching
     * GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID in the GstTensorMeta. */
    logits_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
        GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID);
    if (logits_tensor_idx >= 0) {
      GST_LOG_OBJECT (self, "Masks tensor id: %d", logits_tensor_idx);
      *logits_tensor = tensor_meta->tensors[logits_tensor_idx];
    }
    if (!*logits_tensor) {
      GST_INFO_OBJECT (self, "Couldn't find mask or logits tensor, skipping");
      return FALSE;
    }
  }

  return TRUE;
}

static gboolean
gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
    GstCaps * outcaps)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans);

  if (!gst_video_info_from_caps (&self->video_info, incaps)) {
    GST_ERROR_OBJECT (self, "Failed to parse caps");
    return FALSE;
  }

  if (gst_base_transform_is_passthrough (trans)) {
    GST_ERROR_OBJECT (self, "Failed. Can't handle passthrough");
    return FALSE;
  }

  return TRUE;
}


static void
gst_yolo_od_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object);
  const gchar *filename;

  switch (prop_id) {
    case PROP_BOX_CONFI_THRESH:
      GST_OBJECT_LOCK (self);
      self->box_confi_thresh = g_value_get_float (value);
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_CLS_CONFI_THRESH:
      GST_OBJECT_LOCK (self);
      self->cls_confi_thresh = g_value_get_float (value);
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_IOU_THRESH:
      GST_OBJECT_LOCK (self);
      self->iou_thresh = g_value_get_float (value);
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_MAX_DETECTION:
      GST_OBJECT_LOCK (self);
      self->max_detection = g_value_get_uint (value);
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_MASK_TENSOR_NAME:
      GST_OBJECT_LOCK (self);
      self->mask_tensor_id = g_quark_from_string (g_value_get_string (value));
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_LABEL_FILE:
    {
      GArray *labels;

      filename = g_value_get_string (value);
      labels = read_labels (filename);

      if (labels) {
        g_free (self->label_file);
        self->label_file = g_strdup (filename);
        g_clear_pointer (&self->labels, g_array_unref);
        self->labels = labels;
      } else {
        GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename);
      }
      break;
    }
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_yolo_od_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_BOX_CONFI_THRESH:
      g_value_set_float (value, self->box_confi_thresh);
      break;
    case PROP_CLS_CONFI_THRESH:
      g_value_set_float (value, self->cls_confi_thresh);
      break;
    case PROP_IOU_THRESH:
      g_value_set_float (value, self->iou_thresh);
      break;
    case PROP_MAX_DETECTION:
      g_value_set_uint (value, self->max_detection);
      break;
    case PROP_MASK_TENSOR_NAME:
      GST_OBJECT_LOCK (self);
      g_value_set_string (value, g_quark_to_string (self->mask_tensor_id));
      GST_OBJECT_UNLOCK (self);
      break;
    case PROP_LABEL_FILE:
      g_value_set_string (value, self->label_file);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static gboolean
gst_yolo_od_tensor_decoder_stop (GstBaseTransform * trans)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans);

  g_clear_pointer (&self->sel_candidates, g_ptr_array_unref);
  g_clear_pointer (&self->selected, g_ptr_array_unref);
  g_clear_pointer (&self->od_mtds, g_array_unref);
  if (self->candidate_offsets)
    g_hash_table_destroy (self->candidate_offsets);

  return TRUE;
}

static void
gst_yolo_od_tensor_decoder_class_init (GstYoloOdTensorDecoderClass * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  /* Define GstYoloTensorDecoder debug category. */
  GST_DEBUG_CATEGORY_INIT (yolo_tensor_decoder_debug, "yolotensordecoder",
      0, "Tensor decoder for Yolo detection N.N.");

  /* Set GObject vmethod to get and set property */
  gobject_class->set_property = gst_yolo_od_tensor_decoder_set_property;
  gobject_class->get_property = gst_yolo_od_tensor_decoder_get_property;

  /* Define GstYoloTensorDecoder properties using GObject properties
   * interface.*/


  /**
   * GstYoloTensorDecoder:box-confidence-threshold
   *
   * Threshold on boxes location confidence level
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_BOX_CONFI_THRESH,
      g_param_spec_float ("box-confidence-threshold",
          "Box location confidence threshold",
          "Boxes with a location confidence level inferior to this threshold "
          "will be excluded",
          0.0, 1.0, DEFAULT_BOX_CONFI_THRESH,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstYoloTensorDecoder:class-confidence-threshold
   *
   * Threshold on object class confidence level
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_CLS_CONFI_THRESH,
      g_param_spec_float ("class-confidence-threshold",
          "Class confidence threshold",
          "Classes with a confidence level inferior to this threshold "
          "will be excluded",
          0.0, 1.0, DEFAULT_CLS_CONFI_THRESH,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstYoloTensorDecoder:class-confidence-threshold
   *
   * Threshold on maximum intersection-over-union between bounding boxes to
   * consider them distinct.
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_IOU_THRESH,
      g_param_spec_float ("iou-threshold",
          "Maximum IOU threshold",
          "Maximum intersection-over-union between bounding boxes to "
          "consider them distinct.",
          0.0, 1.0, DEFAULT_IOU_THRESH,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstYoloTensorDecoder:max-detections
   *
   * Threshold on maximum object/masks detections
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_MAX_DETECTION,
      g_param_spec_uint ("max-detections",
          "Maximum object/masks detections.",
          "Maximum object/masks detections.",
          0, G_MAXUINT, DEFAULT_MAX_DETECTION,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstYoloTensorDecoder:tensors-name-masks
   *
   * Overwrite mask tensors name
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_MASK_TENSOR_NAME,
      g_param_spec_string ("tensors-name-masks",
          "Mask tensors name",
          "Name that identify Yolo mask tensors.",
          GST_MODEL_YOLO_DETECTION_MASK,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
              G_PARAM_STATIC_STRINGS)));

  /**
   * GstYoloTensorDecoder:label-file
   *
   * Label file
   *
   * Since: 1.26
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE,
      g_param_spec_string ("label-file",
          "Label file", "Label file", NULL, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));


  /* Element description. */
  gst_element_class_set_static_metadata (element_class, "yoloodv5tensordecoder",
      "TensorDecoder/Video",
      "Decode tensors output from the inference of Yolo or FastSAM model (Detection)"
      " on video frames. The original repository of the Yolo is located at"
      " https://github.com/ultralytics/ultralytics. For easy experimentation a"
      " strawberry segmentation model based on Yolo architecture in Onnx "
      " format can be found at https://col.la/gstonnxmodelseg . This model "
      "already has tensors name embedded matching default "
      "values of tensors-masks-name and tensors-logits-name properties. It's "
      "also possible to embed tensor-ids into any model based on Yolo "
      "architecture to allow this tensor-decoder to decode tensors. This "
      "process is described in the Readme of this repository: "
      "https://col.la/gstonnxmodels",
      "Daniel Morin <daniel.morin@collabora.com>");

  /* Add pads to element base on pad template defined earlier */
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_yolo_tensor_decoder_src_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_yolo_tensor_decoder_sink_template));

  /* Set GstBaseTransform vmethod transform_ip. This methode is called
   * by the srcpad when it receive buffer. ip stand for in-place meaning the
   * buffer remain unchanged by the element. Tensor-decoder only monitor
   * buffer it receive for a meta attach to the buffer that is a GstTensorMeta
   * and has a tensor-id can be handled by GstYoloTensorDecoder. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_yolo_od_tensor_decoder_transform_ip);

  /* Set GstBaseTransform set_caps vmethod. This will be called once the
   * capability negotiation has been completed. We will be able to extract
   * resolution from this callback. */
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_set_caps);

  /* Set GObject vmethod finalize */
  basetransform_class->stop = gst_yolo_od_tensor_decoder_stop;

  gobject_class->finalize = gst_yolo_od_tensor_decoder_finalize;

  /* Calculate the class id placeholder (also a quark) that will be set on all
   * OD analytics-meta. */
  OOI_CLASS_ID = g_quark_from_static_string ("Yolo-None");

  /* Calculate the Yolo Mask tensor-id */
  GST_MODEL_YOLO_DETECTION_MASKS_ID =
      g_quark_from_static_string (GST_MODEL_YOLO_DETECTION_MASK);

}

static void
gst_yolo_od_tensor_decoder_init (GstYoloOdTensorDecoder * self)
{
  /* GstYoloTensorDecoder instance initialization */
  self->box_confi_thresh = DEFAULT_BOX_CONFI_THRESH;
  self->cls_confi_thresh = DEFAULT_CLS_CONFI_THRESH;
  self->iou_thresh = DEFAULT_IOU_THRESH;
  self->max_detection = DEFAULT_MAX_DETECTION;
  self->sel_candidates = NULL;
  self->selected = NULL;
  self->od_mtds = NULL;
  self->candidate_offsets = NULL;
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}

static GstFlowReturn
gst_yolo_od_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (trans);
  GstTensor *masks_tensor;
  GstAnalyticsRelationMeta *rmeta;

  if (!gst_yolo_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor, NULL)) {
    return GST_FLOW_OK;
  }

  if (masks_tensor->num_dims != 3) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Masks tensor must have 3 dimensions but has %zu",
            masks_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  static GstAnalyticsRelationMetaInitParams rmeta_init_params = {
    .initial_buf_size = 1024,
    .initial_relation_order = 10
  };

  /* Retrieve or attach an analytics-relation-meta to the buffer.
   * Analytics-relation-meta are container that can reveive multiple
   * analytics-meta, like OD and Segmentation. The following call will only
   * retrieve an analytics-relation-meta if it exist or create one if it
   * does not exist. */
  rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params);
  g_assert (rmeta != NULL);

  /* Decode masks_tensor and attach the information in a structured way
   * to rmeta. */
  gst_yolo_od_tensor_decoder_decode_masks_f32 (self, masks_tensor, rmeta);

  return GST_FLOW_OK;
}

static void
gst_yolo_od_tensor_decoder_finalize (GObject * object)
{
  GstYoloOdTensorDecoder *self = GST_YOLO_OD_TENSOR_DECODER (object);

  g_free (self->label_file);
  g_clear_pointer (&self->labels, g_array_unref);

  G_OBJECT_CLASS (gst_yolo_od_tensor_decoder_parent_class)->finalize (object);
}

/* Evaluate if there's an intersection between segement s1 and s2 */
static guint
linear_intersection (guint s1_min, guint s1_max, guint s2_min, guint s2_max)
{
  guint tmp;
  if (s1_max > s2_min && s2_max > s1_min) {
    if (s1_min > s2_min) {
      tmp = (s2_max > s1_max) ? s1_max : s2_max;
      return tmp - s1_min;
    } else {
      tmp = (s1_max > s2_max) ? s2_max : s1_max;
      return tmp - s2_min;
    }
  }
  return 0.0f;
}

static gfloat
iou (guint bb1_x, guint bb1_y, guint bb1_w, guint bb1_h,
    guint bb2_x, guint bb2_y, guint bb2_w, guint bb2_h)
{
  /* Rational: linear intersection is much faster to calculate then
   * 2d intersection. We project the two bounding boxes considered for
   * intersection on one axis and verify if the segments the create intersect.
   * If they don't, the bounding boxes can't intersect in 2d and we don't
   * need to verify if they intersect on the other dimension. If they
   * intersect on the first dimension we verify if they intersec on the other
   * dimension. Again if the don't intersect the bounding boxes can't intersect
   * on in a 2D space. If they intersected on both axis we calculate the IoU.*/
  const guint x_intersection =
      linear_intersection (bb1_x, bb1_x + bb1_w, bb2_x, bb2_x + bb2_w);
  if (x_intersection > 0) {
    const guint y_intersection = linear_intersection (bb1_y, bb1_y + bb1_h,
        bb2_y, bb2_y + bb2_h);
    if (y_intersection > 0) {
      const guint bb1_area = bb1_w * bb1_h;
      const guint bb2_area = bb2_w * bb2_h;
      const guint intersect_area = x_intersection * y_intersection;
      const guint union_area = bb1_area + bb2_area - intersect_area;
      return union_area == 0 ? 0.0f : ((gfloat) intersect_area) / union_area;
    }
  }

  return 0.0f;
}

/* Extract bounding box from tensor data */
static void
gst_yolo_tensor_decoder_convert_bbox (gfloat * candidate, gsize * offset,
    BBox * bbox)
{
  gfloat w = *(candidate + offset[2]);
  gfloat h = *(candidate + offset[3]);
  bbox->x = *(candidate + offset[0]) - (w / 2);
  bbox->y = *(candidate + offset[1]) - (h / 2);
  bbox->w = w + 0.5;
  bbox->h = h + 0.5;
}

/* Calculate iou between boundingbox of candidate c1 and c2
 */
static gfloat
gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
    BBox * bb1, BBox * bb2)
{
  gst_yolo_tensor_decoder_convert_bbox (c1, offset, bb1);
  gst_yolo_tensor_decoder_convert_bbox (c2, offset, bb2);
  return iou (bb1->x, bb1->y, bb1->w, bb1->h, bb2->x, bb2->y, bb2->w, bb2->h);
}

/* Utility function to find maxmum confidence value across classes
 * specified by range.
 */
static gfloat
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c,
    const ConfidenceRange * c_range, gsize * max_class_ofs)
{
  gfloat max_val = 0.0;
  for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) {
    if (*(c + i) > max_val) {
      max_val = *(c + i);
      *max_class_ofs = i;
    }
  }
  return max_val;
}

/* Compare c1 and c2
 * Utility function for sorting candiates based on the a field identified
 * by offset.
 */
static gint
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
    gpointer range)
{
  ConfidenceRange *c_range = (ConfidenceRange *) range;
  const gfloat *candidate1 = *((gfloat **) c1);
  const gfloat *candidate2 = *((gfloat **) c2);
  gfloat max_c1_confi;
  gfloat max_c2_confi;
  gsize offset;

  if (candidate1[c_range->start] <= -1.0) {
    offset = (gsize) (-candidate1[c_range->start]);
    max_c1_confi = candidate1[offset];
  } else {
    max_c1_confi = candidate1[c_range->start];
  }

  if (candidate2[c_range->start] <= -1.0) {
    offset = (gsize) (-candidate2[c_range->start]);
    max_c2_confi = candidate2[offset];
  } else {
    max_c2_confi = candidate2[c_range->start];
  }

  return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
}

static void
gst_yolo_tensor_decoder_debug_print_candidate (gpointer candidate_,
    gpointer data)
{
  DebugCandidates *ctx = data;
  const gfloat *candidate = candidate_;

  for (gsize i = ctx->start; i < ctx->fields + ctx->start; i++) {
    GST_TRACE_OBJECT (ctx->self, "Field %lu: %f", i,
        *(candidate + (i * ctx->offset)));
  }
}

static float
sigmoid (float x)
{
  /* Check for positive overflow */
  if (x > 0) {
    double exp_neg_x = exp (-x);
    return 1.0 / (1.0 + exp_neg_x);
  }
  /* Check for negative overflow and improve stability for negative x */
  else {
    double exp_x = exp (x);
    return exp_x / (1.0 + exp_x);
  }
}

static gboolean
gst_yolo_tensor_decoder_decode_valid_bb (GstYoloOdTensorDecoder * self,
    gfloat x, gfloat y, gfloat w, gfloat h)
{
  GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (self);

  if (x > (GST_VIDEO_INFO_WIDTH (&parent->video_info)))
    return FALSE;
  if (y > (GST_VIDEO_INFO_HEIGHT (&parent->video_info)))
    return FALSE;
  if (x < -(gfloat) (GST_VIDEO_INFO_WIDTH (&parent->video_info) / 2.0))
    return FALSE;
  if (y < -(gfloat) (GST_VIDEO_INFO_HEIGHT (&parent->video_info) / 2.0))
    return FALSE;
  if (w <= 0)
    return FALSE;
  if (h <= 0)
    return FALSE;
  if (w > (GST_VIDEO_INFO_WIDTH (&parent->video_info)))
    return FALSE;
  if (h > (GST_VIDEO_INFO_HEIGHT (&parent->video_info)))
    return FALSE;

  return TRUE;
}


static void
gst_yolo_od_tensor_decoder_decode_masks_f32 (GstYoloOdTensorDecoder * self,
    GstTensor * masks_tensor, GstAnalyticsRelationMeta * rmeta)
{
  GstMapInfo map_info_masks;
  gfloat *candidate, **candidates, iou, confid = -1.0;
  gboolean rv, keep;
  gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
  GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;
  BBox bb1, bb2;
  GstAnalyticsODMtd od_mtd;
  ConfidenceRange c_range;
  gsize max_class_offset = 0, class_index;
  GQuark class_quark = OOI_CLASS_ID;

  /* Retrieve memory at index 0 and map it in READWRITE mode */
  masks_tensor->data = gst_buffer_make_writable (masks_tensor->data);
  rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE);
  g_assert (rv);


  GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims);

  /* Trace masks tensor dimensions */
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < masks_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i,
          masks_tensor->dims[i]);
    }
  }

  /* Allocated array to store selected candidates */
  if (sel_candidates == NULL) {
    /* Number of candidates can be large, keep the array to avoid frequent
     * allocation */
    sel_candidates = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
    self->sel_candidates = sel_candidates;
    selected = g_ptr_array_new_full (masks_tensor->dims[2], NULL);
    self->selected = selected;
    self->od_mtds = g_array_new (FALSE, FALSE, sizeof (GstAnalyticsODMtd));
    self->candidate_offsets = g_hash_table_new (g_direct_hash, g_direct_equal);
  } else {
    /* Reset lengths when we re-use arrays */
    g_ptr_array_set_size (sel_candidates, 0);
    g_ptr_array_set_size (selected, 0);
    g_array_set_size (self->od_mtds, 0);
    g_hash_table_remove_all (self->candidate_offsets);
  }

  /* masks_tensor->dims[2] contain the number of candidates. Let's call the
   * number of candidates C. We store this value in offset as we use it
   * calculate the offset of candidate fields. The variable #data_masks above point
   * at the masks tensor data, but candidates data is organize like a plane.
   * Candidates bbox X coord fields from 0 to C start at the begining of the
   * tensor data and are continguous in memory, followed by all candidates
   * field Y, followed by field W, ... followed by field class confidence level,
   * ..., followed by all candidates mask0, ..., followed by all candidates
   * mask31. Bellow we pre-calculate each field offset relative to the
   * candidate pointer (pointer to field X), which will allow us to easily
   * access each candiates field.
   * */
  offset = masks_tensor->dims[2];
  x_offset = 0;
  y_offset = offset;
  w_offset = 2 * offset;
  h_offset = 3 * offset;
  /* Start index of label confidence level */
  c_range.start = 4 * offset;
  /* Last index of label confidence level */
  c_range.end = (masks_tensor->dims[1] - YOLO_MASKS_WEIGHT_SIZE - 1) * offset;

  /* Step between class confidence level */
  c_range.step = offset;
  offsets[0] = x_offset;
  offsets[1] = y_offset;
  offsets[2] = w_offset;
  offsets[3] = h_offset;

#define BB_X(candidate) candidate[x_offset]
#define BB_Y(candidate) candidate[y_offset]
#define BB_W(candidate) candidate[w_offset]
#define BB_H(candidate) candidate[h_offset]

  candidate = (gfloat *) map_info_masks.data;
  for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
    /* Yolo have multiple class, so maximum confidence level across all class is used
     * to evaluate the relevance of the candidate. Here we filter candidates
     * based on their class confidence level.*/
    gfloat max_confidence =
        gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range,
        &max_class_offset);
    if (max_confidence > self->cls_confi_thresh
        && gst_yolo_tensor_decoder_decode_valid_bb (self,
            BB_X (candidate), BB_Y (candidate), BB_W (candidate),
            BB_H (candidate))) {

      /* We need a way to keep track of the class with maximum confidence. At
       * this level we're operating on a large number of candidate. Candidates
       * will be sorted and filtered later one. Here we use an inplace method
       * to store the offset of the class with highest confidence level. If
       * the class with highest confidence level is the first one we keep it's
       * value as-is, otherwise we overwrite the first class confidence level
       * with the value of the -offset of the class with maximum confidence. */
      if (max_class_offset != c_range.start) {
        candidate[c_range.start] = -(float) (max_class_offset);
      }

      g_ptr_array_add (sel_candidates, candidate);
      GST_TRACE_OBJECT (self,
          "%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
          c_idx,
          candidate[x_offset],
          candidate[y_offset],
          candidate[w_offset],
          candidate[h_offset],
          candidate[w_offset] * candidate[h_offset], max_confidence);
    }

    /* Pointer arithmetic, going to the next candidate. This is the candidate
     * pointer that is now incremented to the next candidate which is also
     * the field X of the next candidate.*/
    candidate += 1;
  }

  GST_LOG_OBJECT (self, "Selected candidates count: %u", sel_candidates->len);

  /* We sort the remaining candidates because, in the next selection phase we
   * have a maximum and we want to make sure that considered only the candidates
   * with the highest class confidence level before potentially reaching the
   * maximum.*/
  g_ptr_array_sort_with_data (sel_candidates,
      gst_yolo_tensor_decoder_sort_candidates, &c_range);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    /* For debug purpose only. Prints candidates before NMS */
    DebugCandidates ctx;
    ctx.start = 0;
    ctx.fields = 5;
    ctx.offset = offset;
    ctx.self = self;
    g_ptr_array_foreach (sel_candidates,
        gst_yolo_tensor_decoder_debug_print_candidate, &ctx);
  }

  /* Algorithm in part inspired by OpenCV NMSBoxes */
  candidates = (gfloat **) sel_candidates->pdata;
  for (gsize c = 0; c < sel_candidates->len; c++) {
    keep = TRUE;

    /* We only want to a NMS using IoU between candidates we've decided to
     * keep and the new one we considering to keep. selected array contain
     * the candidates we decided to keep and candidates[c] is the candidate
     * we're considering to keep or reject */
    for (gsize s = 0; s < selected->len && keep; s++) {
      iou = gst_yolo_tensor_decoder_iou (candidates[c], selected->pdata[s],
          offsets, &bb1, &bb2);
      keep = iou <= self->iou_thresh;
    }

    if (keep) {
      candidate = sel_candidates->pdata[c];
      if (selected->len == 0) {
        /* The first bounding-box always get in as there's no others bbox
         * to filter on based on IoU */
        gst_yolo_tensor_decoder_convert_bbox (candidate, offsets, &bb1);
      }

      g_ptr_array_add (selected, candidate);

      if (self->labels) {
        if (candidate[c_range.start] <= -1.0) {
          /* Max class is not the first one and `candidate[c_range.start]`
           * contain -offset to the class with maximum confidence */
          max_class_offset = (gsize) (-candidate[c_range.start]);
          confid = candidate[max_class_offset];

          /* Set overwritten confidence to 0 to avoir incorrect interpreation */
          candidate[c_range.start] = 0.0;
          class_index = (max_class_offset - c_range.start) / c_range.step;
        } else {
          confid = candidate[c_range.start];
          class_index = 0;
        }

        if (class_index < self->labels->len)
          class_quark = g_array_index (self->labels, GQuark, class_index);
      }

      /* We add the analytics-objectdetection-meta to the buffer. Since
       * there's only one class the class confidence level is set to -1.0
       * as it's deemed not important. */
      gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark,
          bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd);
      g_array_append_val (self->od_mtds, od_mtd);
      gsize offset_pos = candidate - (gfloat *) map_info_masks.data;
      g_hash_table_insert (self->candidate_offsets,
          GUINT_TO_POINTER (od_mtd.id), GSIZE_TO_POINTER (offset_pos));

      /* If the maximum number of candidate selected is reached exit the
       * selection process. */
      if (selected->len >= self->max_detection) {
        break;
      }
    }
  }

  GST_LOG_OBJECT (self, "Selected count: %u", selected->len);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    DebugCandidates ctx;
    /* For debug purpose only. Prints candidates after NMS */
    ctx.start = 0;
    ctx.fields = 5;
    ctx.offset = offset;
    ctx.self = self;
    g_ptr_array_foreach (selected,
        gst_yolo_tensor_decoder_debug_print_candidate, &ctx);
  }

  /* We unmap the memory */
  gst_buffer_unmap (masks_tensor->data, &map_info_masks);
}

/* Yolo segmentation tensor decoder */

static void
gst_yolo_seg_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_LOGITS_TENSOR_NAME:
      self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
      break;
    default:
      gst_yolo_od_tensor_decoder_set_property (object, prop_id, value, pspec);
      break;
  }
}

static void
gst_yolo_seg_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_LOGITS_TENSOR_NAME:
      g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
      break;
    default:
      G_OBJECT_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->get_property
          (object, prop_id, value, pspec);
      break;
  }
}

static gboolean
gst_yolo_seg_tensor_decoder_stop (GstBaseTransform * trans)
{
  GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (trans);

  self->mask_w = 0;
  self->mask_h = 0;
  self->mask_length = 0;
  if (self->mask_pool)
    gst_buffer_pool_set_active (self->mask_pool, FALSE);
  g_clear_object (&self->mask_pool);
  GST_BASE_TRANSFORM_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->stop
      (trans);

  return TRUE;
}

static void
gst_yolo_seg_tensor_decoder_finalize (GObject * object)
{
  G_OBJECT_CLASS (gst_yolo_seg_tensor_decoder_parent_class)->finalize (object);
}

static void
gst_yolo_seg_tensor_decoder_class_init (GstYoloSegTensorDecoderClass * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  /* Set GObject vmethod to get and set property */
  gobject_class->set_property = gst_yolo_seg_tensor_decoder_set_property;
  gobject_class->get_property = gst_yolo_seg_tensor_decoder_get_property;

  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_LOGITS_TENSOR_NAME,
      g_param_spec_string ("tensors-name-logits",
          "Logits tensors name",
          "Name that identify Yolo logits tensors.",
          GST_MODEL_YOLO_SEGMENTATION_LOGITS,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
              G_PARAM_STATIC_STRINGS)));

  /* Element description. */
  gst_element_class_set_static_metadata (element_class,
      "yolosegv8tensordecoder", "TensorDecoder/Video",
      "Decode tensors output from the inference of Yolo or FastSAM model (segmentation)"
      " on video frames. The original repository of the Yolo is located at"
      " https://github.com/ultralytics/ultralytics. For easy experimentation a"
      " object segmentation model based on Yolo architecture in Onnx "
      " format can be found at https://col.la/gstonnxmodelseg . This model "
      "already has tensors name embedded matching default "
      "values of tensors-masks-name and tensors-logits-name properties. It's "
      "also possible to embed tensor-ids into any model based on Yolo "
      "architecture to allow this tensor-decoder to decode tensors. This "
      "process is described in the Readme of this repository: "
      "https://col.la/gstonnxmodels",
      "Daniel Morin <daniel.morin@collabora.com>");

  /* Add pads to element base on pad template defined earlier */
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_yolo_tensor_decoder_src_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_yolo_tensor_decoder_sink_template));

  /* Set GstBaseTransform vmethod transform_ip. This methode is called
   * by the srcpad when it receive buffer. ip stand for in-place meaning the
   * buffer remain unchanged by the element. Tensor-decoder only monitor
   * buffer it receive for a meta attach to the buffer that is a GstTensorMeta
   * and has a tensor-id can be handled by GstYoloTensorDecoder. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_yolo_seg_tensor_decoder_transform_ip);

  /* Set GstBaseTransform set_caps vmethod. This will be called once the
   * capability negotiation has been completed. We will be able to extract
   * resolution from this callback. */
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_yolo_tensor_decoder_set_caps);

  /* Set GstBaseTransform stop vmethod. This will be called when the element
   * is set to NULL state. */
  basetransform_class->stop = gst_yolo_seg_tensor_decoder_stop;
  /* Set GObject vmethod finalize */
  gobject_class->finalize = gst_yolo_seg_tensor_decoder_finalize;

  /* Calculate the class id placeholder (also a quark) that will be set on all
   * OD analytics-meta. */
  OOI_CLASS_ID = g_quark_from_static_string ("Yolo-None");

  /* Calculate the Yolo Logits tensor-id */
  GST_MODEL_YOLO_SEGMENTATION_LOGITS_ID =
      g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
}

static void
gst_yolo_seg_tensor_decoder_init (GstYoloSegTensorDecoder * self)
{
  /* GstYoloSegTensorDecoder instance initialization */
  self->mask_w = 0;
  self->mask_h = 0;
  self->mask_length = 0;
  self->mask_pool = NULL;
  memset (&self->mask_roi, 0, sizeof (BBox));
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}

/* gst_yolo_seg_tensor_decoder_transform_ip:
 * @trans: Instance
 * @buf:inout: Buffer containing media and where tensors can be attached
 * @return: Flow errors
 * Decode Yolo tensors, post-process tensors and store decoded information
 * into an analytics-meta that is attached to the buffer before been pushed
 * downstream.
 */
static GstFlowReturn
gst_yolo_seg_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstYoloSegTensorDecoder *self = GST_YOLO_SEG_TENSOR_DECODER (trans);
  GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (trans);
  GstTensor *masks_tensor, *logits_tensor;
  GstAnalyticsRelationMeta *rmeta;
  gsize mask_w, mask_h;

  if (!gst_yolo_tensor_decoder_get_tensor_meta (self, buf, &masks_tensor,
          &logits_tensor))
    return GST_FLOW_OK;

  if (logits_tensor->num_dims != 4) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Logits tensor must have 4 dimensions but has %zu",
            masks_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  mask_w = logits_tensor->dims[2];
  mask_h = logits_tensor->dims[3];


  /* The masks need to be cropped to fit the SAR of the image. */
  /* TODO: We're reconstructing the transformation that was done on the
   * original image based on the assumption that the complete image without
   * deformation would be analyzed. This assumption is not alway true and
   * we should try to find a way to convey this transformation information
   * and retrieve from here to know the transformation that need to be done
   * on the mask.*/

  if (self->mask_w != mask_w || self->mask_h != mask_h) {
    self->mask_w = mask_w;
    self->mask_h = mask_h;
    self->mask_length = mask_w * mask_h;

    if (parent->video_info.width > parent->video_info.height) {
      self->bb2mask_gain = ((gfloat) self->mask_w) / parent->video_info.width;
      self->mask_roi.x = 0;
      self->mask_roi.w = self->mask_w;
      self->mask_roi.h =
          ((gfloat) self->bb2mask_gain) * parent->video_info.height;
      self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2;
    } else {
      self->bb2mask_gain = ((gfloat) self->mask_h) / parent->video_info.height;
      self->mask_roi.y = 0;
      self->mask_roi.h = self->mask_h;
      self->mask_roi.w = self->bb2mask_gain * parent->video_info.width;
      self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2;
    }

    if (self->mask_pool) {
      gst_buffer_pool_set_active (self->mask_pool, FALSE);
      g_clear_object (&self->mask_pool);
    }
  }

  if (self->mask_pool == NULL) {
    GstVideoInfo minfo;
    GstCaps *caps;
    gst_video_info_init (&minfo);
    gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, self->mask_w,
        self->mask_h);
    caps = gst_video_info_to_caps (&minfo);;
    self->mask_pool = gst_video_buffer_pool_new ();

    GstStructure *config = gst_buffer_pool_get_config (self->mask_pool);
    gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_META);
    gst_buffer_pool_set_config (self->mask_pool, config);
    gst_buffer_pool_set_active (self->mask_pool, TRUE);
    gst_caps_unref (caps);
  }


  static GstAnalyticsRelationMetaInitParams rmeta_init_params = {
    .initial_buf_size = 1024,
    .initial_relation_order = 10
  };

  /* Retrieve or attach an analytics-relation-meta to the buffer.
   * Analytics-relation-meta are container that can reveive multiple
   * analytics-meta, like OD and Segmentation. The following call will only
   * retrieve an analytics-relation-meta if it exist or create one if it
   * does not exist. */
  rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params);
  g_assert (rmeta != NULL);

  /* Decode masks_tensor and attach the information in a structured way
   * to rmeta. */
  gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (self, masks_tensor,
      logits_tensor, rmeta);

  return GST_FLOW_OK;
}

static void
gst_yolo_seg_tensor_decoder_decode_masks_logits_f32 (GstYoloSegTensorDecoder *
    self, GstTensor * masks_tensor, GstTensor * logits_tensor,
    GstAnalyticsRelationMeta * rmeta)
{
  GstYoloOdTensorDecoder *parent = GST_YOLO_OD_TENSOR_DECODER (self);
  GstMapInfo map_info_logits, out_mask_info, map_info_masks;
  GstAnalyticsSegmentationMtd seg_mtd;
  guint8 *mask_data;
  GstFlowReturn flowret;
  BBox bb_mask;
  gfloat *candidate, *data_logits;
  guint rv = 0;
  guint region_ids[2] = { 0, 0 };
  GstBuffer *mask_buf;
  gsize offset, m0_offset;

  gst_yolo_od_tensor_decoder_decode_masks_f32 (parent, masks_tensor, rmeta);

  /* Retrieve memory at index 0 from logits_tensor in READ mode */
  rv = gst_buffer_map (logits_tensor->data, &map_info_logits, GST_MAP_READ);
  g_assert (rv);
  data_logits = (gfloat *) map_info_logits.data;

  /* Trace masks tensor dimensions */
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < logits_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Masks Tensor dim %zu: %zu", i,
          logits_tensor->dims[i]);
    }
  }

  GST_LOG_OBJECT (self, "Mask Tensor shape dims %zu", masks_tensor->num_dims);

  offset = masks_tensor->dims[2];
  /* masks count in mask_tensors will be 32 */
  m0_offset = (masks_tensor->dims[1] - YOLO_MASKS_WEIGHT_SIZE) * offset;

#define MASK_X(candidate, index) candidate[m0_offset + (index * offset)]

  masks_tensor->data = gst_buffer_make_writable (masks_tensor->data);
  rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
  g_assert (rv);

  gfloat *mask_tensor_data = (gfloat *) map_info_masks.data;
  for (gsize c = 0; c < parent->od_mtds->len; c++) {
    BBox bb;
    GstAnalyticsODMtd od_mtd;
    od_mtd = g_array_index (parent->od_mtds, GstAnalyticsODMtd, c);
    candidate =
        mask_tensor_data +
        GPOINTER_TO_SIZE (g_hash_table_lookup (parent->candidate_offsets,
            GUINT_TO_POINTER (od_mtd.id)));
    gst_analytics_od_mtd_get_location (&od_mtd, &bb.x, &bb.y, (gint *) & bb.w,
        (gint *) & bb.h, NULL);

    bb_mask.x = self->bb2mask_gain * bb.x + self->mask_roi.x;
    bb_mask.y = self->bb2mask_gain * bb.y + self->mask_roi.y;
    bb_mask.w = self->bb2mask_gain * bb.w;
    bb_mask.h = self->bb2mask_gain * bb.h;

    mask_buf = NULL;
    flowret = gst_buffer_pool_acquire_buffer (self->mask_pool, &mask_buf, NULL);
    g_assert (flowret == GST_FLOW_OK);
    gst_buffer_map (mask_buf, &out_mask_info, GST_MAP_READWRITE);
    mask_data = (guint8 *) out_mask_info.data;

    GstVideoMeta *vmeta = gst_buffer_get_video_meta (mask_buf);
    g_assert (vmeta != NULL);
    vmeta->width = bb_mask.w;
    vmeta->height = bb_mask.h;

#define MX_MAX (bb_mask.x + bb_mask.w)
#define MY_MAX (bb_mask.y + bb_mask.h)

    for (gint my = bb_mask.y, i = 0, j; my < MY_MAX; my++) {
      for (gint mx = bb_mask.x; mx < MX_MAX; mx++, i++) {
        float sum = 0.0f;
        j = my * self->mask_w + mx;
        for (gsize k = 0; k < logits_tensor->dims[1]; ++k) {
          GST_TRACE_OBJECT (self, "protos data at (%d, %zu) is %f", j, k,
              data_logits[k * self->mask_length + j]);
          sum += MASK_X (candidate, k) * data_logits[k * self->mask_length + j];
        }
        mask_data[i] = sigmoid (sum) > 0.5 ? c + 1 : 0;
      }
    }

    gst_analytics_relation_meta_add_segmentation_mtd (rmeta, mask_buf,
        GST_SEGMENTATION_TYPE_INSTANCE, 1, region_ids, bb.x, bb.y, bb.w,
        bb.h, &seg_mtd);

    gst_buffer_unmap (mask_buf, &out_mask_info);
  }

  gst_buffer_unmap (logits_tensor->data, &map_info_logits);
  gst_buffer_unmap (masks_tensor->data, &map_info_masks);
}