tensordecoder: add facedetector tensor decoding support
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8600>
parent 6c5ab780d5
commit 251d74ca69
@@ -248095,6 +248095,62 @@
},
"rank": "primary"
},
"facedetector": {
"author": "The original repository of the Ultra Light Face Detection is located at https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB. Raghavendra Rao <raghavendra.rao@collabora.com>",
"description": "Decode the tensor output from the inference of the Ultra Light Face Detection model to detect faces in video frames.",
"hierarchy": [
"GstFaceDetectorTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "Tensordecoder/Video",
"pad-templates": {
"sink": {
"caps": "video/x-raw:\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"iou-threshold": {
"blurb": "Threshold for removing overlapping boxes based on intersection-over-union",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.3",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"score-threshold": {
"blurb": "Threshold for deciding when to remove boxes based on score",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.6",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",
@@ -0,0 +1,680 @@
/*
 * GStreamer gstreamer-facedetector
 * Copyright (C) 2025 Collabora Ltd.
 *
 * gstfacedetectortensordecoder.c
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/**
 * SECTION:element-facedetector
 * @short_description: Detect faces in video buffers using the Ultra Light Face Detection model.
 *
 * This element parses the per-buffer inference tensor meta data generated by an upstream
 * inference element.
 *
 * ## Example launch command:
 *
 * Test image files can be found here:
 * https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/imgs
 *
 * The model file can be found here:
 * https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/models/onnx
 *
 * GST_DEBUG=facedetector \
 * gst-launch-1.0 multifilesrc location=~/imgs/11.jpg ! jpegdec ! videoconvertscale ! \
 * onnxinference model-file=version-RFB-320.onnx input-image-format=chw input-tensor-offset=-127 input-tensor-scale=128.0 ! \
 * facedetector ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF draw-labels=false ! \
 * videoconvertscale ! autovideosink
 *
 * Since: 1.28
 */

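/* A minimal sketch (not part of this element) of how a downstream application
 * could read back the face boxes this decoder attaches as analytics meta, for
 * example from a buffer probe. The callback name is illustrative; the
 * GstAnalytics calls are assumed to be the same ones this plugin already
 * relies on:
 *
 *   static GstPadProbeReturn
 *   print_faces (GstPad * pad, GstPadProbeInfo * info, gpointer user_data)
 *   {
 *     GstBuffer *buf = GST_PAD_PROBE_INFO_BUFFER (info);
 *     GstAnalyticsRelationMeta *rmeta =
 *         gst_buffer_get_analytics_relation_meta (buf);
 *     GstAnalyticsODMtd od;
 *     gpointer state = NULL;
 *
 *     if (rmeta == NULL)
 *       return GST_PAD_PROBE_OK;
 *
 *     while (gst_analytics_relation_meta_iterate (rmeta, &state,
 *             gst_analytics_od_mtd_get_mtd_type (), &od)) {
 *       gint x, y, w, h;
 *       gfloat score;
 *
 *       if (gst_analytics_od_mtd_get_location (&od, &x, &y, &w, &h, &score))
 *         g_print ("face: %dx%d at (%d,%d), score %.2f\n", w, h, x, y, score);
 *     }
 *
 *     return GST_PAD_PROBE_OK;
 *   }
 */
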
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstfacedetectortensordecoder.h"

#include <gio/gio.h>

#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/analytics/analytics.h>
#include <math.h>               /* for expf() */

/* Face detection tensor id strings */
#define BOXES_TENSOR_ID "ssd-mobilenet-v1-variant-1-out-boxes"
#define SCORES_TENSOR_ID "ultra-lightweight-face-detection-rfb-320-v1-variant-1-out-scores"

GST_DEBUG_CATEGORY_STATIC (face_detector_tensor_decoder_debug);
#define GST_CAT_DEFAULT face_detector_tensor_decoder_debug

GST_ELEMENT_REGISTER_DEFINE (face_detector_tensor_decoder, "facedetector",
    GST_RANK_PRIMARY, GST_TYPE_FACE_DETECTOR_TENSOR_DECODER);

/* GstFaceDetectorTensorDecoder properties, see properties description in
 * gst_face_detector_tensor_decoder_class_init for more details. */
enum
{
  PROP_0,
  PROP_SCORE_THRESHOLD,
  PROP_IOU_THRESHOLD
};

/* Default property values */
static const gfloat DEFAULT_SCORE_THRESHOLD = 0.6f;     /* confidence threshold */
static const gfloat DEFAULT_IOU_THRESHOLD = 0.3f;       /* NMS IoU threshold */

/* Tensor-ids are defined by a string that is converted to a quark,
 * which is just an integer value computed with a hash function. For efficiency
 * we compare the quarks (hash values). Since tensor-ids never change, we
 * calculate the hash once during initialization and store the values in
 * these variables. */
GQuark BOXES_TENSOR_ID_QUARK;
GQuark SCORES_TENSOR_ID_QUARK;

GQuark FACE_QUARK;

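/* For example, g_quark_from_static_string (BOXES_TENSOR_ID) always maps the
 * same string to the same GQuark within a process, so matching a tensor
 * against BOXES_TENSOR_ID_QUARK below is a plain integer comparison rather
 * than a strcmp() on every buffer. */
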
/* GStreamer element srcpad template. Template of a srcpad that can output
 * any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw")
    );

/* GStreamer element sinkpad template. Template of a sinkpad that can receive
 * any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw")
    );

/* Prototypes */
static void gst_face_detector_tensor_decoder_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_finalize (GObject * object);
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf);
static gboolean gst_face_detector_tensor_decoder_set_caps (GstBaseTransform *
    trans, GstCaps * incaps, GstCaps * outcaps);

G_DEFINE_TYPE (GstFaceDetectorTensorDecoder, gst_face_detector_tensor_decoder,
    GST_TYPE_BASE_TRANSFORM);

static void
gst_face_detector_tensor_decoder_class_init (GstFaceDetectorTensorDecoderClass
    * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  /* Define GstFaceDetectorTensorDecoder debug category. */
  GST_DEBUG_CATEGORY_INIT (face_detector_tensor_decoder_debug,
      "facedetector", 0, "Tensor Decoder for Face Detection");

  /* Set the GObject vmethods to get and set properties */
  gobject_class->set_property = gst_face_detector_tensor_decoder_set_property;
  gobject_class->get_property = gst_face_detector_tensor_decoder_get_property;
  gobject_class->finalize = gst_face_detector_tensor_decoder_finalize;

  /* Define GstFaceDetectorTensorDecoder properties using the GObject
   * properties interface. */

  /**
   * GstFaceDetectorTensorDecoder:score-threshold
   *
   * Threshold for deciding when to remove boxes based on score
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_SCORE_THRESHOLD,
      g_param_spec_float ("score-threshold",
          "Score threshold",
          "Threshold for deciding when to remove boxes based on score",
          0.0, 1.0, DEFAULT_SCORE_THRESHOLD, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstFaceDetectorTensorDecoder:iou-threshold
   *
   * Threshold for removing overlapping boxes based on intersection-over-union
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_IOU_THRESHOLD,
      g_param_spec_float ("iou-threshold",
          "IoU threshold",
          "Threshold for removing overlapping boxes based on intersection-over-union",
          0.0, 1.0, DEFAULT_IOU_THRESHOLD,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /* Element description. */
  gst_element_class_set_static_metadata (element_class,
      "facedetector", "Tensordecoder/Video",
      "Decode the tensor output from the inference of the Ultra Light Face"
      " Detection model to detect faces in video frames.",
      "The original repository of the Ultra Light Face Detection is located at"
      " https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB. "
      "Raghavendra Rao <raghavendra.rao@collabora.com>");

  /* Add pads to the element based on the pad templates defined earlier */
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_sink_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_src_template));

  /* Set the GstBaseTransform transform_ip vmethod. This method is called
   * by the sinkpad when it receives a buffer. "ip" stands for in-place,
   * meaning the media in the buffer is left unchanged by the element. The
   * tensor decoder only looks for a GstTensorMeta attached to the buffer
   * with a tensor-id it can handle. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_transform_ip);

  /* Set the GstBaseTransform set_caps vmethod. This will be called once the
   * capability negotiation has been completed. We will be able to extract
   * the resolution from this callback. */
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_set_caps);

  BOXES_TENSOR_ID_QUARK = g_quark_from_static_string (BOXES_TENSOR_ID);
  SCORES_TENSOR_ID_QUARK = g_quark_from_static_string (SCORES_TENSOR_ID);
  FACE_QUARK = g_quark_from_static_string ("face");
}

static void
gst_face_detector_tensor_decoder_init (GstFaceDetectorTensorDecoder * self)
{
  self->score_threshold = DEFAULT_SCORE_THRESHOLD;
  self->iou_threshold = DEFAULT_IOU_THRESHOLD;
  self->sel_candidates = NULL;
  self->selected = NULL;
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}

static void
gst_face_detector_tensor_decoder_finalize (GObject * object)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  g_clear_pointer (&self->sel_candidates, g_ptr_array_unref);
  g_clear_pointer (&self->selected, g_ptr_array_unref);
  g_clear_pointer (&self->candidates, g_free);

  G_OBJECT_CLASS (gst_face_detector_tensor_decoder_parent_class)->finalize
      (object);
}

static void
gst_face_detector_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_SCORE_THRESHOLD:
      self->score_threshold = g_value_get_float (value);
      break;
    case PROP_IOU_THRESHOLD:
      self->iou_threshold = g_value_get_float (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_face_detector_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_SCORE_THRESHOLD:
      g_value_set_float (value, self->score_threshold);
      break;
    case PROP_IOU_THRESHOLD:
      g_value_set_float (value, self->iou_threshold);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

/* gst_face_detector_tensor_decoder_set_caps:
 *
 * Callback on caps negotiation completed. We use it here to retrieve the
 * video resolution. See GstBaseTransform for more details.
 */
static gboolean
gst_face_detector_tensor_decoder_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps)
{
  GstFaceDetectorTensorDecoder *self = GST_FACE_DETECTOR_TENSOR_DECODER (trans);

  if (!gst_video_info_from_caps (&self->video_info, incaps)) {
    GST_ERROR_OBJECT (self, "Failed to parse caps");
    return FALSE;
  }

  return TRUE;
}

/* gst_face_detector_tensor_decoder_get_tensor_meta
 * @buf:in: buffer
 * @boxes_tensor:out: boxes tensor
 * @scores_tensor:out: scores tensor
 * @return: TRUE if buf has boxes and scores tensors attached to it.
 * Retrieve the FaceDetection boxes and scores tensors from the buffer.
 */
static gboolean
gst_face_detector_tensor_decoder_get_tensor_meta (GstFaceDetectorTensorDecoder
    * self, GstBuffer * buf, GstTensor ** boxes_tensor,
    GstTensor ** scores_tensor)
{
  GstTensorMeta *tensor_meta;
  gint boxes_tensor_idx, scores_tensor_idx;

  g_return_val_if_fail (boxes_tensor != NULL, FALSE);
  g_return_val_if_fail (scores_tensor != NULL, FALSE);

  *boxes_tensor = NULL;
  *scores_tensor = NULL;

  /* Retrieve the TensorMeta attached to the buffer */
  tensor_meta = gst_buffer_get_tensor_meta (buf);
  if (!tensor_meta) {
    GST_LOG_OBJECT (self, "No tensor meta");
    return FALSE;
  }

  GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);

  /* Retrieve the index of the tensor that has a tensor-id matching
   * BOXES_TENSOR_ID_QUARK in the GstTensorMeta. */
  boxes_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
      BOXES_TENSOR_ID_QUARK);

  /* Retrieve the index of the tensor that has a tensor-id matching
   * SCORES_TENSOR_ID_QUARK in the GstTensorMeta. */
  scores_tensor_idx =
      gst_tensor_meta_get_index_from_id (tensor_meta, SCORES_TENSOR_ID_QUARK);

  if (boxes_tensor_idx >= 0 && scores_tensor_idx >= 0) {
    GST_LOG_OBJECT (self, "Boxes tensor index: %d", boxes_tensor_idx);
    GST_LOG_OBJECT (self, "Scores tensor index: %d", scores_tensor_idx);

    *boxes_tensor = tensor_meta->tensors[boxes_tensor_idx];
    *scores_tensor = tensor_meta->tensors[scores_tensor_idx];

    return TRUE;
  } else {
    GST_INFO_OBJECT (self, "Couldn't find boxes or scores tensor, skipping");
  }

  return FALSE;
}

/* Compare c1 and c2
 * Utility function for sorting candidates based on their scores.
 */
static gint
gst_face_detector_tensor_decoder_sort_candidates (gconstpointer c1,
    gconstpointer c2)
{
  const Candidate *candidate1 = *((Candidate **) c1);
  const Candidate *candidate2 = *((Candidate **) c2);

  if (*candidate1->score < *candidate2->score) {
    return 1;
  } else if (*candidate1->score > *candidate2->score) {
    return -1;
  } else {
    return 0;
  }
}

static gfloat
iou_box (const Candidate * a, const Candidate * b)
{
  gfloat ax1 = a->box[0];
  gfloat ay1 = a->box[1];
  gfloat ax2 = a->box[2];
  gfloat ay2 = a->box[3];

  gfloat bx1 = b->box[0];
  gfloat by1 = b->box[1];
  gfloat bx2 = b->box[2];
  gfloat by2 = b->box[3];

  gfloat xx1 = (ax1 > bx1) ? ax1 : bx1;
  gfloat yy1 = (ay1 > by1) ? ay1 : by1;
  gfloat xx2 = (ax2 < bx2) ? ax2 : bx2;
  gfloat yy2 = (ay2 < by2) ? ay2 : by2;

  gfloat w = xx2 - xx1;
  gfloat h = yy2 - yy1;
  if (w < 0.0f || h < 0.0f) {
    /* No overlap */
    return 0.0f;
  }

  /* Area of intersection */
  gfloat intersection = w * h;

  /* Area of each box */
  gfloat areaA = (ax2 - ax1) * (ay2 - ay1);
  gfloat areaB = (bx2 - bx1) * (by2 - by1);
  if (areaA <= 0.0f || areaB <= 0.0f)
    return 0.0f;

  /* IoU = intersection / union */
  gfloat iou = intersection / (areaA + areaB - intersection);
  return iou;
}

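/* Worked example: two unit squares offset by 0.5 horizontally intersect in a
 * 0.5 x 1 strip, so intersection = 0.5, union = 1 + 1 - 0.5 = 1.5 and
 * IoU = 0.5 / 1.5 = 0.33. */
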
/* hard_nms:
 * @sel_candidates: array of pointers to the selected boxes with scores
 * @selected: array of pointers to the selected boxes with scores after the removal of overlapping boxes
 * @iou_threshold: IoU threshold above which an overlapping box is discarded
 * @top_k: number of boxes to keep (if top_k <= 0, keep all).
 * @return: void
 * Hard NMS:
 * 1) Keep the highest scoring box
 * 2) Remove boxes with IoU >= iou_threshold
 * 3) Repeat until no boxes are left or we reach top_k
 */
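/* For example, with the default iou-threshold of 0.3: if the best-scoring box
 * overlaps another candidate with IoU 0.45, that candidate is discarded, while
 * a candidate overlapping it with IoU 0.10 is kept as a separate detection. */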
static void
hard_nms (const GPtrArray * sel_candidates,
    GPtrArray * selected, gfloat iou_threshold, gint top_k)
{
  /* Edge case: handle the case of no input boxes */
  if (sel_candidates->len == 0) {
    return;
  }

  /* We'll mark boxes as "suppressed" using an array of booleans. */
  gchar *discarded = g_alloca0 (sel_candidates->len);   /* 0 => keep, 1 => discard */

  /* The maximum possible output is 'sel_candidates->len'. We'll store the kept boxes into 'selected'. */

  /* Perform NMS. */
  for (gsize i = 0; i < sel_candidates->len; i++) {
    if (discarded[i]) {
      /* Already thrown out due to overlap. */
      continue;
    }

    /* Get the current indexed candidate from the selected candidates.
     * Then store this current box/candidate into the final selected candidates array.
     */
    Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
    g_ptr_array_add (selected, c);

    /* If we have reached top_k (and top_k > 0), break. */
    if (top_k > 0 && selected->len == top_k) {
      break;
    }

    /* Suppress any candidate that overlaps (IoU >= iou_threshold) with the current one. */
    for (gsize j = i + 1; j < sel_candidates->len; j++) {
      if (discarded[j])
        continue;

      gfloat overlap = iou_box (g_ptr_array_index (sel_candidates, i),
          g_ptr_array_index (sel_candidates, j));
      if (overlap >= iou_threshold) {
        discarded[j] = 1;       /* Mark for discard */
      }
    }
  }
}

/* gst_face_detector_tensor_decoder_decode_boxes_f32:
 * @self: Instance
 * @boxes_tensor: Buffer containing the boxes tensor
 * @scores_tensor: Buffer containing the scores/confidences tensor
 * @rmeta: analytics-meta that is attached to the buffer
 * @return: void
 * Decode the Face Detection tensors, post-process them and store the decoded
 * information into an analytics-meta that is attached to the buffer before it
 * is pushed downstream.
 */
static void
gst_face_detector_tensor_decoder_decode_boxes_f32 (GstFaceDetectorTensorDecoder
    * self, GstTensor * boxes_tensor, GstTensor * scores_tensor,
    GstAnalyticsRelationMeta * rmeta)
{
  GstMapInfo map_info_boxes, map_info_scores;
  gfloat *candidate, *score;
  gboolean rv;
  GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;

  /* Retrieve memory at index 0 from boxes_tensor in READ mode */
  boxes_tensor->data = gst_buffer_make_writable (boxes_tensor->data);

  rv = gst_buffer_map (boxes_tensor->data, &map_info_boxes, GST_MAP_READ);
  g_assert (rv);

  /* Retrieve memory at index 0 from scores_tensor in READ mode */
  rv = gst_buffer_map (scores_tensor->data, &map_info_scores, GST_MAP_READ);
  g_assert (rv);

  GST_LOG_OBJECT (self, "Boxes tensor shape dims %zu", boxes_tensor->num_dims);
  GST_LOG_OBJECT (self, "Scores tensor shape dims %zu",
      scores_tensor->num_dims);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    /* Trace boxes tensor dimensions */
    for (gsize i = 0; i < boxes_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Boxes tensor dim %zu: %zu", i,
          boxes_tensor->dims[i]);
    }

    /* Trace scores tensor dimensions */
    for (gsize i = 0; i < scores_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Scores tensor dim %zu: %zu", i,
          scores_tensor->dims[i]);
    }
  }

  /* Allocate arrays to store the selected candidates */
  if (sel_candidates == NULL) {
    /* The number of candidates can be large, keep the arrays around to avoid
     * frequent allocation */
    sel_candidates = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->sel_candidates = sel_candidates;
    selected = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->selected = selected;
    self->candidates = (Candidate *) g_new0 (Candidate, boxes_tensor->dims[1]);
  } else {
    /* Reset lengths when we re-use the arrays */
    g_ptr_array_set_size (sel_candidates, 0);
    g_ptr_array_set_size (selected, 0);
  }

  score = (gfloat *) map_info_scores.data;
  candidate = (gfloat *) map_info_boxes.data;

  gsize idx = 0;

  /* For UltraLightFaceDetection:
   * "boxes"  => shape [N,4], where N = 4420
   * "scores" => shape [N,2], (background, face)
   * We'll skip the background (index = 0) and keep the foreground (index = 1).
   */

  /*
   * Iterate through the scores tensor.
   * Check whether the score exceeds the score threshold; if it does, select the score and the corresponding box.
   * Add these selected boxes to the sel_candidates array.
   */
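  /* Concretely: for candidate k, score[2 * k + 1] is the face confidence and
   * candidate[4 * k] .. candidate[4 * k + 3] hold its normalized
   * (x1, y1, x2, y2), which is why the loop below advances the score index by
   * 2 (starting at 1) and the box index by 4. */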
  for (gsize i = 1, j = 0; i < scores_tensor->dims[1] * 2; i += 2, j += 4) {
    if (score[i] >= self->score_threshold) {
      self->candidates[idx].index = idx;
      self->candidates[idx].box = &candidate[j];
      self->candidates[idx].score = &score[i];

      g_ptr_array_add (sel_candidates, &self->candidates[idx]);
      idx++;
    }
  }

  GST_LOG_OBJECT (self, "Number of selected candidates = %d",
      sel_candidates->len);

  if (sel_candidates->len == 0) {
    GST_LOG_OBJECT (self, "No boxes above threshold=%1.2f",
        self->score_threshold);
    goto cleanup;
  }

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      gsize j = 0;
      for (; j < boxes_tensor->dims[2]; j++) {
        GST_TRACE_OBJECT (self, "sel_candidates[%zu] = %1.5f ", i + j,
            c->box[j]);
      }
      GST_TRACE_OBJECT (self, "score[%zu] = %1.5f", i + j, c->score[0]);
    }
  }

  /*
   * Sort the sel_candidates array so that the candidates are in descending order w.r.t. scores
   */
  g_ptr_array_sort (sel_candidates,
      gst_face_detector_tensor_decoder_sort_candidates);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      GST_TRACE_OBJECT (self, "c[%zu] = %1.5f index = %d", i, c->score[0],
          c->index);
    }
  }

  /* NMS */
  hard_nms (sel_candidates, selected, self->iou_threshold, -1);

  GST_LOG_OBJECT (self, "Number of faces detected = %d", selected->len);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < selected->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
      GST_TRACE_OBJECT (self,
          "%zu x1 = %1.5f y1 = %1.5f x2 = %1.5f y2 = %1.5f score = %1.5f",
          i + 1, c->box[0], c->box[1], c->box[2], c->box[3],
          c->score[0]);
    }
  }

  gsize frame_width = self->video_info.width;
  gsize frame_height = self->video_info.height;

  /* Convert each final box from normalized to pixel coords and attach to meta. */
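  /* For example, a normalized box (0.25, 0.30, 0.50, 0.55) on a 640x480 frame
   * becomes x = 160, y = 144, w = 160, h = 120. */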
  for (gint i = 0; i < selected->len; i++) {
    Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
    gfloat x1 = c->box[0] * frame_width;
    gfloat y1 = c->box[1] * frame_height;
    gfloat x2 = c->box[2] * frame_width;
    gfloat y2 = c->box[3] * frame_height;
    gfloat w_ = x2 - x1;
    gfloat h_ = y2 - y1;

    /* Add to analytics meta: (x, y, width, height). */
    gst_analytics_relation_meta_add_od_mtd (rmeta, FACE_QUARK,
        (gint) (x1 + 0.5f), (gint) (y1 + 0.5f),
        (gint) (w_ + 0.5f), (gint) (h_ + 0.5f), c->score[0], NULL);
  }

cleanup:

  /* Unmap */
  gst_buffer_unmap (boxes_tensor->data, &map_info_boxes);
  gst_buffer_unmap (scores_tensor->data, &map_info_scores);
}

/* gst_face_detector_tensor_decoder_transform_ip:
 * @trans: Instance
 * @buf:inout: Buffer containing media and where tensors can be attached
 * @return: Flow errors
 * Decode the Face Detection tensors, post-process them and store the decoded
 * information into an analytics-meta that is attached to the buffer before it
 * is pushed downstream.
 */
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstFaceDetectorTensorDecoder *self = GST_FACE_DETECTOR_TENSOR_DECODER (trans);
  GstTensor *boxes_tensor, *scores_tensor;
  GstAnalyticsRelationMeta *rmeta;

  if (!gst_face_detector_tensor_decoder_get_tensor_meta (self, buf,
          &boxes_tensor, &scores_tensor))
    return GST_FLOW_OK;

  if (boxes_tensor->num_dims != 3) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Boxes tensor must have 3 dimensions but has %zu",
            boxes_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  if (scores_tensor->num_dims != 3) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Scores tensor must have 3 dimensions but has %zu",
            scores_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  if (boxes_tensor->data_type != GST_TENSOR_DATA_TYPE_FLOAT32 ||
      scores_tensor->data_type != GST_TENSOR_DATA_TYPE_FLOAT32) {
    GST_ELEMENT_ERROR (self, STREAM, NOT_IMPLEMENTED,
        ("Only data-type FLOAT32 support is implemented"),
        ("Please implement."));

    return GST_FLOW_ERROR;
  }

  rmeta = gst_buffer_add_analytics_relation_meta (buf);
  g_assert (rmeta != NULL);

  /* Decode boxes_tensor and scores_tensor and attach the information in a
   * structured way to rmeta. */
  gst_face_detector_tensor_decoder_decode_boxes_f32 (self, boxes_tensor,
      scores_tensor, rmeta);

  return GST_FLOW_OK;
}
@@ -0,0 +1,86 @@
/*
 * GStreamer gstreamer-facedetectortensordecoder
 * Copyright (C) 2025 Collabora Ltd
 *
 * gstfacedetectortensordecoder.h
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#ifndef __GST_FACE_DETECTOR_TENSOR_DECODER_H__
#define __GST_FACE_DETECTOR_TENSOR_DECODER_H__

#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>

G_BEGIN_DECLS
#define GST_TYPE_FACE_DETECTOR_TENSOR_DECODER (gst_face_detector_tensor_decoder_get_type())
G_DECLARE_FINAL_TYPE (GstFaceDetectorTensorDecoder,
    gst_face_detector_tensor_decoder, GST, FACE_DETECTOR_TENSOR_DECODER,
    GstBaseTransform)

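/* A candidate detection. The box and score pointers do not own any memory:
 * they point straight into the mapped boxes and scores tensors, so a
 * Candidate is only valid while the decoder keeps those tensors mapped. */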
typedef struct
{
  guint16 index;
  gfloat *box;
  gfloat *score;
} Candidate;

/**
 * GstFaceDetectorTensorDecoder:
 *
 * Since: 1.28
 */
struct _GstFaceDetectorTensorDecoder
{
  GstBaseTransform basetransform;

  /* Confidence threshold. */
  gfloat score_threshold;

  /* Intersection-over-Union threshold. */
  gfloat iou_threshold;

  /* Video Info */
  GstVideoInfo video_info;

  /* Candidates with a class confidence level above the threshold. */
  GPtrArray *sel_candidates;

  /* Final candidates selected that respect the class confidence level,
   * NMS and maximum detection count. */
  GPtrArray *selected;

  /* Candidates with a class confidence level and bounding boxes. */
  Candidate *candidates;
};

/**
 * GstFaceDetectorTensorDecoderClass:
 *
 * @parent_class: base transform base class
 *
 * Since: 1.28
 */
struct _GstFaceDetectorTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};

GST_ELEMENT_REGISTER_DECLARE (face_detector_tensor_decoder)
G_END_DECLS
#endif /* __GST_FACE_DETECTOR_TENSOR_DECODER_H__ */
@@ -26,6 +26,7 @@

#include "gstssdobjectdetector.h"
#include "gstclassifiertensordecoder.h"
#include "gstfacedetectortensordecoder.h"

/**
 * SECTION:plugin-tensordecoders
@@ -40,6 +41,7 @@ plugin_init (GstPlugin * plugin)
  gboolean ret = FALSE;
  ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
  ret |= GST_ELEMENT_REGISTER (classifier_tensor_decoder, plugin);
  ret |= GST_ELEMENT_REGISTER (face_detector_tensor_decoder, plugin);

  return ret;
}
@@ -1,12 +1,14 @@
tensordecoders_sources = [
  'gsttensordecoders.c',
  'gstssdobjectdetector.c',
  'gstclassifiertensordecoder.c'
  'gstclassifiertensordecoder.c',
  'gstfacedetectortensordecoder.c'
]

tensordecoders_headers = [
  'gstssdobjectdetector.h',
  'gstclassifiertensordecoder.h'
  'gstclassifiertensordecoder.h',
  'gstfacedetectortensordecoder.h'
]

doc_sources = []