tensordecoder: add facedetector tensor decoding support
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8600>
parent 6c5ab780d5
commit 251d74ca69
@@ -248095,6 +248095,62 @@
},
"rank": "primary"
},
"facedetector": {
"author": "The original repository of the Ultra Light Face Detection is located at https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB. Raghavendra Rao <raghavendra.rao@collabora.com>",
"description": "Decode the tensor output from the inference of the Ultra Light Face Detection model to detect faces in video frames.",
"hierarchy": [
"GstFaceDetectorTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "Tensordecoder/Video",
"pad-templates": {
"sink": {
"caps": "video/x-raw:\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n",
"direction": "src",
"presence": "always"
}
},
"properties": {
"iou-threshold": {
"blurb": "Threshold for removing overlapping boxes based on intersection-over-union",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.3",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"score-threshold": {
"blurb": "Threshold for deciding when to remove boxes based on score",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.6",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",
@@ -0,0 +1,680 @@
/*
 * GStreamer gstreamer-facedetector
 * Copyright (C) 2025 Collabora Ltd.
 *
 * gstfacedetectortensordecoder.c
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/**
 * SECTION:element-facedetector
 * @short_description: Detect faces in video buffers using the Ultra Light Face Detection model.
 *
 * This element parses the per-buffer inference tensor meta data generated by an upstream
 * inference element.
 *
 * ## Example launch command:
 *
 * Test image files can be found here:
 * https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/imgs
 *
 * The model file can be found here:
 * https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/models/onnx
 *
 * GST_DEBUG=facedetector \
 * gst-launch-1.0 multifilesrc location=~/imgs/11.jpg ! jpegdec ! videoconvertscale ! \
 * onnxinference model-file=version-RFB-320.onnx input-image-format=chw input-tensor-offset=-127 input-tensor-scale=128.0 ! \
 * facedetector ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF draw-labels=false ! \
 * videoconvertscale ! autovideosink
 *
 * Since: 1.28
 */

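/* A minimal sketch (not part of this element) of how a downstream application
 * could read back the face boxes this decoder attaches as analytics meta, for
 * example from a buffer probe. The callback name is illustrative; the
 * GstAnalytics calls are assumed to be the same ones this plugin already
 * relies on:
 *
 *   static GstPadProbeReturn
 *   print_faces (GstPad * pad, GstPadProbeInfo * info, gpointer user_data)
 *   {
 *     GstBuffer *buf = GST_PAD_PROBE_INFO_BUFFER (info);
 *     GstAnalyticsRelationMeta *rmeta =
 *         gst_buffer_get_analytics_relation_meta (buf);
 *     GstAnalyticsODMtd od;
 *     gpointer state = NULL;
 *
 *     if (rmeta == NULL)
 *       return GST_PAD_PROBE_OK;
 *
 *     while (gst_analytics_relation_meta_iterate (rmeta, &state,
 *             gst_analytics_od_mtd_get_mtd_type (), &od)) {
 *       gint x, y, w, h;
 *       gfloat score;
 *
 *       if (gst_analytics_od_mtd_get_location (&od, &x, &y, &w, &h, &score))
 *         g_print ("face: %dx%d at (%d,%d), score %.2f\n", w, h, x, y, score);
 *     }
 *
 *     return GST_PAD_PROBE_OK;
 *   }
 */
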
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstfacedetectortensordecoder.h"

#include <gio/gio.h>

#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/analytics/analytics.h>
#include <math.h>               /* for expf() */

/* Face detection tensor id strings */
#define BOXES_TENSOR_ID "ssd-mobilenet-v1-variant-1-out-boxes"
#define SCORES_TENSOR_ID "ultra-lightweight-face-detection-rfb-320-v1-variant-1-out-scores"

GST_DEBUG_CATEGORY_STATIC (face_detector_tensor_decoder_debug);
#define GST_CAT_DEFAULT face_detector_tensor_decoder_debug

GST_ELEMENT_REGISTER_DEFINE (face_detector_tensor_decoder, "facedetector",
    GST_RANK_PRIMARY, GST_TYPE_FACE_DETECTOR_TENSOR_DECODER);

/* GstFaceDetectorTensorDecoder properties, see properties description in
 * gst_face_detector_tensor_decoder_class_init for more details. */
enum
{
  PROP_0,
  PROP_SCORE_THRESHOLD,
  PROP_IOU_THRESHOLD
};

/* Default property values */
static const gfloat DEFAULT_SCORE_THRESHOLD = 0.6f;     /* confidence threshold */
static const gfloat DEFAULT_IOU_THRESHOLD = 0.3f;       /* NMS IoU threshold */

/* Tensor-ids are defined by a string that is converted to a quark,
 * which is just an integer value computed with a hash function. For efficiency
 * we compare the quarks (hash values). Since tensor-ids never change, we
 * calculate the hash once during initialization and store the values in
 * these variables. */
GQuark BOXES_TENSOR_ID_QUARK;
GQuark SCORES_TENSOR_ID_QUARK;

GQuark FACE_QUARK;

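/* For example, g_quark_from_static_string (BOXES_TENSOR_ID) always maps the
 * same string to the same GQuark within a process, so matching a tensor
 * against BOXES_TENSOR_ID_QUARK below is a plain integer comparison rather
 * than a strcmp() on every buffer. */
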
/* GStreamer element srcpad template. Template of a srcpad that can output
 * any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw")
    );

/* GStreamer element sinkpad template. Template of a sinkpad that can receive
 * any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-raw")
    );

/* Prototypes */
static void gst_face_detector_tensor_decoder_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_finalize (GObject * object);
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf);
static gboolean gst_face_detector_tensor_decoder_set_caps (GstBaseTransform *
    trans, GstCaps * incaps, GstCaps * outcaps);

G_DEFINE_TYPE (GstFaceDetectorTensorDecoder, gst_face_detector_tensor_decoder,
    GST_TYPE_BASE_TRANSFORM);

static void
gst_face_detector_tensor_decoder_class_init (GstFaceDetectorTensorDecoderClass
    * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  /* Define GstFaceDetectorTensorDecoder debug category. */
  GST_DEBUG_CATEGORY_INIT (face_detector_tensor_decoder_debug,
      "facedetector", 0, "Tensor Decoder for Face Detection");

  /* Set the GObject vmethods to get and set properties */
  gobject_class->set_property = gst_face_detector_tensor_decoder_set_property;
  gobject_class->get_property = gst_face_detector_tensor_decoder_get_property;
  gobject_class->finalize = gst_face_detector_tensor_decoder_finalize;

  /* Define GstFaceDetectorTensorDecoder properties using the GObject
   * properties interface. */

  /**
   * GstFaceDetectorTensorDecoder:score-threshold
   *
   * Threshold for deciding when to remove boxes based on score
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_SCORE_THRESHOLD,
      g_param_spec_float ("score-threshold",
          "Score threshold",
          "Threshold for deciding when to remove boxes based on score",
          0.0, 1.0, DEFAULT_SCORE_THRESHOLD, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstFaceDetectorTensorDecoder:iou-threshold
   *
   * Threshold for removing overlapping boxes based on intersection-over-union
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_IOU_THRESHOLD,
      g_param_spec_float ("iou-threshold",
          "IoU threshold",
          "Threshold for removing overlapping boxes based on intersection-over-union",
          0.0, 1.0, DEFAULT_IOU_THRESHOLD,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /* Element description. */
  gst_element_class_set_static_metadata (element_class,
      "facedetector", "Tensordecoder/Video",
      "Decode the tensor output from the inference of the Ultra Light Face"
      " Detection model to detect faces in video frames.",
      "The original repository of the Ultra Light Face Detection is located at"
      " https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB. "
      "Raghavendra Rao <raghavendra.rao@collabora.com>");

  /* Add pads to the element based on the pad templates defined earlier */
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_sink_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_src_template));

  /* Set the GstBaseTransform transform_ip vmethod. This method is called
   * by the sinkpad when it receives a buffer. "ip" stands for in-place,
   * meaning the media in the buffer is left unchanged by the element. The
   * tensor decoder only looks for a GstTensorMeta attached to the buffer
   * with a tensor-id it can handle. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_transform_ip);

  /* Set the GstBaseTransform set_caps vmethod. This will be called once the
   * capability negotiation has been completed. We will be able to extract
   * the resolution from this callback. */
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_set_caps);

  BOXES_TENSOR_ID_QUARK = g_quark_from_static_string (BOXES_TENSOR_ID);
  SCORES_TENSOR_ID_QUARK = g_quark_from_static_string (SCORES_TENSOR_ID);
  FACE_QUARK = g_quark_from_static_string ("face");
}

static void
gst_face_detector_tensor_decoder_init (GstFaceDetectorTensorDecoder * self)
{
  self->score_threshold = DEFAULT_SCORE_THRESHOLD;
  self->iou_threshold = DEFAULT_IOU_THRESHOLD;
  self->sel_candidates = NULL;
  self->selected = NULL;
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}

static void
gst_face_detector_tensor_decoder_finalize (GObject * object)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  g_clear_pointer (&self->sel_candidates, g_ptr_array_unref);
  g_clear_pointer (&self->selected, g_ptr_array_unref);
  g_clear_pointer (&self->candidates, g_free);

  G_OBJECT_CLASS (gst_face_detector_tensor_decoder_parent_class)->finalize
      (object);
}

static void
gst_face_detector_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_SCORE_THRESHOLD:
      self->score_threshold = g_value_get_float (value);
      break;
    case PROP_IOU_THRESHOLD:
      self->iou_threshold = g_value_get_float (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_face_detector_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_SCORE_THRESHOLD:
      g_value_set_float (value, self->score_threshold);
      break;
    case PROP_IOU_THRESHOLD:
      g_value_set_float (value, self->iou_threshold);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

/* gst_face_detector_tensor_decoder_set_caps:
 *
 * Callback on caps negotiation completed. We use it here to retrieve the
 * video resolution. See GstBaseTransform for more details.
 */
static gboolean
gst_face_detector_tensor_decoder_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps)
{
  GstFaceDetectorTensorDecoder *self = GST_FACE_DETECTOR_TENSOR_DECODER (trans);

  if (!gst_video_info_from_caps (&self->video_info, incaps)) {
    GST_ERROR_OBJECT (self, "Failed to parse caps");
    return FALSE;
  }

  return TRUE;
}

/* gst_face_detector_tensor_decoder_get_tensor_meta
 * @buf:in: buffer
 * @boxes_tensor:out: boxes tensor
 * @scores_tensor:out: scores tensor
 * @return: TRUE if buf has boxes and scores tensors attached to it.
 * Retrieve the FaceDetection boxes and scores tensors from the buffer.
 */
static gboolean
gst_face_detector_tensor_decoder_get_tensor_meta (GstFaceDetectorTensorDecoder
    * self, GstBuffer * buf, GstTensor ** boxes_tensor,
    GstTensor ** scores_tensor)
{
  GstTensorMeta *tensor_meta;
  gint boxes_tensor_idx, scores_tensor_idx;

  g_return_val_if_fail (boxes_tensor != NULL, FALSE);
  g_return_val_if_fail (scores_tensor != NULL, FALSE);

  *boxes_tensor = NULL;
  *scores_tensor = NULL;

  /* Retrieve the TensorMeta attached to the buffer */
  tensor_meta = gst_buffer_get_tensor_meta (buf);
  if (!tensor_meta) {
    GST_LOG_OBJECT (self, "No tensor meta");
    return FALSE;
  }

  GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);

  /* Retrieve the index of the tensor that has a tensor-id matching
   * BOXES_TENSOR_ID_QUARK in the GstTensorMeta. */
  boxes_tensor_idx = gst_tensor_meta_get_index_from_id (tensor_meta,
      BOXES_TENSOR_ID_QUARK);

  /* Retrieve the index of the tensor that has a tensor-id matching
   * SCORES_TENSOR_ID_QUARK in the GstTensorMeta. */
  scores_tensor_idx =
      gst_tensor_meta_get_index_from_id (tensor_meta, SCORES_TENSOR_ID_QUARK);

  if (boxes_tensor_idx >= 0 && scores_tensor_idx >= 0) {
    GST_LOG_OBJECT (self, "Boxes tensor index: %d", boxes_tensor_idx);
    GST_LOG_OBJECT (self, "Scores tensor index: %d", scores_tensor_idx);

    *boxes_tensor = tensor_meta->tensors[boxes_tensor_idx];
    *scores_tensor = tensor_meta->tensors[scores_tensor_idx];

    return TRUE;
  } else {
    GST_INFO_OBJECT (self, "Couldn't find boxes or scores tensor, skipping");
  }

  return FALSE;
}

/* Compare c1 and c2
 * Utility function for sorting candidates based on their scores.
 */
static gint
gst_face_detector_tensor_decoder_sort_candidates (gconstpointer c1,
    gconstpointer c2)
{
  const Candidate *candidate1 = *((Candidate **) c1);
  const Candidate *candidate2 = *((Candidate **) c2);

  if (*candidate1->score < *candidate2->score) {
    return 1;
  } else if (*candidate1->score > *candidate2->score) {
    return -1;
  } else {
    return 0;
  }
}

static gfloat
iou_box (const Candidate * a, const Candidate * b)
{
  gfloat ax1 = a->box[0];
  gfloat ay1 = a->box[1];
  gfloat ax2 = a->box[2];
  gfloat ay2 = a->box[3];

  gfloat bx1 = b->box[0];
  gfloat by1 = b->box[1];
  gfloat bx2 = b->box[2];
  gfloat by2 = b->box[3];

  gfloat xx1 = (ax1 > bx1) ? ax1 : bx1;
  gfloat yy1 = (ay1 > by1) ? ay1 : by1;
  gfloat xx2 = (ax2 < bx2) ? ax2 : bx2;
  gfloat yy2 = (ay2 < by2) ? ay2 : by2;

  gfloat w = xx2 - xx1;
  gfloat h = yy2 - yy1;
  if (w < 0.0f || h < 0.0f) {
    /* No overlap */
    return 0.0f;
  }

  /* Area of intersection */
  gfloat intersection = w * h;

  /* Area of each box */
  gfloat areaA = (ax2 - ax1) * (ay2 - ay1);
  gfloat areaB = (bx2 - bx1) * (by2 - by1);
  if (areaA <= 0.0f || areaB <= 0.0f)
    return 0.0f;

  /* IoU = intersection / union */
  gfloat iou = intersection / (areaA + areaB - intersection);
  return iou;
}

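/* Worked example: two unit squares offset by 0.5 horizontally intersect in a
 * 0.5 x 1 strip, so intersection = 0.5, union = 1 + 1 - 0.5 = 1.5 and
 * IoU = 0.5 / 1.5 = 0.33. */
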
/* hard_nms:
 * @sel_candidates: array of pointers to the selected boxes with scores
 * @selected: array of pointers to the selected boxes with scores after the removal of overlapping boxes
 * @iou_threshold: IoU threshold above which an overlapping box is discarded
 * @top_k: number of boxes to keep (if top_k <= 0, keep all).
 * @return: void
 * Hard NMS:
 * 1) Keep the highest scoring box
 * 2) Remove boxes with IoU >= iou_threshold
 * 3) Repeat until no boxes are left or we reach top_k
 */
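/* For example, with the default iou-threshold of 0.3: if the best-scoring box
 * overlaps another candidate with IoU 0.45, that candidate is discarded, while
 * a candidate overlapping it with IoU 0.10 is kept as a separate detection. */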
static void
hard_nms (const GPtrArray * sel_candidates,
    GPtrArray * selected, gfloat iou_threshold, gint top_k)
{
  /* Edge case: handle the case of no input boxes */
  if (sel_candidates->len == 0) {
    return;
  }

  /* We'll mark boxes as "suppressed" using an array of booleans. */
  gchar *discarded = g_alloca0 (sel_candidates->len);   /* 0 => keep, 1 => discard */

  /* The maximum possible output is 'sel_candidates->len'. We'll store the kept boxes into 'selected'. */

  /* Perform NMS. */
  for (gsize i = 0; i < sel_candidates->len; i++) {
    if (discarded[i]) {
      /* Already thrown out due to overlap. */
      continue;
    }

    /* Get the current indexed candidate from the selected candidates.
     * Then store this current box/candidate into the final selected candidates array.
     */
    Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
    g_ptr_array_add (selected, c);

    /* If we have reached top_k (and top_k > 0), break. */
    if (top_k > 0 && selected->len == top_k) {
      break;
    }

    /* Suppress any candidate that overlaps (IoU >= iou_threshold) with the current one. */
    for (gsize j = i + 1; j < sel_candidates->len; j++) {
      if (discarded[j])
        continue;

      gfloat overlap = iou_box (g_ptr_array_index (sel_candidates, i),
          g_ptr_array_index (sel_candidates, j));
      if (overlap >= iou_threshold) {
        discarded[j] = 1;       /* Mark for discard */
      }
    }
  }
}

/* gst_face_detector_tensor_decoder_decode_boxes_f32:
 * @self: Instance
 * @boxes_tensor: Buffer containing the boxes tensor
 * @scores_tensor: Buffer containing the scores/confidences tensor
 * @rmeta: analytics-meta that is attached to the buffer
 * @return: void
 * Decode the Face Detection tensors, post-process them and store the decoded
 * information into an analytics-meta that is attached to the buffer before it
 * is pushed downstream.
 */
static void
gst_face_detector_tensor_decoder_decode_boxes_f32 (GstFaceDetectorTensorDecoder
    * self, GstTensor * boxes_tensor, GstTensor * scores_tensor,
    GstAnalyticsRelationMeta * rmeta)
{
  GstMapInfo map_info_boxes, map_info_scores;
  gfloat *candidate, *score;
  gboolean rv;
  GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;

  /* Retrieve memory at index 0 from boxes_tensor in READ mode */
  boxes_tensor->data = gst_buffer_make_writable (boxes_tensor->data);

  rv = gst_buffer_map (boxes_tensor->data, &map_info_boxes, GST_MAP_READ);
  g_assert (rv);

  /* Retrieve memory at index 0 from scores_tensor in READ mode */
  rv = gst_buffer_map (scores_tensor->data, &map_info_scores, GST_MAP_READ);
  g_assert (rv);

  GST_LOG_OBJECT (self, "Boxes tensor shape dims %zu", boxes_tensor->num_dims);
  GST_LOG_OBJECT (self, "Scores tensor shape dims %zu",
      scores_tensor->num_dims);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    /* Trace boxes tensor dimensions */
    for (gsize i = 0; i < boxes_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Boxes tensor dim %zu: %zu", i,
          boxes_tensor->dims[i]);
    }

    /* Trace scores tensor dimensions */
    for (gsize i = 0; i < scores_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Scores tensor dim %zu: %zu", i,
          scores_tensor->dims[i]);
    }
  }

  /* Allocate arrays to store the selected candidates */
  if (sel_candidates == NULL) {
    /* The number of candidates can be large, keep the arrays around to avoid
     * frequent allocation */
    sel_candidates = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->sel_candidates = sel_candidates;
    selected = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->selected = selected;
    self->candidates = (Candidate *) g_new0 (Candidate, boxes_tensor->dims[1]);
  } else {
    /* Reset lengths when we re-use the arrays */
    g_ptr_array_set_size (sel_candidates, 0);
    g_ptr_array_set_size (selected, 0);
  }

  score = (gfloat *) map_info_scores.data;
  candidate = (gfloat *) map_info_boxes.data;

  gsize idx = 0;

  /* For UltraLightFaceDetection:
   * "boxes"  => shape [N,4], where N = 4420
   * "scores" => shape [N,2], (background, face)
   * We'll skip the background (index = 0) and keep the foreground (index = 1).
   */

  /*
   * Iterate through the scores tensor.
   * Check whether the score exceeds the score threshold; if it does, select the score and the corresponding box.
   * Add these selected boxes to the sel_candidates array.
   */
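  /* Concretely: for candidate k, score[2 * k + 1] is the face confidence and
   * candidate[4 * k] .. candidate[4 * k + 3] hold its normalized
   * (x1, y1, x2, y2), which is why the loop below advances the score index by
   * 2 (starting at 1) and the box index by 4. */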
  for (gsize i = 1, j = 0; i < scores_tensor->dims[1] * 2; i += 2, j += 4) {
    if (score[i] >= self->score_threshold) {
      self->candidates[idx].index = idx;
      self->candidates[idx].box = &candidate[j];
      self->candidates[idx].score = &score[i];

      g_ptr_array_add (sel_candidates, &self->candidates[idx]);
      idx++;
    }
  }

  GST_LOG_OBJECT (self, "Number of selected candidates = %d",
      sel_candidates->len);

  if (sel_candidates->len == 0) {
    GST_LOG_OBJECT (self, "No boxes above threshold=%1.2f",
        self->score_threshold);
    goto cleanup;
  }

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      gsize j = 0;
      for (; j < boxes_tensor->dims[2]; j++) {
        GST_TRACE_OBJECT (self, "sel_candidates[%zu] = %1.5f ", i + j,
            c->box[j]);
      }
      GST_TRACE_OBJECT (self, "score[%zu] = %1.5f", i + j, c->score[0]);
    }
  }

  /*
   * Sort the sel_candidates array so that the candidates are in descending order w.r.t. scores
   */
  g_ptr_array_sort (sel_candidates,
      gst_face_detector_tensor_decoder_sort_candidates);

  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      GST_TRACE_OBJECT (self, "c[%zu] = %1.5f index = %d", i, c->score[0],
          c->index);
    }
  }

  /* NMS */
  hard_nms (sel_candidates, selected, self->iou_threshold, -1);

  GST_LOG_OBJECT (self, "Number of faces detected = %d", selected->len);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < selected->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
      GST_TRACE_OBJECT (self,
          "%zu x1 = %1.5f y1 = %1.5f x2 = %1.5f y2 = %1.5f score = %1.5f",
          i + 1, c->box[0], c->box[1], c->box[2], c->box[3],
          c->score[0]);
    }
  }

  gsize frame_width = self->video_info.width;
  gsize frame_height = self->video_info.height;

  /* Convert each final box from normalized to pixel coords and attach to meta. */
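  /* For example, a normalized box (0.25, 0.30, 0.50, 0.55) on a 640x480 frame
   * becomes x = 160, y = 144, w = 160, h = 120. */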
  for (gint i = 0; i < selected->len; i++) {
    Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
    gfloat x1 = c->box[0] * frame_width;
    gfloat y1 = c->box[1] * frame_height;
    gfloat x2 = c->box[2] * frame_width;
    gfloat y2 = c->box[3] * frame_height;
    gfloat w_ = x2 - x1;
    gfloat h_ = y2 - y1;

    /* Add to analytics meta: (x, y, width, height). */
    gst_analytics_relation_meta_add_od_mtd (rmeta, FACE_QUARK,
        (gint) (x1 + 0.5f), (gint) (y1 + 0.5f),
        (gint) (w_ + 0.5f), (gint) (h_ + 0.5f), c->score[0], NULL);
  }

cleanup:

  /* Unmap */
  gst_buffer_unmap (boxes_tensor->data, &map_info_boxes);
  gst_buffer_unmap (scores_tensor->data, &map_info_scores);
}

/* gst_face_detector_tensor_decoder_transform_ip:
 * @trans: Instance
 * @buf:inout: Buffer containing media and where tensors can be attached
 * @return: Flow errors
 * Decode the Face Detection tensors, post-process them and store the decoded
 * information into an analytics-meta that is attached to the buffer before it
 * is pushed downstream.
 */
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstFaceDetectorTensorDecoder *self = GST_FACE_DETECTOR_TENSOR_DECODER (trans);
  GstTensor *boxes_tensor, *scores_tensor;
  GstAnalyticsRelationMeta *rmeta;

  if (!gst_face_detector_tensor_decoder_get_tensor_meta (self, buf,
          &boxes_tensor, &scores_tensor))
    return GST_FLOW_OK;

  if (boxes_tensor->num_dims != 3) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Boxes tensor must have 3 dimensions but has %zu",
            boxes_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  if (scores_tensor->num_dims != 3) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Scores tensor must have 3 dimensions but has %zu",
            scores_tensor->num_dims));
    return GST_FLOW_ERROR;
  }

  if (boxes_tensor->data_type != GST_TENSOR_DATA_TYPE_FLOAT32 ||
      scores_tensor->data_type != GST_TENSOR_DATA_TYPE_FLOAT32) {
    GST_ELEMENT_ERROR (self, STREAM, NOT_IMPLEMENTED,
        ("Only data-type FLOAT32 support is implemented"),
        ("Please implement."));

    return GST_FLOW_ERROR;
  }

  rmeta = gst_buffer_add_analytics_relation_meta (buf);
  g_assert (rmeta != NULL);

  /* Decode boxes_tensor and scores_tensor and attach the information in a
   * structured way to rmeta. */
  gst_face_detector_tensor_decoder_decode_boxes_f32 (self, boxes_tensor,
      scores_tensor, rmeta);

  return GST_FLOW_OK;
}
@@ -0,0 +1,86 @@
/*
 * GStreamer gstreamer-facedetectortensordecoder
 * Copyright (C) 2025 Collabora Ltd
 *
 * gstfacedetectortensordecoder.h
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#ifndef __GST_FACE_DETECTOR_TENSOR_DECODER_H__
#define __GST_FACE_DETECTOR_TENSOR_DECODER_H__

#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>

G_BEGIN_DECLS
#define GST_TYPE_FACE_DETECTOR_TENSOR_DECODER (gst_face_detector_tensor_decoder_get_type())
G_DECLARE_FINAL_TYPE (GstFaceDetectorTensorDecoder,
    gst_face_detector_tensor_decoder, GST, FACE_DETECTOR_TENSOR_DECODER,
    GstBaseTransform)

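/* A candidate detection. The box and score pointers do not own any memory:
 * they point straight into the mapped boxes and scores tensors, so a
 * Candidate is only valid while the decoder keeps those tensors mapped. */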
typedef struct
{
  guint16 index;
  gfloat *box;
  gfloat *score;
} Candidate;

/**
 * GstFaceDetectorTensorDecoder:
 *
 * Since: 1.28
 */
struct _GstFaceDetectorTensorDecoder
{
  GstBaseTransform basetransform;

  /* Confidence threshold. */
  gfloat score_threshold;

  /* Intersection-over-Union threshold. */
  gfloat iou_threshold;

  /* Video Info */
  GstVideoInfo video_info;

  /* Candidates with a class confidence level above the threshold. */
  GPtrArray *sel_candidates;

  /* Final candidates selected that respect the class confidence level,
   * NMS and maximum detection count. */
  GPtrArray *selected;

  /* Candidates with a class confidence level and bounding boxes. */
  Candidate *candidates;
};

/**
 * GstFaceDetectorTensorDecoderClass:
 *
 * @parent_class: base transform base class
 *
 * Since: 1.28
 */
struct _GstFaceDetectorTensorDecoderClass
{
  GstBaseTransformClass parent_class;
};

GST_ELEMENT_REGISTER_DECLARE (face_detector_tensor_decoder)
G_END_DECLS
#endif /* __GST_FACE_DETECTOR_TENSOR_DECODER_H__ */
@@ -26,6 +26,7 @@

#include "gstssdobjectdetector.h"
#include "gstclassifiertensordecoder.h"
#include "gstfacedetectortensordecoder.h"

/**
 * SECTION:plugin-tensordecoders
@@ -40,6 +41,7 @@ plugin_init (GstPlugin * plugin)
  gboolean ret = FALSE;
  ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
  ret |= GST_ELEMENT_REGISTER (classifier_tensor_decoder, plugin);
  ret |= GST_ELEMENT_REGISTER (face_detector_tensor_decoder, plugin);

  return ret;
}
@@ -1,12 +1,14 @@
tensordecoders_sources = [
  'gsttensordecoders.c',
  'gstssdobjectdetector.c',
  'gstclassifiertensordecoder.c'
  'gstclassifiertensordecoder.c',
  'gstfacedetectortensordecoder.c'
]

tensordecoders_headers = [
  'gstssdobjectdetector.h',
  'gstclassifiertensordecoder.h'
  'gstclassifiertensordecoder.h',
  'gstfacedetectortensordecoder.h'
]

doc_sources = []