Compare commits

...

5 Commits

Author SHA1 Message Date
Elias Rosendahl
13387dd026 try to increase opencv version 2025-09-01 19:43:08 +02:00
Elias Rosendahl
8acdb1f8d3 remove refereces of old decoders 2025-09-01 18:02:52 +02:00
Elias Rosendahl
52dfe8c17a update meson.build 2025-09-01 17:49:19 +02:00
Elias Rosendahl
5acf382495 remove new decoders 2025-09-01 17:39:34 +02:00
Elias Rosendahl
daf86b5a3d add komata 2025-09-01 17:24:15 +02:00
8 changed files with 87 additions and 1454 deletions

View File

@ -30,7 +30,7 @@ libopencv_headers = [
gstopencv_cargs = []
opencv_dep = dependency('opencv', version : ['>= 3.0.0', '< 3.5.0'], required : false, include_type: 'system' )
opencv_dep = dependency('opencv', version : ['>= 4.0.0', '< 5.5.0'], required : false, include_type: 'system' )
if not opencv_dep.found()
opencv_dep = dependency('opencv4', version : ['>= 4.0.0'], required : opencv_opt, include_type: 'system')
if not opencv_dep.found()

View File

@ -1,508 +0,0 @@
/*
* GStreamer gstreamer-classifiertensordecoder
* Copyright (C) 2025 Collabora Ltd.
* @author: Daniel Morin <daniel.morin@dmohub.org>
*
* gstclassifiertensordecoder.c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/**
 * SECTION:element-classifiertensordecoder
* @short_description: Decode tensors from classification model using a common
* tensor output format.
*
*
* This element can parse per-buffer inference tensor meta data generated by
* an upstream inference element.
*
* Tensor format must be:
* Dims: [batch-size, class_count]
* Datatype: float32
*
* Tensor [M,N]
* Batch 0 | Class 0 confidence level | ... | Class N confidence level |
* ...
* Batch M | Class 0 confidence level | ... | Class N confidence level |
*
* In-memory tensor format:
*
* |Batch 0, Class 0 confidence level |
* |Batch 0, ... |
* |Batch 0, Class N confidence level |
* | ... |
* |Batch M, Class 0 confidence level |
* |Batch M, ... |
* |Batch M, Class N confidence level |
*
*
* ## Example launch command:
* |[
* gst-launch-1.0 filesrc location=/onnx-models/images/bus.jpg \
* ! jpegdec \
* ! videoconvertscale add-borders=1 \
* ! onnxinference execution-provider=cpu \
* model-file=/onnx-models/models/mobilenet_v1.onnx \
* ! classifiertensordecoder labels-file=labels.txt ! fakesink \
 * ]| This pipeline creates a tensor decoder for a classification model
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstclassifiertensordecoder.h"

#include <math.h>

#include <gst/gst.h>
#include <gst/analytics/analytics.h>
const gchar GST_MODEL_STD_IMAGE_CLASSIFICATION[] = "classification-generic-out";
GST_DEBUG_CATEGORY_STATIC (classifier_tensor_decoder_debug);
#define GST_CAT_DEFAULT classifier_tensor_decoder_debug
#define gst_classifier_tensor_decoder_parent_class parent_class
GST_ELEMENT_REGISTER_DEFINE (classifier_tensor_decoder,
"classifiertensordecoder", GST_RANK_PRIMARY,
GST_TYPE_CLASSIFIER_TENSOR_DECODER);
/* GstClassifierTensorDecoder properties */
enum
{
PROP_0,
PROP_THRESHOLD,
PROP_LABEL_FILE
};
static const float DEFAULT_THRESHOLD = 0.7f;
static GstStaticPadTemplate gst_classifier_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS_ANY);
static GstStaticPadTemplate gst_classifier_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS_ANY);
static void gst_classifier_tensor_decoder_set_property (GObject * object,
guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_classifier_tensor_decoder_get_property (GObject * object,
guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_classifier_tensor_decoder_finalize (GObject * object);
static GstFlowReturn
gst_classifier_tensor_decoder_transform_ip (GstBaseTransform * trans,
GstBuffer * buf);
static GstStateChangeReturn
gst_classifier_tensor_decoder_change_state (GstElement * element,
GstStateChange transition);
/* softmax: compute softmax(values / max_val) into @result.
 *
 * Implemented as a macro so it can be instantiated for both guint8 and
 * gfloat inputs below. The original macro named its third parameter
 * "results" while the body referenced "result" — it only compiled by
 * accidentally capturing the calling function's parameter. The macro is
 * now self-consistent, parenthesizes its arguments, and scopes its
 * locals in a do/while block. */
#define softmax(len, values, result, max_val) \
  do { \
    gsize i; \
    gfloat sum = 0.0; \
    gfloat value; \
    g_return_if_fail ((values) != NULL); \
    g_return_if_fail ((result) != NULL); \
    \
    /* Exponentiate every (rescaled) value and accumulate the normalizer */ \
    for (i = 0; i < (len); i++) { \
      value = (values)[i] / (max_val); \
      (result)[i] = exp (value); \
      sum += (result)[i]; \
    } \
    \
    /* Normalize so the outputs sum to 1.0 */ \
    for (i = 0; i < (len); i++) { \
      (result)[i] = (result)[i] / sum; \
    } \
  } while (0)

/* Softmax over a uint8 tensor; inputs are rescaled by 1/255 before exp. */
static void
softmax_u8 (gsize len, const guint8 * values, gfloat * result)
{
  softmax (len, values, result, 255.0);
}

/* Softmax over a float32 tensor (no rescaling). */
static void
softmax_f32 (gsize len, const gfloat * values, gfloat * result)
{
  softmax (len, values, result, 1.0);
}
G_DEFINE_TYPE (GstClassifierTensorDecoder, gst_classifier_tensor_decoder,
GST_TYPE_BASE_TRANSFORM);
/* Class initialisation: installs the element's properties, pad templates,
 * static metadata and the GstElement/GstBaseTransform vmethods. Runs once
 * when the type is first instantiated. */
static void
gst_classifier_tensor_decoder_class_init (GstClassifierTensorDecoderClass *
    klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  GST_DEBUG_CATEGORY_INIT (classifier_tensor_decoder_debug,
      "classifiertensordecoder", 0,
      "Tensor decoder for classification model with common output format");

  gobject_class->set_property = gst_classifier_tensor_decoder_set_property;
  gobject_class->get_property = gst_classifier_tensor_decoder_get_property;
  gobject_class->finalize = gst_classifier_tensor_decoder_finalize;

  /* Confidence threshold below which the decoded class is not reported. */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_THRESHOLD,
      g_param_spec_float ("class-confidence-threshold",
          "Class confidence threshold",
          "Classes with a confidence level inferior to this threshold "
          "will be excluded",
          0.0, 1.0, DEFAULT_THRESHOLD,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /* Path to the class-labels file, one label per line (COCO style). */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_LABEL_FILE,
      g_param_spec_string ("labels-file",
          "Class labels file",
          "Path to a file containing class label. COCO format",
          NULL, (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  element_class->change_state = gst_classifier_tensor_decoder_change_state;

  gst_element_class_set_static_metadata (element_class,
      "classifiertensordecoder", "Tensordecoder",
      "Decode tensors output from classification model using common format.\n"
      "\tTensor format must be: \n" "\t\tDims: [batch-size, class_count]\n"
      "\t\tDatatype: float32 \n" "\n" "\t\tTensor [M,N]\n"
      "\t\t\tBatch 0 | Class 0 confidence level | ... | Class N-1 confidence level |\n"
      "\t\t\t...\n"
      "\t\t\tBatch M-1 | Class 0 confidence level | ... | Class N-1 confidence level |\n"
      "\t\t\n" "\tIn-memory tensor format:\n" "\n"
      "\t\t|Batch 0, Class 0 confidence level |\n"
      "\t\t|Batch 0, ... |\n"
      "\t\t|Batch 0, Class N-1 confidence level |\n"
      "\t\t| ... |\n"
      "\t\t|Batch M-1, Class 0 confidence level |\n"
      "\t\t|Batch M-1, ... |\n"
      "\t\t|Batch M-1, Class N-1 confidence level |\n" "\n" " model",
      "Daniel Morin <daniel.morin@collabora.com>");

  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_classifier_tensor_decoder_src_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_classifier_tensor_decoder_sink_template));

  /* In-place transform: buffers are only annotated, never rewritten. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_classifier_tensor_decoder_transform_ip);
}
/* Instance initialisation: set property defaults and disable passthrough
 * so transform_ip is invoked for every buffer. */
static void
gst_classifier_tensor_decoder_init (GstClassifierTensorDecoder * self)
{
  self->threshold = DEFAULT_THRESHOLD;
  self->labels_file = NULL;
  self->softmax_res = NULL;     /* allocated when the labels are loaded */
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}
/* GObject finalize: release the labels file path. Note that class_quark
 * and softmax_res are released in the READY->NULL state transition, not
 * here. */
static void
gst_classifier_tensor_decoder_finalize (GObject * object)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (object);
  g_free (self->labels_file);
  G_OBJECT_CLASS (gst_classifier_tensor_decoder_parent_class)->finalize
      (object);
}
/* GObject property setter.
 *
 * labels-file is validated to be an existing regular file; an invalid
 * path is logged and discarded rather than stored. Fix: the previous
 * path is freed before being replaced, so setting the property more than
 * once no longer leaks the old string. */
static void
gst_classifier_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (object);
  static GFileTest filetest = (G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR);

  switch (prop_id) {
    case PROP_THRESHOLD:
      self->threshold = g_value_get_float (value);
      break;
    case PROP_LABEL_FILE:
      /* Release any previously configured path to avoid leaking it. */
      g_free (self->labels_file);
      self->labels_file = g_strdup (g_value_get_string (value));
      if (self->labels_file) {
        if (!g_file_test (self->labels_file, filetest)) {
          GST_ERROR_OBJECT (self, "Unable to load %s", self->labels_file);
          g_free (g_steal_pointer (&self->labels_file));
        }
      } else {
        GST_ERROR_OBJECT (self, "Invalid file");
      }
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}
/* GObject property getter: report the currently configured values. */
static void
gst_classifier_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstClassifierTensorDecoder *decoder = GST_CLASSIFIER_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_LABEL_FILE:
      g_value_set_string (value, decoder->labels_file);
      break;
    case PROP_THRESHOLD:
      g_value_set_float (value, decoder->threshold);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}
/* Load class labels from self->labels_file, one label per line.
 *
 * Fills self->class_quark with one GQuark per label and pre-allocates
 * self->softmax_res with room for one gfloat per class.
 *
 * Returns: the number of labels loaded, or 0 on error.
 *
 * Fixes: the "no labels" branch used to g_free(content) a second time
 * (content is already freed right after g_strsplit) — a double free —
 * and leaked the token vector. */
static guint
gst_classifier_tensor_decoder_load_labels (GstClassifierTensorDecoder * self)
{
  gchar *content = NULL;
  gchar **tokens = NULL;
  gsize len;
  GError *err = NULL;
  GQuark val;

  if (self->labels_file == NULL) {
    GST_ERROR_OBJECT (self, "Missing label file");
    return 0;
  }

  if (!g_file_get_contents (self->labels_file, &content, &len, &err)) {
    GST_ERROR_OBJECT (self, "Could not load labels file %s: %s",
        self->labels_file, err->message);
    g_error_free (err);
    return 0;
  }

  if (len == 0) {
    GST_ERROR_OBJECT (self, "Labels file %s is empty", self->labels_file);
    g_free (content);
    return 0;
  }

  tokens = g_strsplit (content, "\n", 0);
  g_free (content);

  if (tokens[0] == NULL) {
    GST_ERROR_OBJECT (self, "Labels file %s has no labels", self->labels_file);
    g_strfreev (tokens);
    return 0;
  }

  self->class_quark = g_array_new (FALSE, FALSE, sizeof (GQuark));
  /* Stop at the first empty token: a trailing newline produces an empty
   * final token that must not become a label. */
  for (int i = 0; tokens[i] != NULL && tokens[i][0] != '\0'; i++) {
    val = g_quark_from_string (tokens[i]);
    g_array_append_val (self->class_quark, val);
  }
  self->softmax_res = g_array_sized_new (FALSE, TRUE, sizeof (gfloat),
      self->class_quark->len);
  g_strfreev (tokens);
  return self->class_quark->len;
}
/* Element state-change handler.
 *
 * NULL->READY: load the labels file, failing the transition when it
 * cannot be loaded. READY->NULL: release the label and softmax arrays.
 *
 * Fix: g_array_free(class_quark, FALSE) leaked the array's element
 * segment; both arrays are now freed with free_segment=TRUE and the
 * pointers reset so a later NULL->READY starts clean. */
static GstStateChangeReturn
gst_classifier_tensor_decoder_change_state (GstElement * element,
    GstStateChange transition)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (element);
  GstStateChangeReturn ret;

  switch (transition) {
    case GST_STATE_CHANGE_NULL_TO_READY:
      if (!gst_classifier_tensor_decoder_load_labels (self)) {
        return GST_STATE_CHANGE_FAILURE;
      }
      break;
    default:
      break;
  }

  ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);

  switch (transition) {
    case GST_STATE_CHANGE_READY_TO_NULL:
      g_array_free (self->class_quark, TRUE);
      g_array_free (self->softmax_res, TRUE);
      self->class_quark = NULL;
      self->softmax_res = NULL;
      break;
    default:
      break;
  }
  return ret;
}
/* Find a GstTensorMeta on @buf that carries exactly one tensor whose
 * tensor-id is the generic classification output id.
 *
 * Returns: the matching meta, or NULL when none is attached. */
static GstTensorMeta *
gst_classifier_tensor_decoder_get_tensor_meta (GstClassifierTensorDecoder *
    self, GstBuffer * buf)
{
  gpointer state = NULL;
  GstMeta *m;

  /* Fast path: nothing to scan if the buffer carries no tensor meta. */
  if (gst_buffer_get_meta (buf, GST_TENSOR_META_API_TYPE) == NULL) {
    GST_DEBUG_OBJECT (self,
        "missing tensor meta from buffer %" GST_PTR_FORMAT, buf);
    return NULL;
  }

  for (m = gst_buffer_iterate_meta_filtered (buf, &state,
          GST_TENSOR_META_API_TYPE); m != NULL;
      m = gst_buffer_iterate_meta_filtered (buf, &state,
          GST_TENSOR_META_API_TYPE)) {
    GstTensorMeta *tensor_meta = (GstTensorMeta *) m;

    if (tensor_meta->num_tensors == 1
        && gst_tensor_meta_get_index_from_id (tensor_meta,
            g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION))
        != -1)
      return tensor_meta;
  }
  return NULL;
}
/* Decode the classification tensor referenced by @tmeta, softmax the
 * class scores and attach the most probable class to @rmeta as a
 * classification mtd.
 *
 * Fixes: the class-confidence-threshold property was never consulted
 * (the property blurb promises classes below the threshold are
 * excluded); the mapped tensor buffer was not unmapped on the
 * unreachable data-type branch; signed/unsigned loop comparisons. */
static GstFlowReturn
gst_classifier_tensor_decoder_decode (GstClassifierTensorDecoder * self,
    GstBuffer * buf, GstAnalyticsRelationMeta * rmeta, GstTensorMeta * tmeta)
{
  GstMapInfo map_info = GST_MAP_INFO_INIT;
  gfloat max = 0.0;
  gfloat *softmax_res = (gfloat *) self->softmax_res->data;
  gsize len;
  GQuark q, qmax = 0;
  gint max_idx = -1;
  const GstTensor *tensor;
  GstAnalyticsClsMtd cls_mtd;
  const gsize DIMS[] = { 1, G_MAXSIZE };

  /* Accept float32 or uint8 tensors, either flat [N] or batched [1,N]. */
  tensor = gst_tensor_meta_get_typed_tensor (tmeta,
      g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION),
      GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1, NULL);
  if (tensor == NULL)
    tensor = gst_tensor_meta_get_typed_tensor (tmeta,
        g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION),
        GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, DIMS);
  if (tensor == NULL)
    tensor = gst_tensor_meta_get_typed_tensor (tmeta,
        g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION),
        GST_TENSOR_DATA_TYPE_UINT8, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1, NULL);
  if (tensor == NULL)
    tensor = gst_tensor_meta_get_typed_tensor (tmeta,
        g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION),
        GST_TENSOR_DATA_TYPE_UINT8, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, DIMS);
  if (tensor == NULL) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED,
        (NULL), ("Could not find classification tensor"));
    return GST_FLOW_ERROR;
  }

  /* Class count is the last dimension; clamp to the label count so we
   * never read past softmax_res / class_quark. */
  len = tensor->dims[tensor->num_dims - 1];
  if (len != self->class_quark->len) {
    GST_WARNING_OBJECT (self, "Labels file has size %zu, but the tensor has"
        " %u entries, it is probably not the right labels file",
        len, self->class_quark->len);
    len = MIN (len, self->class_quark->len);
  }

  if (!gst_buffer_map (tensor->data, &map_info, GST_MAP_READ)) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL),
        ("Failed to map tensor data"));
    return GST_FLOW_ERROR;
  }

  GST_TRACE_OBJECT (self, "Tensor shape dims %zu", tensor->num_dims);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Tensor dim %zu: %zu", i, tensor->dims[i]);
    }
  }

  switch (tensor->data_type) {
    case GST_TENSOR_DATA_TYPE_FLOAT32:
      softmax_f32 (len, (gfloat *) map_info.data, softmax_res);
      break;
    case GST_TENSOR_DATA_TYPE_UINT8:
      softmax_u8 (len, (guint8 *) map_info.data, softmax_res);
      break;
    default:
      /* Unreachable: only F32/U8 tensors were accepted above. Unmap
       * before bailing out so the buffer is not left mapped. */
      gst_buffer_unmap (tensor->data, &map_info);
      g_return_val_if_reached (GST_FLOW_ERROR);
      break;
  }
  gst_buffer_unmap (tensor->data, &map_info);

  /* Find the class with the highest softmax probability. */
  for (gsize j = 0; j < len; j++) {
    q = g_array_index (self->class_quark, GQuark, j);
    if (softmax_res[j] > max) {
      max = softmax_res[j];
      max_idx = (gint) j;
      qmax = q;
    }
  }

  /* Only report the winning class when it clears the configured
   * confidence threshold. */
  if (max_idx != -1 && max >= self->threshold) {
    gst_analytics_relation_meta_add_one_cls_mtd (rmeta, max, qmax, &cls_mtd);
    GST_LOG_OBJECT (self, "Max class is %d:%s with %f", max_idx,
        g_quark_to_string (qmax), max);
  } else if (max_idx != -1) {
    GST_LOG_OBJECT (self, "Max class %d:%s confidence %f below threshold %f",
        max_idx, g_quark_to_string (qmax), max, self->threshold);
  }
  return GST_FLOW_OK;
}
/* GstBaseTransform in-place transform vmethod.
 *
 * Looks up the classification tensor meta on @buf; when present, decodes
 * it into an analytics relation meta attached to the buffer. Buffers
 * without tensor meta are passed through unmodified.
 *
 * Fix: the missing-meta path returned TRUE (1), which is not a valid
 * GstFlowReturn value; it now returns GST_FLOW_OK. */
static GstFlowReturn
gst_classifier_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (trans);
  GstTensorMeta *tmeta;
  GstAnalyticsRelationMeta *rmeta;

  tmeta = gst_classifier_tensor_decoder_get_tensor_meta (self, buf);
  if (tmeta == NULL) {
    GST_WARNING_OBJECT (trans, "missing tensor meta");
    /* Not fatal: let the buffer flow through untouched. */
    return GST_FLOW_OK;
  }

  rmeta = gst_buffer_add_analytics_relation_meta (buf);
  g_assert (rmeta != NULL);

  return gst_classifier_tensor_decoder_decode (self, buf, rmeta, tmeta);
}

View File

@ -1,66 +0,0 @@
/*
* GStreamer gstreamer-classifiertensordecoder
* Copyright (C) 2025 Collabora Ltd
* @author: Daniel Morin <daniel.morin@dmohub.org>
*
* gstclassifiertensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CLASSIFIER_TENSOR_DECODER_H__
#define __GST_CLASSIFIER_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/base/gstbasetransform.h>
G_BEGIN_DECLS
#define GST_TYPE_CLASSIFIER_TENSOR_DECODER (gst_classifier_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstClassifierTensorDecoder, gst_classifier_tensor_decoder,
GST, CLASSIFIER_TENSOR_DECODER, GstBaseTransform)
/**
 * GstClassifierTensorDecoder:
 *
 * @threshold: Class confidence threshold
 * @labels_file: Path where to read class labels
 * @class_quark: Class labels quark representation
 * @softmax_res: Soft-max of output vector
 *
 * Since: 1.24
 */
struct _GstClassifierTensorDecoder
{
  GstBaseTransform basetransform;

  gfloat threshold;
  gchar *labels_file;
  GArray *class_quark;
  GArray *softmax_res;
};

/* NOTE: G_DECLARE_FINAL_TYPE above already defines
 * struct _GstClassifierTensorDecoderClass (a plain GstBaseTransformClass
 * wrapper); defining it again here was a struct redefinition error, so
 * the duplicate has been removed. To expose overridable decode /
 * postprocess / load_labels vmethods, the type would have to be declared
 * with G_DECLARE_DERIVABLE_TYPE instead. */
GST_ELEMENT_REGISTER_DECLARE (classifier_tensor_decoder)
G_END_DECLS
#endif /* __GST_CLASSIFIER_TENSOR_DECODER_H__ */

View File

@ -1,678 +0,0 @@
/*
* GStreamer gstreamer-ultralightfacedetectortensordec
* Copyright (C) 2025 Collabora Ltd.
*
* gstfacedetectortensordecoder.c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/**
* SECTION:element-ultralightfacedetectortensordec
* @short_description: Detect faces in video buffers using the Ultra Light Face Detection model.
*
* This element can parse per-buffer inference tensor meta data generated by an upstream
* inference element.
*
* ## Example launch command:
*
* Test image files can be found here :
* https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/imgs
*
* The Model file can be found here :
* https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/models/onnx
*
* GST_DEBUG=ultralightfacedetectortensordec \
* gst-launch-1.0 multifilesrc location=~/imgs/11.jpg ! jpegdec ! videoconvertscale ! \
* onnxinference model-file=version-RFB-320.onnx input-image-format=chw input-tensor-offset=-127 input-tensor-scale=128.0 ! \
* ultralightfacedetectortensordec ! objectdetectionoverlay object-detection-outline-color=0xFF0000FF draw-labels=false ! \
* videoconvertscale ! autovideosink
*
* Since: 1.28
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstfacedetectortensordecoder.h"
#include <gio/gio.h>
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/analytics/analytics.h>
#include <math.h> /* for expf() */
/* Face detection tensor id strings */
#define BOXES_TENSOR_ID "ssd-mobilenet-v1-variant-1-out-boxes"
#define SCORES_TENSOR_ID "ultra-lightweight-face-detection-rfb-320-v1-variant-1-out-scores"
GST_DEBUG_CATEGORY_STATIC (face_detector_tensor_decoder_debug);
#define GST_CAT_DEFAULT face_detector_tensor_decoder_debug
GST_ELEMENT_REGISTER_DEFINE (face_detector_tensor_decoder,
"ultralightfacedetectortensordec", GST_RANK_PRIMARY,
GST_TYPE_FACE_DETECTOR_TENSOR_DECODER);
/* GstFaceDetectorTensorDecoder properties, see properties description in
* gst_face_detector_tensor_decoder_class_init for more details. */
enum
{
PROP_0,
PROP_SCORE_THRESHOLD,
PROP_IOU_THRESHOLD
};
/* Default properties value */
static const gfloat DEFAULT_SCORE_THRESHOLD = 0.6f; /* confidence threshold */
static const gfloat DEFAULT_IOU_THRESHOLD = 0.3f; /* NMS IoU threshold */
/* To tensor-id are defined by a string that is converted to quark
* which is just an integer value using a hash function. For efficiency
* we compare on the quark (hash value). Since tensor-id never change we
* just calculate the hash once during initialization and store the value in
* these variables. */
GQuark BOXES_TENSOR_ID_QUARK;
GQuark SCORES_TENSOR_ID_QUARK;
GQuark FACE_QUARK;
/* GStreamer element srcpad template. Template of a srcpad that can receive
* any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("video/x-raw")
);
/* GStreamer element sinkpad template. Template of a sinkpad that can receive
* any raw video. */
static GstStaticPadTemplate gst_face_detector_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("video/x-raw")
);
/* Prototypes */
static void gst_face_detector_tensor_decoder_set_property (GObject * object,
guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_get_property (GObject * object,
guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_face_detector_tensor_decoder_finalize (GObject * object);
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
GstBuffer * buf);
static gboolean gst_face_detector_tensor_decoder_set_caps (GstBaseTransform *
trans, GstCaps * incaps, GstCaps * outcaps);
G_DEFINE_TYPE (GstFaceDetectorTensorDecoder, gst_face_detector_tensor_decoder,
GST_TYPE_BASE_TRANSFORM);
/* Class initialisation: installs properties, pad templates, element
 * metadata and the GstBaseTransform vmethods, and pre-computes the
 * tensor-id quarks used for fast tensor lookup. */
static void
gst_face_detector_tensor_decoder_class_init (GstFaceDetectorTensorDecoderClass
    * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  /* Define GstFaceDetectorTensorDecoder debug category. */
  GST_DEBUG_CATEGORY_INIT (face_detector_tensor_decoder_debug,
      "ultralightfacedetectortensordec", 0,
      "Tensor Decoder for Face Detection");

  /* Set GObject vmethods to get and set properties. */
  gobject_class->set_property = gst_face_detector_tensor_decoder_set_property;
  gobject_class->get_property = gst_face_detector_tensor_decoder_get_property;
  gobject_class->finalize = gst_face_detector_tensor_decoder_finalize;

  /* Define GstFaceDetectorTensorDecoder properties using the GObject
   * properties interface. */

  /**
   * GstFaceDetectorTensorDecoder:score-threshold
   *
   * Threshold for deciding when to remove boxes based on score
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_SCORE_THRESHOLD,
      g_param_spec_float ("score-threshold",
          "Score threshold",
          "Threshold for deciding when to remove boxes based on score",
          0.0, 1.0, DEFAULT_SCORE_THRESHOLD, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstFaceDetectorTensorDecoder:iou-threshold
   *
   * Threshold for removing boxes based on proportion of the image
   *
   * Since: 1.28
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_IOU_THRESHOLD,
      g_param_spec_float ("iou-threshold",
          "IoU threshold",
          "Threshold for removing boxes based on proportion of the image",
          0.0, 1.0, DEFAULT_IOU_THRESHOLD,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /* Element description. */
  gst_element_class_set_static_metadata (element_class,
      "ultralightfacedetectortensordec", "Tensordecoder/Video",
      "Detect tensor output from the inference of Ultra Light Face Detection"
      " to detect the faces in video frames."
      "The original repository of the Ultra Light Face Detection is located at"
      " https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.",
      "Raghavendra Rao <raghavendra.rao@collabora.com>");

  /* Add pads to the element based on the pad templates defined earlier. */
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_sink_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get
      (&gst_face_detector_tensor_decoder_src_template));

  /* Set GstBaseTransform vmethod transform_ip. This method is called for
   * each buffer; "ip" stands for in-place, meaning the buffer itself is
   * not rewritten. The tensor decoder only inspects buffers for an
   * attached GstTensorMeta whose tensor-ids it can handle. */
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_transform_ip);

  /* Set GstBaseTransform set_caps vmethod. Called once capability
   * negotiation has completed; we extract the video resolution there. */
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_face_detector_tensor_decoder_set_caps);

  /* Cache the tensor-id quarks once; they never change at runtime. */
  BOXES_TENSOR_ID_QUARK = g_quark_from_static_string (BOXES_TENSOR_ID);
  SCORES_TENSOR_ID_QUARK = g_quark_from_static_string (SCORES_TENSOR_ID);
  FACE_QUARK = g_quark_from_static_string ("face");
}
/* Instance initialisation: set property defaults and disable passthrough
 * so transform_ip is invoked for every buffer. The candidate arrays are
 * allocated lazily elsewhere. */
static void
gst_face_detector_tensor_decoder_init (GstFaceDetectorTensorDecoder * self)
{
  self->score_threshold = DEFAULT_SCORE_THRESHOLD;
  self->iou_threshold = DEFAULT_IOU_THRESHOLD;
  self->sel_candidates = NULL;
  self->selected = NULL;
  gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE);
}
/* GObject finalize: release the candidate arrays and the candidates
 * backing storage, then chain up to the parent class. */
static void
gst_face_detector_tensor_decoder_finalize (GObject * object)
{
  GstFaceDetectorTensorDecoder *self =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);
  g_clear_pointer (&self->sel_candidates, g_ptr_array_unref);
  g_clear_pointer (&self->selected, g_ptr_array_unref);
  g_clear_pointer (&self->candidates, g_free);
  G_OBJECT_CLASS (gst_face_detector_tensor_decoder_parent_class)->finalize
      (object);
}
/* GObject property setter for score-threshold and iou-threshold. */
static void
gst_face_detector_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *decoder =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_IOU_THRESHOLD:
      decoder->iou_threshold = g_value_get_float (value);
      break;
    case PROP_SCORE_THRESHOLD:
      decoder->score_threshold = g_value_get_float (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}
/* GObject property getter: report the current threshold values. */
static void
gst_face_detector_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstFaceDetectorTensorDecoder *decoder =
      GST_FACE_DETECTOR_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_IOU_THRESHOLD:
      g_value_set_float (value, decoder->iou_threshold);
      break;
    case PROP_SCORE_THRESHOLD:
      g_value_set_float (value, decoder->score_threshold);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}
/* gst_face_detector_tensor_decoder_set_caps:
 *
 * Called when caps negotiation completes; captures the negotiated video
 * resolution from the sink caps into self->video_info. Returns FALSE
 * (failing negotiation) when the caps cannot be parsed.
 */
static gboolean
gst_face_detector_tensor_decoder_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps)
{
  GstFaceDetectorTensorDecoder *decoder =
      GST_FACE_DETECTOR_TENSOR_DECODER (trans);
  gboolean parsed = gst_video_info_from_caps (&decoder->video_info, incaps);

  if (!parsed)
    GST_ERROR_OBJECT (decoder, "Failed to parse caps");

  return parsed;
}
/* gst_face_detector_tensor_decoder_get_tensor_meta
 * @buf:in: buffer
 * @boxes_tensor:out: Boxes tensor
 * @scores_tensor:out: scores tensor
 *
 * Retrieve FaceDetection boxes and scores tensors from buffer.
 *
 * @return: TRUE if @buf has boxes and scores tensors with the desired
 * features attached to it, otherwise FALSE.
 *
 * Fix: the warning emitted when the *scores* tensor is missing wrongly
 * said "boxes tensor" (copy-paste error).
 */
static gboolean
gst_face_detector_tensor_decoder_get_tensor_meta (GstFaceDetectorTensorDecoder
    * self, GstBuffer * buf, const GstTensor ** boxes_tensor,
    const GstTensor ** scores_tensor)
{
  GstMeta *meta;
  gpointer state = NULL;
  /* Expected shapes: boxes [1, N, 4] (corner coords), scores [1, N, 2]. */
  static const gsize BOXES_DIMS[] = { 1, G_MAXSIZE, 4 };
  static const gsize SCORES_DIMS[] = { 1, G_MAXSIZE, 2 };

  g_return_val_if_fail (boxes_tensor != NULL, FALSE);
  g_return_val_if_fail (scores_tensor != NULL, FALSE);

  *boxes_tensor = NULL;
  *scores_tensor = NULL;

  /* Scan every tensor meta attached to the buffer. NOTE(review): when a
   * meta has boxes but no scores, the boxes pointer from that meta is
   * kept while scanning continues — so the two tensors may come from
   * different metas; confirm this pairing is intended. */
  while ((meta = gst_buffer_iterate_meta_filtered (buf, &state,
              GST_TENSOR_META_API_TYPE))) {
    GstTensorMeta *tensor_meta = (GstTensorMeta *) meta;
    GST_LOG_OBJECT (self, "Num tensors %zu", tensor_meta->num_tensors);

    /* Look up the boxes tensor by id, requiring float32 data in
     * row-major order with 3 dimensions matching BOXES_DIMS. */
    *boxes_tensor =
        gst_tensor_meta_get_typed_tensor (tensor_meta, BOXES_TENSOR_ID_QUARK,
        GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3,
        BOXES_DIMS);
    if (*boxes_tensor == NULL)
      continue;

    /* Look up the scores tensor by id, requiring float32 data in
     * row-major order with 3 dimensions matching SCORES_DIMS. */
    *scores_tensor =
        gst_tensor_meta_get_typed_tensor (tensor_meta, SCORES_TENSOR_ID_QUARK,
        GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3,
        SCORES_DIMS);
    if (*scores_tensor == NULL)
      continue;
  }

  if (*boxes_tensor == NULL) {
    GST_WARNING_OBJECT (self, "Can't retrieve boxes tensor");
    return FALSE;
  }
  if (*scores_tensor == NULL) {
    GST_WARNING_OBJECT (self, "Can't retrieve scores tensor");
    return FALSE;
  }
  return TRUE;
}
/* GCompareFunc for sorting Candidate pointers by score, highest score
 * first (descending order). */
static gint
gst_face_detector_tensor_decoder_sort_candidates (gconstpointer c1,
    gconstpointer c2)
{
  gfloat score1 = *(*((Candidate **) c1))->score;
  gfloat score2 = *(*((Candidate **) c2))->score;

  if (score1 < score2)
    return 1;
  if (score1 > score2)
    return -1;
  return 0;
}
/* Intersection-over-Union of two candidate boxes. Boxes are stored as
 * [x1, y1, x2, y2] corner coordinates in Candidate.box. Returns 0 when
 * the boxes are disjoint or either box is degenerate. */
static gfloat
iou_box (const Candidate * a, const Candidate * b)
{
  /* Corners of the intersection rectangle. */
  gfloat ix1 = MAX (a->box[0], b->box[0]);
  gfloat iy1 = MAX (a->box[1], b->box[1]);
  gfloat ix2 = MIN (a->box[2], b->box[2]);
  gfloat iy2 = MIN (a->box[3], b->box[3]);
  gfloat w = ix2 - ix1;
  gfloat h = iy2 - iy1;
  gfloat intersection, area_a, area_b;

  if (w < 0.0f || h < 0.0f) {
    /* No overlap */
    return 0.0f;
  }

  intersection = w * h;
  area_a = (a->box[2] - a->box[0]) * (a->box[3] - a->box[1]);
  area_b = (b->box[2] - b->box[0]) * (b->box[3] - b->box[1]);
  if (area_a <= 0.0f || area_b <= 0.0f)
    return 0.0f;

  /* IoU = intersection / union */
  return intersection / (area_a + area_b - intersection);
}
/* hard_nms:
 * @sel_candidates: array of pointers to score-sorted candidate boxes
 * @selected: output array receiving the kept boxes (not copied; pointers
 *            into @sel_candidates' elements)
 * @iou_threshold: overlap ratio at or above which a box is suppressed
 * @top_k: number of boxes to keep (if top_k <= 0, keep all)
 *
 * Hard non-maximum suppression. Assumes @sel_candidates is already
 * sorted by descending score:
 * 1) Keep the highest-scoring remaining box
 * 2) Discard boxes with IoU >= iou_threshold against it
 * 3) Repeat until no boxes are left or top_k boxes were kept
 */
static void
hard_nms (const GPtrArray * sel_candidates,
    GPtrArray * selected, gfloat iou_threshold, gint top_k)
{
  /* Edge case: Handle the case of no input boxes */
  if (sel_candidates->len == 0) {
    return;
  }

  /* Suppression flags, one byte per candidate, zeroed on the stack:
   * 0 => keep, 1 => discard. */
  gchar *discarded = g_alloca0 (sel_candidates->len);

  /* Perform NMS; at most sel_candidates->len boxes can be kept. */
  for (gsize i = 0; i < sel_candidates->len; i++) {
    if (discarded[i]) {
      /* Already thrown out due to overlap with a higher-scoring box. */
      continue;
    }
    /* Keep the current candidate (highest-scoring not yet suppressed). */
    Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
    g_ptr_array_add (selected, c);
    /* Stop once top_k boxes were kept (when top_k > 0). */
    if (top_k > 0 && selected->len == top_k) {
      break;
    }
    /* Suppress all later candidates overlapping the kept one too much. */
    for (gsize j = i + 1; j < sel_candidates->len; j++) {
      if (discarded[j])
        continue;
      gfloat overlap = iou_box (g_ptr_array_index (sel_candidates, i),
          g_ptr_array_index (sel_candidates, j));
      if (overlap >= iou_threshold) {
        discarded[j] = 1;       /* Mark for discard */
      }
    }
  }
}
/* gst_face_detector_tensor_decoder_decode_boxes_f32:
 * @self: Instance
 * @boxes_tensor: Buffer containing the boxes tensor
 * @scores_tensor: Buffer containing the scores/confidences tensor
 * @rmeta: analytics-meta that is attached to the buffer
 * @return: void
 * Decode Face Detection tensors, post-process them (score thresholding,
 * descending sort, hard NMS) and store the decoded detections into an
 * analytics-meta that is attached to the buffer before being pushed
 * downstream.
 */
static void
gst_face_detector_tensor_decoder_decode_boxes_f32 (GstFaceDetectorTensorDecoder
    * self, const GstTensor * boxes_tensor, const GstTensor * scores_tensor,
    GstAnalyticsRelationMeta * rmeta)
{
  GstMapInfo map_info_boxes, map_info_scores;
  gfloat *candidate, *score;
  gboolean rv;
  GPtrArray *sel_candidates = self->sel_candidates, *selected = self->selected;
  rv = gst_buffer_map (boxes_tensor->data, &map_info_boxes, GST_MAP_READ);
  g_assert (rv);
  /* Retrieve memory at index 0 from scores_tensor in READ mode */
  rv = gst_buffer_map (scores_tensor->data, &map_info_scores, GST_MAP_READ);
  g_assert (rv);
  GST_LOG_OBJECT (self, "Boxes Tensor shape dims %zu", boxes_tensor->num_dims);
  GST_LOG_OBJECT (self, "scores Tensor shape dims %zu",
      scores_tensor->num_dims);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    /* Trace boxes tensor dimensions */
    for (gsize i = 0; i < boxes_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Boxes Tensor dim %zu: %zu", i,
          boxes_tensor->dims[i]);
    }
    /* Trace scores tensor dimensions */
    for (gsize i = 0; i < scores_tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self, "Scores Tensor dim %zu: %zu", i,
          scores_tensor->dims[i]);
    }
  }
  /* Allocate array to store selected candidates */
  if (sel_candidates == NULL) {
    /* Number of candidates can be large, keep the array to avoid frequent
     * allocation. NOTE(review): the backing stores are sized from the first
     * buffer's dims[1]; this assumes the tensor shape stays constant for
     * the whole stream — TODO confirm. */
    sel_candidates = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->sel_candidates = sel_candidates;
    selected = g_ptr_array_new_full (boxes_tensor->dims[1], NULL);
    self->selected = selected;
    self->candidates = (Candidate *) g_new0 (Candidate, boxes_tensor->dims[1]);
  } else {
    /* Reset lengths when we re-use arrays */
    g_ptr_array_set_size (sel_candidates, 0);
    g_ptr_array_set_size (selected, 0);
  }
  score = (gfloat *) map_info_scores.data;
  candidate = (gfloat *) map_info_boxes.data;
  gsize idx = 0;
  /* For UltraLightFaceDetection:
   * "boxes" => shape [N,4], where N = 4420
   * "scores"=> shape [N,2], (background,face)
   * We'll skip the background (index = 0) and keep the foreground (index = 1).
   */
  /*
   * Iterate through the Scores tensor.
   * Check whether the score exceeds default threshold, if it does, select the
   * score and corresponding box.
   * Add these selected boxes to the sel_candidates array.
   * */
  for (gsize i = 1, j = 0; i < scores_tensor->dims[1] * 2; i += 2, j += 4) {
    if (score[i] >= self->score_threshold) {
      self->candidates[idx].index = idx;
      self->candidates[idx].box = &candidate[j];
      self->candidates[idx].score = &score[i];
      g_ptr_array_add (sel_candidates, &self->candidates[idx]);
      idx++;
    }
  }
  /* GPtrArray.len is unsigned: log it with %u. */
  GST_LOG_OBJECT (self, "Number of selected candidates = %u",
      sel_candidates->len);
  if (sel_candidates->len == 0) {
    GST_LOG_OBJECT (self, "No boxes above threshold=%1.2f",
        self->score_threshold);
    goto cleanup;
  }
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      gsize j = 0;
      for (; j < boxes_tensor->dims[2]; j++) {
        GST_TRACE_OBJECT (self, "sel_candidates[%zu] = %1.5f ", i + j,
            c->box[j]);
      }
      GST_TRACE_OBJECT (self, "score[%zu] = %1.5f", i + j, c->score[0]);
    }
  }
  /*
   * Sort the sel_candidates array so as to have the candidates in descending
   * order w.r.t. scores
   */
  g_ptr_array_sort (sel_candidates,
      gst_face_detector_tensor_decoder_sort_candidates);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < sel_candidates->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (sel_candidates, i);
      GST_TRACE_OBJECT (self, "c[%zu] = %1.5f index = %d", i, c->score[0],
          c->index);
    }
  }
  /* NMS */
  hard_nms (sel_candidates, selected, self->iou_threshold, -1);
  GST_LOG_OBJECT (self, "Number of faces detected = %u", selected->len);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < selected->len; i++) {
      Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
      /* Each box is [x1, y1, x2, y2]: the coordinates live at fixed
       * offsets 0..3, independent of the loop index (previously this
       * traced c->box[i + 0]..c->box[i + 3], reading the wrong box for
       * every i > 0). */
      GST_TRACE_OBJECT (self,
          "%zu x1 = %1.5f y1 = %1.5f x2 = %1.5f y2 = %1.5f score = %1.5f",
          i + 1, c->box[0], c->box[1], c->box[2], c->box[3],
          c->score[0]);
    }
  }
  gsize frame_width = self->video_info.width;
  gsize frame_height = self->video_info.height;
  /* Convert each final box from normalized to pixel coords and attach to meta.
   * Use an unsigned index to match GPtrArray.len. */
  for (guint i = 0; i < selected->len; i++) {
    Candidate *c = (Candidate *) g_ptr_array_index (selected, i);
    gfloat x1 = c->box[0] * frame_width;
    gfloat y1 = c->box[1] * frame_height;
    gfloat x2 = c->box[2] * frame_width;
    gfloat y2 = c->box[3] * frame_height;
    gfloat w_ = x2 - x1;
    gfloat h_ = y2 - y1;
    /* Add to analytics meta: (x, y, width, height), rounded to the
     * nearest pixel. */
    gst_analytics_relation_meta_add_od_mtd (rmeta, FACE_QUARK,
        (gint) (x1 + 0.5f), (gint) (y1 + 0.5f),
        (gint) (w_ + 0.5f), (gint) (h_ + 0.5f), c->score[0], NULL);
  }
cleanup:
  /* Unmap */
  gst_buffer_unmap (boxes_tensor->data, &map_info_boxes);
  gst_buffer_unmap (scores_tensor->data, &map_info_scores);
}
/* gst_face_detector_tensor_decoder_transform_ip:
 * @trans: Instance
 * @buf:inout: Buffer containing media and where tensors can be attached
 * @return: Flow errors
 * Decode Face Detection tensors, post-process them and store the decoded
 * information into an analytics-meta that is attached to the buffer
 * before it is pushed downstream.
 */
static GstFlowReturn
gst_face_detector_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstFaceDetectorTensorDecoder *self = GST_FACE_DETECTOR_TENSOR_DECODER (trans);
  const GstTensor *boxes_tensor;
  const GstTensor *scores_tensor;

  /* Locate the expected Face Detection tensors on the buffer; raise a
   * stream error and abort the flow when they are missing or do not
   * match the supported data type/shape. */
  if (!gst_face_detector_tensor_decoder_get_tensor_meta (self, buf,
          &boxes_tensor, &scores_tensor)) {
    GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL),
        ("Tensor doesn't have the expected data type or shape."));
    return GST_FLOW_ERROR;
  }

  GstAnalyticsRelationMeta *rmeta = gst_buffer_add_analytics_relation_meta (buf);
  g_assert (rmeta != NULL);

  /* Decode both tensors and attach the resulting detections to rmeta in a
   * structured way. */
  gst_face_detector_tensor_decoder_decode_boxes_f32 (self, boxes_tensor,
      scores_tensor, rmeta);

  return GST_FLOW_OK;
}

View File

@ -1,86 +0,0 @@
/*
* GStreamer gstreamer-facedetectortensordecoder
* Copyright (C) 2025 Collabora Ltd
*
* gstfacedetectortensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_FACE_DETECTOR_TENSOR_DECODER_H__
#define __GST_FACE_DETECTOR_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/base/base.h>
G_BEGIN_DECLS
#define GST_TYPE_FACE_DETECTOR_TENSOR_DECODER (gst_face_detector_tensor_decoder_get_type())
G_DECLARE_FINAL_TYPE (GstFaceDetectorTensorDecoder,
gst_face_detector_tensor_decoder, GST, FACE_DETECTOR_TENSOR_DECODER,
GstBaseTransform)
typedef struct
{
guint16 index;
gfloat *box;
gfloat *score;
} Candidate;
/**
* GstFaceDetectorTensorDecoder:
*
* Since: 1.28
*/
struct _GstFaceDetectorTensorDecoder
{
GstBaseTransform basetransform;
/* Confidence threshold. */
gfloat score_threshold;
/* Intersection-of-Union threshold. */
gfloat iou_threshold;
/* Video Info */
GstVideoInfo video_info;
/* Candidates with a class confidence level above threshold. */
GPtrArray *sel_candidates;
/* Final candidates selected that respect class confidence level,
* NMS and maximum detection. */
GPtrArray *selected;
/* Candidates with a class confidence level and bounding boxes. */
Candidate *candidates;
};
/**
* GstFaceDetectorTensorDecoderClass:
*
* @parent_class base transform base class
*
* Since: 1.28
*/
struct _GstFaceDetectorTensorDecoderClass
{
GstBaseTransformClass parent_class;
};
GST_ELEMENT_REGISTER_DECLARE (face_detector_tensor_decoder)
G_END_DECLS
#endif /* __GST_FACE_DETECTOR_TENSOR_DECODER_H__ */

View File

@ -53,10 +53,10 @@
#include <gst/analytics/analytics.h>
/* Object detection tensor id strings */
#define GST_MODEL_OBJECT_DETECTOR_BOXES "ssd-mobilenet-v1-variant-1-out-boxes"
#define GST_MODEL_OBJECT_DETECTOR_SCORES "ssd-mobilenet-v1-variant-1-out-scores"
#define GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS "generic-variant-1-out-count"
#define GST_MODEL_OBJECT_DETECTOR_CLASSES "ssd-mobilenet-v1-variant-1-out-classes"
#define GST_MODEL_OBJECT_DETECTOR_BOXES "Gst.Model.ObjectDetector.Boxes"
#define GST_MODEL_OBJECT_DETECTOR_SCORES "Gst.Model.ObjectDetector.Scores"
#define GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS "Gst.Model.ObjectDetector.NumDetections"
#define GST_MODEL_OBJECT_DETECTOR_CLASSES "Gst.Model.ObjectDetector.Classes"
GST_DEBUG_CATEGORY_STATIC (ssd_object_detector_debug);
#define GST_CAT_DEFAULT ssd_object_detector_debug
@ -161,7 +161,7 @@ gst_ssd_object_detector_class_init (GstSsdObjectDetectorClass * klass)
(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
gst_element_class_set_static_metadata (element_class, "objectdetector",
"Tensordecoder/Video",
"TensorDecoder/Video",
"Apply tensor output from inference to detect objects in video frames",
"Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>");
gst_element_class_add_pad_template (element_class,
@ -306,80 +306,48 @@ gst_ssd_object_detector_get_property (GObject * object, guint prop_id,
}
}
static gboolean
gst_ssd_object_detector_get_tensors (GstSsdObjectDetector * object_detector,
GstBuffer * buf, const GstTensor ** classes_tensor,
const GstTensor ** numdetect_tensor, const GstTensor ** scores_tensor,
const GstTensor ** boxes_tensor)
static GstTensorMeta *
gst_ssd_object_detector_get_tensor_meta (GstSsdObjectDetector * object_detector,
GstBuffer * buf)
{
GstMeta *meta = NULL;
gpointer iter_state = NULL;
static const gsize BOXES_DIMS[] = { 1, G_MAXSIZE, 4 };
static const gsize NUM_DETECT_DIMS[] = { 1 };
static const gsize SCORES_CLASSES_DIMS[] = { 1, G_MAXSIZE };
if (!gst_buffer_get_meta (buf, GST_TENSOR_META_API_TYPE)) {
GST_DEBUG_OBJECT (object_detector,
"missing tensor meta from buffer %" GST_PTR_FORMAT, buf);
return FALSE;
return NULL;
}
// find object detector meta
while ((meta = gst_buffer_iterate_meta_filtered (buf, &iter_state,
GST_TENSOR_META_API_TYPE))) {
GstTensorMeta *tmeta = (GstTensorMeta *) meta;
*boxes_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES),
GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3,
BOXES_DIMS);
if (*boxes_tensor == NULL)
*boxes_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES),
GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3,
BOXES_DIMS);
if (*boxes_tensor == NULL)
GstTensorMeta *tensor_meta = (GstTensorMeta *) meta;
/* SSD model must have either 3 or 4 output tensor nodes: 4 if there is a label node,
* and only 3 if there is no label */
if (tensor_meta->num_tensors != 3 && tensor_meta->num_tensors != 4)
continue;
*scores_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES),
GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2,
SCORES_CLASSES_DIMS);
if (*scores_tensor == NULL)
*scores_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES),
GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2,
SCORES_CLASSES_DIMS);
if (*scores_tensor == NULL)
gint boxesIndex = gst_tensor_meta_get_index_from_id (tensor_meta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES));
gint scoresIndex = gst_tensor_meta_get_index_from_id (tensor_meta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES));
gint numDetectionsIndex = gst_tensor_meta_get_index_from_id (tensor_meta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS));
gint clasesIndex = gst_tensor_meta_get_index_from_id (tensor_meta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES));
if (boxesIndex == -1 || scoresIndex == -1 || numDetectionsIndex == -1)
continue;
*numdetect_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS),
GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1,
NUM_DETECT_DIMS);
if (*numdetect_tensor == NULL)
*numdetect_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS),
GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1,
NUM_DETECT_DIMS);
if (*numdetect_tensor == NULL)
if (tensor_meta->num_tensors == 4 && clasesIndex == -1)
continue;
*classes_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES),
GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2,
SCORES_CLASSES_DIMS);
if (*classes_tensor == NULL)
*classes_tensor = gst_tensor_meta_get_typed_tensor (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES),
GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2,
SCORES_CLASSES_DIMS);
return TRUE;
return tensor_meta;
}
return FALSE;
return NULL;
}
static gboolean
@ -412,7 +380,7 @@ gst_ssd_object_detector_transform_ip (GstBaseTransform * trans, GstBuffer * buf)
#define DEFINE_GET_FUNC(TYPE, MAX) \
static gboolean \
get_ ## TYPE ## _at_index (const GstTensor *tensor, GstMapInfo *map, \
get_ ## TYPE ## _at_index (GstTensor *tensor, GstMapInfo *map, \
guint index, TYPE * out) \
{ \
switch (tensor->data_type) { \
@ -437,16 +405,18 @@ gst_ssd_object_detector_transform_ip (GstBaseTransform * trans, GstBuffer * buf)
return TRUE; \
}
DEFINE_GET_FUNC (guint32, UINT32_MAX);
DEFINE_GET_FUNC (float, FLOAT_MAX);
DEFINE_GET_FUNC (guint32, UINT32_MAX)
DEFINE_GET_FUNC (float, FLOAT_MAX)
#undef DEFINE_GET_FUNC
static void
extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
GstAnalyticsRelationMeta * rmeta, const GstTensor * classes_tensor,
const GstTensor * numdetect_tensor, const GstTensor * scores_tensor,
const GstTensor * boxes_tensor)
static void
extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
GstAnalyticsRelationMeta * rmeta, GstTensorMeta * tmeta)
{
gint classes_index;
gint boxes_index;
gint scores_index;
gint numdetect_index;
GstMapInfo boxes_map = GST_MAP_INFO_INIT;
GstMapInfo numdetect_map = GST_MAP_INFO_INIT;
GstMapInfo scores_map = GST_MAP_INFO_INIT;
@ -454,49 +424,57 @@ extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
guint num_detections = 0;
if (numdetect_tensor == NULL || scores_tensor == NULL || boxes_tensor == NULL) {
classes_index = gst_tensor_meta_get_index_from_id (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES));
numdetect_index = gst_tensor_meta_get_index_from_id (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS));
scores_index = gst_tensor_meta_get_index_from_id (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES));
boxes_index = gst_tensor_meta_get_index_from_id (tmeta,
g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES));
if (numdetect_index == -1 || scores_index == -1 || numdetect_index == -1) {
GST_WARNING ("Missing tensor data expected for SSD model");
return;
}
if (!gst_buffer_map (numdetect_tensor->data, &numdetect_map, GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map numdetect tensor memory");
if (!gst_buffer_map (tmeta->tensors[numdetect_index]->data, &numdetect_map,
GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map tensor memory for index %d",
numdetect_index);
goto cleanup;
}
if (!gst_buffer_map (boxes_tensor->data, &boxes_map, GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map boxes tensor memory");
if (!gst_buffer_map (tmeta->tensors[boxes_index]->data, &boxes_map,
GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map tensor memory for index %d",
boxes_index);
goto cleanup;
}
if (!gst_buffer_map (scores_tensor->data, &scores_map, GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map scores tensor memory");
if (!gst_buffer_map (tmeta->tensors[scores_index]->data, &scores_map,
GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map tensor memory for index %d",
scores_index);
goto cleanup;
}
if (classes_tensor &&
!gst_buffer_map (classes_tensor->data, &classes_map, GST_MAP_READ)) {
GST_DEBUG_OBJECT (self, "Failed to map classes tensor memory");
goto cleanup;
if (classes_index != -1 &&
!gst_buffer_map (tmeta->tensors[classes_index]->data, &classes_map,
GST_MAP_READ)) {
GST_DEBUG_OBJECT (self, "Failed to map tensor memory for index %d",
classes_index);
}
if (!get_guint32_at_index (numdetect_tensor, &numdetect_map,
if (!get_guint32_at_index (tmeta->tensors[numdetect_index], &numdetect_map,
0, &num_detections)) {
GST_ERROR_OBJECT (self, "Failed to get the number of detections");
goto cleanup;
}
GST_LOG_OBJECT (self, "Model claims %u detections", num_detections);
num_detections = MIN (num_detections, scores_tensor->dims[1]);
num_detections = MIN (num_detections, boxes_tensor->dims[1]);
if (classes_tensor)
num_detections = MIN (num_detections, classes_tensor->dims[1]);
GST_LOG_OBJECT (self, "Model really has %u detections"
" (%zu scores, %zu boxes, %zu classes)", num_detections,
scores_tensor->dims[1], boxes_tensor->dims[1],
classes_tensor ? classes_tensor->dims[1] : 0);
GST_LOG_OBJECT (self, "Model claims %d detections", num_detections);
for (int i = 0; i < num_detections; i++) {
float score;
@ -506,20 +484,25 @@ extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
GQuark label = 0;
GstAnalyticsODMtd odmtd;
if (!get_float_at_index (scores_tensor, &scores_map, i, &score))
if (!get_float_at_index (tmeta->tensors[numdetect_index], &scores_map,
i, &score))
continue;
GST_LOG_OBJECT (self, "Detection %u score is %f", i, score);
if (score < self->score_threshold)
continue;
if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4, &y))
if (!get_float_at_index (tmeta->tensors[boxes_index], &boxes_map,
i * 4, &y))
continue;
if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 1, &x))
if (!get_float_at_index (tmeta->tensors[boxes_index], &boxes_map,
i * 4 + 1, &x))
continue;
if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 2, &bheight))
if (!get_float_at_index (tmeta->tensors[boxes_index], &boxes_map,
i * 4 + 2, &bheight))
continue;
if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 3, &bwidth))
if (!get_float_at_index (tmeta->tensors[boxes_index], &boxes_map,
i * 4 + 3, &bwidth))
continue;
if (CLAMP (bwidth, 0, 1) * CLAMP (bheight, 0, 1) > self->size_threshold) {
@ -530,7 +513,8 @@ extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
}
if (self->labels && classes_map.memory &&
get_guint32_at_index (classes_tensor, &classes_map, i, &bclass)) {
get_guint32_at_index (tmeta->tensors[classes_index], &classes_map,
i, &bclass)) {
if (bclass < self->labels->len)
label = g_array_index (self->labels, GQuark, bclass);
}
@ -552,13 +536,13 @@ extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h,
cleanup:
if (numdetect_map.memory)
gst_buffer_unmap (numdetect_tensor->data, &numdetect_map);
gst_buffer_unmap (tmeta->tensors[numdetect_index]->data, &numdetect_map);
if (classes_map.memory)
gst_buffer_unmap (classes_tensor->data, &classes_map);
gst_buffer_unmap (tmeta->tensors[classes_index]->data, &classes_map);
if (scores_map.memory)
gst_buffer_unmap (scores_tensor->data, &scores_map);
gst_buffer_unmap (tmeta->tensors[scores_index]->data, &scores_map);
if (boxes_map.memory)
gst_buffer_unmap (boxes_tensor->data, &boxes_map);
gst_buffer_unmap (tmeta->tensors[boxes_index]->data, &boxes_map);
}
@ -566,15 +550,12 @@ static gboolean
gst_ssd_object_detector_process (GstBaseTransform * trans, GstBuffer * buf)
{
GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (trans);
GstTensorMeta *tmeta;
GstAnalyticsRelationMeta *rmeta;
const GstTensor *classes_tensor = NULL;
const GstTensor *numdetect_tensor = NULL;
const GstTensor *scores_tensor = NULL;
const GstTensor *boxes_tensor = NULL;
// get all tensor metas
if (!gst_ssd_object_detector_get_tensors (self, buf,
&classes_tensor, &numdetect_tensor, &scores_tensor, &boxes_tensor)) {
tmeta = gst_ssd_object_detector_get_tensor_meta (self, buf);
if (!tmeta) {
GST_WARNING_OBJECT (trans, "missing tensor meta");
return TRUE;
} else {
@ -583,8 +564,7 @@ gst_ssd_object_detector_process (GstBaseTransform * trans, GstBuffer * buf)
}
extract_bounding_boxes (self, self->video_info.width,
self->video_info.height, rmeta, classes_tensor, numdetect_tensor,
scores_tensor, boxes_tensor);
self->video_info.height, rmeta, tmeta);
return TRUE;
}

View File

@ -26,8 +26,6 @@
#include "gstssdobjectdetector.h"
#include "gstyolotensordecoder.h"
#include "gstclassifiertensordecoder.h"
#include "gstfacedetectortensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@ -43,9 +41,6 @@ plugin_init (GstPlugin * plugin)
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_seg_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_od_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (yolo_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (classifier_tensor_decoder, plugin);
ret |= GST_ELEMENT_REGISTER (face_detector_tensor_decoder, plugin);
return ret;
}

View File

@ -2,14 +2,10 @@ tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c',
'gstyolotensordecoder.c'
'gstclassifiertensordecoder.c',
'gstfacedetectortensordecoder.c'
]
tensordecoders_headers = [
'gstssdobjectdetector.h',
'gstclassifiertensordecoder.h',
'gstfacedetectortensordecoder.h'
]
doc_sources = []