tensordecoder: add general classifier tensor-decoder

- Classification output is more standardized compared to the output of other
  tensor-decoders.
- This tensor-decoder implements a standard classification tensor-decoder.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8548>
This commit is contained in:
Daniel Morin 2025-02-24 11:15:29 -05:00 committed by GStreamer Marge Bot
parent 7f4282481e
commit ec60daa66d
5 changed files with 644 additions and 1 deletions

View File

@ -248041,6 +248041,60 @@
"tensordecoders": {
"description": "Tensor decoders elements",
"elements": {
"classifiertensordecoder": {
"author": "Daniel Morin <daniel.morin@collabora.com>",
"description": "Decode tensors output from classification model using common format.\n\tTensor format must be: \n\t\tDims: [batch-size, class_count]\n\t\tDatatype: float32 \n\n\t\tTensor [M,N]\n\t\t\tBatch 0 | Class 0 confidence level | ... | Class N-1 confidence level |\n\t\t\t...\n\t\t\tBatch M-1 | Class 0 confidence level | ... | Class N-1 confidence level |\n\t\t\n\tIn-memory tensor format:\n\n\t\t|Batch 0, Class 0 confidence level |\n\t\t|Batch 0, ... |\n\t\t|Batch 0, Class N-1 confidence level |\n\t\t| ... |\n\t\t|Batch M-1, Class 0 confidence level |\n\t\t|Batch M-1, ... |\n\t\t|Batch M-1, Class N-1 confidence level |\n\n model",
"hierarchy": [
"GstClassifierTensorDecoder",
"GstBaseTransform",
"GstElement",
"GstObject",
"GInitiallyUnowned",
"GObject"
],
"klass": "Tensordecoder",
"pad-templates": {
"sink": {
"caps": "ANY",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "ANY",
"direction": "src",
"presence": "always"
}
},
"properties": {
"class-confidence-threshold": {
"blurb": "Classes with a confidence level inferior to this threshold will be excluded",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "0.7",
"max": "1",
"min": "0",
"mutable": "null",
"readable": true,
"type": "gfloat",
"writable": true
},
"labels-file": {
"blurb": "Path to a file containing class label. COCO format",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "NULL",
"mutable": "null",
"readable": true,
"type": "gchararray",
"writable": true
}
},
"rank": "primary"
},
"ssdobjectdetector": {
"author": "Aaron Boxer <aaron.boxer@collabora.com>, Marcus Edel <marcus.edel@collabora.com>",
"description": "Apply tensor output from inference to detect objects in video frames",

View File

@ -0,0 +1,519 @@
/*
* GStreamer gstreamer-classifiertensordecoder
* Copyright (C) 2025 Collabora Ltd.
* @author: Daniel Morin <daniel.morin@dmohub.org>
*
* gstclassifiertensordecoder.c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/**
* SECTION:element-classifiertensordecoder
* @short_description: Decode tensors from classification model using a common
* tensor output format.
*
*
* This element can parse per-buffer inference tensor meta data generated by
* an upstream inference element.
*
* Tensor format must be:
* Dims: [batch-size, class_count] or [class_count]
* (only a batch-size of 1 is currently implemented)
* Datatype: float32 or uint8
*
* Tensor [M,N]
* Batch 0 | Class 0 confidence level | ... | Class N-1 confidence level |
* ...
* Batch M-1 | Class 0 confidence level | ... | Class N-1 confidence level |
*
* In-memory tensor format:
*
* |Batch 0, Class 0 confidence level |
* |Batch 0, ... |
* |Batch 0, Class N-1 confidence level |
* | ... |
* |Batch M-1, Class 0 confidence level |
* |Batch M-1, ... |
* |Batch M-1, Class N-1 confidence level |
*
*
* ## Example launch command:
* |[
* gst-launch-1.0 multifilesrc location=/onnx-models/images/bus.jpg \
* ! decodebin ! videoconvert ! onnxinference execution-provider=cpu \
* model-file=/onnx-models/models/mobilenet_v1.onnx \
* ! classifiertensordecoder labels-file=labels.txt ! fakesink
* ]| This pipeline creates a tensor-decoder for a classification model.
*
*/
/* Pull in the build configuration when the build system defines
 * HAVE_CONFIG_H (the guard was previously misspelled, so config.h was never
 * included). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstclassifiertensordecoder.h"
#include <gst/gst.h>
#include <math.h>
#include <gst/analytics/analytics.h>
/* Tensor identifier this decoder looks for: it is converted to a GQuark and
 * passed to gst_tensor_meta_get_index_from_id() further down in this file.
 * NOTE(review): not declared in the header shown here; could presumably be
 * `static` - confirm no other translation unit references it. */
const gchar GST_MODEL_STD_IMAGE_CLASSIFICATION[] = "Gst.Model.Classifier.Std";
/* Debug category used by the GST_* logging macros in this file. */
GST_DEBUG_CATEGORY_STATIC (classifier_tensor_decoder_debug);
#define GST_CAT_DEFAULT classifier_tensor_decoder_debug
/* Makes `gst_classifier_tensor_decoder_parent_class` and `parent_class` the
 * same identifier; both spellings are used in this file. */
#define gst_classifier_tensor_decoder_parent_class parent_class
/* Defines the element registration for "classifiertensordecoder" with
 * GST_RANK_PRIMARY. */
GST_ELEMENT_REGISTER_DEFINE (classifier_tensor_decoder,
"classifiertensordecoder", GST_RANK_PRIMARY,
GST_TYPE_CLASSIFIER_TENSOR_DECODER);
/* GstClassifierTensorDecoder properties */
enum
{
PROP_0,
PROP_THRESHOLD,
PROP_LABEL_FILE
};
/* Default value of the "class-confidence-threshold" property. */
static const float DEFAULT_THRESHOLD = 0.7f;
/* "src" pad template: always present, any caps. */
static GstStaticPadTemplate gst_classifier_tensor_decoder_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS_ANY);
/* "sink" pad template: always present, any caps. */
static GstStaticPadTemplate gst_classifier_tensor_decoder_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS_ANY);
/* Forward declarations of the virtual-method implementations that
 * gst_classifier_tensor_decoder_class_init() installs on the class. */
static void gst_classifier_tensor_decoder_set_property (GObject * object,
guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_classifier_tensor_decoder_get_property (GObject * object,
guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_classifier_tensor_decoder_finalize (GObject * object);
static GstFlowReturn
gst_classifier_tensor_decoder_transform_ip (GstBaseTransform * trans,
GstBuffer * buf);
static GstStateChangeReturn
gst_classifier_tensor_decoder_change_state (GstElement * element,
GstStateChange transition);
/*
 * softmax:
 * @len: number of elements in @values and @result
 * @values: input scores (any arithmetic element type)
 * @result: array of @len gfloat receiving the soft-max of @values
 * @max_val: divisor applied to every input before exponentiation
 *
 * Statement macro shared by the typed soft-max helpers below. It must be
 * expanded inside a function returning void because it returns early on
 * invalid or empty input.
 *
 * The largest scaled input is subtracted from every element before calling
 * expf(). This leaves the mathematical result unchanged, but keeps the
 * exponentials within [0, 1] so large scores cannot overflow to infinity and
 * produce NaN. The sum is always >= 1 (the peak contributes expf (0)), so the
 * final division is safe.
 */
#define softmax(len, values, result, max_val)                                \
  G_STMT_START {                                                             \
    gsize _i;                                                                \
    gfloat _peak;                                                            \
    gfloat _sum = 0.0f;                                                      \
                                                                             \
    g_return_if_fail ((values) != NULL);                                     \
    g_return_if_fail ((result) != NULL);                                     \
                                                                             \
    /* Nothing to normalise; also guarantees that values[0] exists */        \
    if ((len) == 0)                                                          \
      return;                                                                \
                                                                             \
    /* Find the largest scaled input */                                      \
    _peak = (values)[0] / (max_val);                                         \
    for (_i = 1; _i < (len); _i++) {                                         \
      gfloat _scaled = (values)[_i] / (max_val);                             \
      if (_scaled > _peak)                                                   \
        _peak = _scaled;                                                     \
    }                                                                        \
                                                                             \
    /* Calculate the shifted exponential of every value */                   \
    for (_i = 0; _i < (len); _i++) {                                         \
      gfloat _scaled = (values)[_i] / (max_val);                             \
      (result)[_i] = expf (_scaled - _peak);                                 \
      _sum += (result)[_i];                                                  \
    }                                                                        \
                                                                             \
    /* Complete softmax */                                                   \
    for (_i = 0; _i < (len); _i++) {                                         \
      (result)[_i] = (result)[_i] / _sum;                                    \
    }                                                                        \
  } G_STMT_END

/* Soft-max of @len uint8 scores; inputs are first scaled by 1/255. */
static void
softmax_u8 (gsize len, const guint8 * values, gfloat * result)
{
  softmax (len, values, result, 255.0f);
}

/* Soft-max of @len float32 scores. */
static void
softmax_f32 (gsize len, const gfloat * values, gfloat * result)
{
  softmax (len, values, result, 1.0f);
}
/* Defines the GstClassifierTensorDecoder GType with GstBaseTransform as its
 * parent type. */
G_DEFINE_TYPE (GstClassifierTensorDecoder, gst_classifier_tensor_decoder,
GST_TYPE_BASE_TRANSFORM);
/*
 * Class initializer: sets up the debug category, hooks the GObject,
 * GstElement and GstBaseTransform virtual methods, installs the two
 * properties, and registers the element metadata and pad templates.
 */
static void
gst_classifier_tensor_decoder_class_init (GstClassifierTensorDecoderClass *
    klass)
{
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
  GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass);
  GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass);
  GParamSpec *pspec;

  GST_DEBUG_CATEGORY_INIT (classifier_tensor_decoder_debug,
      "classifiertensordecoder", 0,
      "Tensor decoder for classification model with common output format");

  /* GObject vmethods */
  object_class->finalize = gst_classifier_tensor_decoder_finalize;
  object_class->get_property = gst_classifier_tensor_decoder_get_property;
  object_class->set_property = gst_classifier_tensor_decoder_set_property;

  /* Properties */
  pspec = g_param_spec_float ("class-confidence-threshold",
      "Class confidence threshold",
      "Classes with a confidence level inferior to this threshold "
      "will be excluded",
      0.0, 1.0, DEFAULT_THRESHOLD,
      (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
  g_object_class_install_property (object_class, PROP_THRESHOLD, pspec);

  pspec = g_param_spec_string ("labels-file",
      "Class labels file",
      "Path to a file containing class label. COCO format",
      NULL, (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
  g_object_class_install_property (object_class, PROP_LABEL_FILE, pspec);

  /* GstElement vmethods, metadata and pads */
  gstelement_class->change_state = gst_classifier_tensor_decoder_change_state;

  gst_element_class_set_static_metadata (gstelement_class,
      "classifiertensordecoder", "Tensordecoder",
      "Decode tensors output from classification model using common format.\n"
      "\tTensor format must be: \n"
      "\t\tDims: [batch-size, class_count]\n"
      "\t\tDatatype: float32 \n"
      "\n"
      "\t\tTensor [M,N]\n"
      "\t\t\tBatch 0 | Class 0 confidence level | ... | Class N-1 confidence level |\n"
      "\t\t\t...\n"
      "\t\t\tBatch M-1 | Class 0 confidence level | ... | Class N-1 confidence level |\n"
      "\t\t\n"
      "\tIn-memory tensor format:\n"
      "\n"
      "\t\t|Batch 0, Class 0 confidence level |\n"
      "\t\t|Batch 0, ... |\n"
      "\t\t|Batch 0, Class N-1 confidence level |\n"
      "\t\t| ... |\n"
      "\t\t|Batch M-1, Class 0 confidence level |\n"
      "\t\t|Batch M-1, ... |\n"
      "\t\t|Batch M-1, Class N-1 confidence level |\n"
      "\n"
      " model",
      "Daniel Morin <daniel.morin@collabora.com>");

  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get
      (&gst_classifier_tensor_decoder_src_template));
  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get
      (&gst_classifier_tensor_decoder_sink_template));

  /* GstBaseTransform vmethods */
  trans_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_classifier_tensor_decoder_transform_ip);
}
/*
 * Instance initializer: disables passthrough on the base transform and
 * resets the threshold, labels path and soft-max scratch array to their
 * defaults.
 */
static void
gst_classifier_tensor_decoder_init (GstClassifierTensorDecoder * self)
{
  GstBaseTransform *trans = GST_BASE_TRANSFORM (self);

  gst_base_transform_set_passthrough (trans, FALSE);

  self->softmax_res = NULL;
  self->labels_file = NULL;
  self->threshold = DEFAULT_THRESHOLD;
}
/*
 * GObject finalize: frees the labels-file path owned by the instance, then
 * chains up to the parent class.
 */
static void
gst_classifier_tensor_decoder_finalize (GObject * object)
{
  GstClassifierTensorDecoder *decoder = GST_CLASSIFIER_TENSOR_DECODER (object);
  GObjectClass *parent =
      G_OBJECT_CLASS (gst_classifier_tensor_decoder_parent_class);

  g_free (decoder->labels_file);

  parent->finalize (object);
}
/*
 * GObject set_property implementation.
 *
 * PROP_THRESHOLD stores the new confidence threshold.
 * PROP_LABEL_FILE replaces the stored labels path. The previously stored
 * path is always released first. The new path is kept only if it designates
 * a regular file; otherwise an error is logged and the stored path is reset
 * to NULL.
 */
static void
gst_classifier_tensor_decoder_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (object);

  switch (prop_id) {
    case PROP_THRESHOLD:
      self->threshold = g_value_get_float (value);
      break;
    case PROP_LABEL_FILE:
      /* Drop the old path so setting the property repeatedly does not leak */
      g_free (self->labels_file);
      self->labels_file = g_strdup (g_value_get_string (value));

      if (self->labels_file == NULL) {
        GST_ERROR_OBJECT (self, "Invalid file");
      } else if (!g_file_test (self->labels_file, G_FILE_TEST_IS_REGULAR)) {
        /* g_file_test() succeeds when ANY requested test passes, so only
         * IS_REGULAR is requested; adding EXISTS would accept directories */
        GST_ERROR_OBJECT (self, "Unable to load %s", self->labels_file);
        g_free (g_steal_pointer (&self->labels_file));
      }
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}
/*
 * GObject get_property implementation: copies the requested property
 * (confidence threshold or labels-file path) into @value and warns about
 * unknown property ids.
 */
static void
gst_classifier_tensor_decoder_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstClassifierTensorDecoder *decoder = GST_CLASSIFIER_TENSOR_DECODER (object);

  if (prop_id == PROP_THRESHOLD) {
    g_value_set_float (value, decoder->threshold);
  } else if (prop_id == PROP_LABEL_FILE) {
    g_value_set_string (value, decoder->labels_file);
  } else {
    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
  }
}
/*
 * gst_classifier_tensor_decoder_load_labels:
 * @self: the decoder
 *
 * Reads the file designated by the "labels-file" property, one label per
 * line, and stores the labels as GQuarks in @self->class_quark. Reading stops
 * at the first empty line. @self->softmax_res is allocated with one gfloat
 * per label.
 *
 * Any failure (no file configured, unreadable file, empty file, no label
 * found) posts an element error and leaves @self->class_quark and
 * @self->softmax_res untouched.
 *
 * Returns: TRUE if at least one label was loaded, FALSE otherwise.
 */
static gboolean
gst_classifier_tensor_decoder_load_labels (GstClassifierTensorDecoder * self)
{
  gchar *content = NULL;
  gchar **tokens = NULL;
  gsize len = 0;
  /* Must be NULL: g_file_get_contents() refuses an already-set GError */
  GError *err = NULL;
  GArray *quarks;
  guint i;

  if (self->labels_file == NULL) {
    GST_ELEMENT_ERROR (self, RESOURCE, NOT_FOUND,
        ("No labels file configured"),
        ("The \"labels-file\" property must designate a regular file"));
    return FALSE;
  }

  if (!g_file_get_contents (self->labels_file, &content, &len, &err)) {
    GST_ELEMENT_ERROR (self, RESOURCE, OPEN_READ,
        ("Failed to read labels file '%s'", self->labels_file),
        ("%s", err != NULL ? err->message : "unknown error"));
    g_clear_error (&err);
    return FALSE;
  }

  if (len == 0) {
    GST_ELEMENT_ERROR (self, RESOURCE, READ,
        ("Labels file '%s' is empty", self->labels_file), (NULL));
    g_free (content);
    return FALSE;
  }

  tokens = g_strsplit (content, "\n", 0);
  g_free (content);

  /* Collect into a local array first so that a failure below does not leave
   * a half-initialised instance behind */
  quarks = g_array_new (FALSE, FALSE, sizeof (GQuark));
  for (i = 0; tokens[i] != NULL && tokens[i][0] != '\0'; i++) {
    GQuark val = g_quark_from_string (tokens[i]);

    g_array_append_val (quarks, val);
  }
  g_strfreev (tokens);

  if (quarks->len == 0) {
    GST_ELEMENT_ERROR (self, RESOURCE, READ,
        ("Labels file '%s' does not contain any label", self->labels_file),
        ("the first line of the file is empty"));
    g_array_unref (quarks);
    return FALSE;
  }

  self->class_quark = quarks;

  /* One zero-initialised result slot per class; setting the size makes the
   * array length match the memory the decoder writes through ->data */
  self->softmax_res = g_array_sized_new (FALSE, TRUE, sizeof (gfloat),
      quarks->len);
  g_array_set_size (self->softmax_res, quarks->len);

  return TRUE;
}
/*
 * GstElement change_state implementation.
 *
 * NULL -> READY: loads the class labels before chaining up; the transition
 * fails if they cannot be loaded.
 * READY -> NULL: after chaining up, releases the label and soft-max arrays,
 * including their element data, and resets the pointers so nothing dangles.
 *
 * Returns: GST_STATE_CHANGE_FAILURE if the labels could not be loaded,
 * otherwise the parent class' result.
 */
static GstStateChangeReturn
gst_classifier_tensor_decoder_change_state (GstElement * element,
    GstStateChange transition)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (element);
  GstStateChangeReturn ret;

  switch (transition) {
    case GST_STATE_CHANGE_NULL_TO_READY:
      if (!gst_classifier_tensor_decoder_load_labels (self)) {
        return GST_STATE_CHANGE_FAILURE;
      }
      break;
    default:
      break;
  }

  ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);

  switch (transition) {
    case GST_STATE_CHANGE_READY_TO_NULL:
      /* g_array_unref() on the last reference also frees the element data;
       * g_array_free (array, FALSE) would have leaked it */
      g_clear_pointer (&self->class_quark, g_array_unref);
      g_clear_pointer (&self->softmax_res, g_array_unref);
      break;
    default:
      break;
  }

  return ret;
}
/*
 * gst_classifier_tensor_decoder_get_tensor_meta:
 * @self: the decoder (used for logging)
 * @buf: buffer to inspect
 *
 * Walks the tensor metas attached to @buf and picks the first one that holds
 * exactly one tensor and for which the GST_MODEL_STD_IMAGE_CLASSIFICATION id
 * resolves to an index.
 *
 * Returns: the matching tensor meta, or NULL if @buf carries no tensor meta
 * or none of them matches.
 */
static GstTensorMeta *
gst_classifier_tensor_decoder_get_tensor_meta (GstClassifierTensorDecoder *
    self, GstBuffer * buf)
{
  gpointer state = NULL;
  GstMeta *cur;

  if (gst_buffer_get_meta (buf, GST_TENSOR_META_API_TYPE) == NULL) {
    GST_DEBUG_OBJECT (self,
        "missing tensor meta from buffer %" GST_PTR_FORMAT, buf);
    return NULL;
  }

  for (cur = gst_buffer_iterate_meta_filtered (buf, &state,
          GST_TENSOR_META_API_TYPE);
      cur != NULL;
      cur = gst_buffer_iterate_meta_filtered (buf, &state,
          GST_TENSOR_META_API_TYPE)) {
    GstTensorMeta *candidate = (GstTensorMeta *) cur;

    /* The id lookup only happens for metas holding a single tensor */
    if (candidate->num_tensors == 1 &&
        gst_tensor_meta_get_index_from_id (candidate,
            g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION))
        != -1) {
      return candidate;
    }
  }

  return NULL;
}
/*
 * gst_classifier_tensor_decoder_decode:
 * @self: the decoder
 * @buf: buffer being processed (currently unused)
 * @rmeta: relation meta receiving the classification result
 * @tmeta: tensor meta holding the GST_MODEL_STD_IMAGE_CLASSIFICATION tensor
 *
 * Validates the classification tensor (row-major, 1 or 2 dimensions, batch
 * size 1, FLOAT32 or UINT8, as many classes as loaded labels, enough mapped
 * data), computes its soft-max and attaches the most likely class to @rmeta.
 * Nothing is attached when the best confidence is below the
 * "class-confidence-threshold" property.
 *
 * Returns: GST_FLOW_OK on success (with or without a class attached),
 * GST_FLOW_ERROR if the tensor is unsupported, inconsistent or unreadable.
 */
static GstFlowReturn
gst_classifier_tensor_decoder_decode (GstClassifierTensorDecoder * self,
    GstBuffer * buf, GstAnalyticsRelationMeta * rmeta, GstTensorMeta * tmeta)
{
  GstMapInfo map_info = GST_MAP_INFO_INIT;
  GstAnalyticsClsMtd cls_mtd;
  GstTensor *tensor;
  gfloat *softmax_res;
  gfloat max = 0.0f;
  GQuark qmax;
  gsize len;
  gsize elem_size;
  gint max_idx = -1;
  gint index;

  g_return_val_if_fail (self->class_quark != NULL, GST_FLOW_ERROR);
  g_return_val_if_fail (self->softmax_res != NULL, GST_FLOW_ERROR);

  index = gst_tensor_meta_get_index_from_id (tmeta,
      g_quark_from_static_string (GST_MODEL_STD_IMAGE_CLASSIFICATION));
  /* Keep the index signed: -1 must never be used as an array subscript */
  g_return_val_if_fail (index >= 0, GST_FLOW_ERROR);

  tensor = tmeta->tensors[index];

  if (tensor->dims_order != GST_TENSOR_DIM_ORDER_ROW_MAJOR) {
    GST_ELEMENT_ERROR (self, STREAM, NOT_IMPLEMENTED,
        ("Only row-major tensor are supported"),
        ("this element only support tensor with dims_order set to "
            "GST_TENSOR_DIM_ORDER_ROW_MAJOR"));
    return GST_FLOW_ERROR;
  }

  if (tensor->num_dims != 1 && tensor->num_dims != 2) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED,
        ("Only tensors with 1 or 2 dimensions are supported."),
        ("tensor dimension must be 1xm or m."));
    return GST_FLOW_ERROR;
  }

  switch (tensor->data_type) {
    case GST_TENSOR_DATA_TYPE_FLOAT32:
      elem_size = sizeof (gfloat);
      break;
    case GST_TENSOR_DATA_TYPE_UINT8:
      elem_size = sizeof (guint8);
      break;
    default:
      GST_ELEMENT_ERROR (self, STREAM, NOT_IMPLEMENTED,
          ("Only data-type UINT8 and FLOAT32 support is implemented"),
          ("Please implement."));
      return GST_FLOW_ERROR;
  }

  if (tensor->num_dims == 1) {
    if (tensor->dims[0] == 0) {
      GST_ELEMENT_ERROR (self, STREAM, FAILED,
          ("A tensor without content (dims[0] == 0, num_dims == 1) can't be used"),
          ("A tensor without content (dims[0] == 0, num_dims == 1) can't be used"));
      return GST_FLOW_ERROR;
    }
    len = tensor->dims[0];
  } else {
    if (tensor->dims[0] != 1) {
      GST_ELEMENT_ERROR (self, STREAM, NOT_IMPLEMENTED,
          ("Batch not implemented"),
          ("Batch not implemented, please implement"));
      return GST_FLOW_ERROR;
    }
    if (tensor->dims[1] == 0) {
      GST_ELEMENT_ERROR (self, STREAM, FAILED,
          ("A tensor without content (dims[1] == 0, num_dims == 2) can't be used"),
          ("A tensor without content (dims[1] == 0, num_dims == 2) can't be used"));
      return GST_FLOW_ERROR;
    }
    len = tensor->dims[1];
  }

  /* Stream data must be validated with a real check: g_return_val_if_fail()
   * can be compiled out, and a mismatch would overflow the result array */
  if (len != self->class_quark->len) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED,
        ("Tensor size does not match the number of class labels"),
        ("tensor holds %" G_GSIZE_FORMAT " classes but %u labels were loaded",
            len, self->class_quark->len));
    return GST_FLOW_ERROR;
  }

  GST_TRACE_OBJECT (self, "Tensor shape dims %" G_GSIZE_FORMAT,
      (gsize) tensor->num_dims);
  if (gst_debug_category_get_threshold (GST_CAT_DEFAULT) >= GST_LEVEL_TRACE) {
    for (gsize i = 0; i < tensor->num_dims; i++) {
      GST_TRACE_OBJECT (self,
          "Tensor dim %" G_GSIZE_FORMAT ": %" G_GSIZE_FORMAT, i,
          (gsize) tensor->dims[i]);
    }
  }

  if (!gst_buffer_map (tensor->data, &map_info, GST_MAP_READ)) {
    GST_ELEMENT_ERROR (self, RESOURCE, READ,
        ("Failed to map tensor data"), (NULL));
    return GST_FLOW_ERROR;
  }

  /* Never read past the mapped memory, whatever the dims claim */
  if (map_info.size / elem_size < len) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED,
        ("Tensor data is smaller than its dimensions"),
        ("mapped %" G_GSIZE_FORMAT " bytes, need %" G_GSIZE_FORMAT
            " elements of %" G_GSIZE_FORMAT " bytes", map_info.size, len,
            elem_size));
    gst_buffer_unmap (tensor->data, &map_info);
    return GST_FLOW_ERROR;
  }

  softmax_res = (gfloat *) self->softmax_res->data;

  if (tensor->data_type == GST_TENSOR_DATA_TYPE_FLOAT32) {
    softmax_f32 (len, (gfloat *) map_info.data, softmax_res);
  } else {
    softmax_u8 (len, (guint8 *) map_info.data, softmax_res);
  }

  gst_buffer_unmap (tensor->data, &map_info);

  /* Arg-max over the class probabilities */
  for (gsize j = 0; j < len; j++) {
    if (softmax_res[j] > max) {
      max = softmax_res[j];
      max_idx = (gint) j;
    }
  }

  if (max_idx == -1) {
    return GST_FLOW_OK;
  }

  qmax = g_array_index (self->class_quark, GQuark, max_idx);

  /* Honour "class-confidence-threshold": classes below it are excluded */
  if (max < self->threshold) {
    GST_LOG_OBJECT (self,
        "Max class %d:%s with %f is below threshold %f, not attached",
        max_idx, g_quark_to_string (qmax), max, self->threshold);
    return GST_FLOW_OK;
  }

  if (!gst_analytics_relation_meta_add_one_cls_mtd (rmeta, max, qmax,
          &cls_mtd)) {
    GST_WARNING_OBJECT (self, "Failed to attach classification result");
    return GST_FLOW_OK;
  }

  GST_LOG_OBJECT (self, "Max class is %d:%s with %f", max_idx,
      g_quark_to_string (qmax), max);

  return GST_FLOW_OK;
}
/*
 * GstBaseTransform transform_ip implementation.
 *
 * Looks up the classification tensor meta on @buf; when present, attaches an
 * analytics relation meta and decodes the tensor into it. Buffers without a
 * matching tensor meta are passed through unchanged.
 *
 * Returns: GST_FLOW_OK when the buffer carries no matching tensor meta,
 * GST_FLOW_ERROR if the relation meta cannot be added, otherwise the result
 * of gst_classifier_tensor_decoder_decode().
 */
static GstFlowReturn
gst_classifier_tensor_decoder_transform_ip (GstBaseTransform * trans,
    GstBuffer * buf)
{
  GstClassifierTensorDecoder *self = GST_CLASSIFIER_TENSOR_DECODER (trans);
  GstTensorMeta *tmeta;
  GstAnalyticsRelationMeta *rmeta;

  tmeta = gst_classifier_tensor_decoder_get_tensor_meta (self, buf);
  if (tmeta == NULL) {
    GST_WARNING_OBJECT (trans, "missing tensor meta");
    /* Must be a GstFlowReturn: the previous `TRUE` (1) is not GST_FLOW_OK */
    return GST_FLOW_OK;
  }

  rmeta = gst_buffer_add_analytics_relation_meta (buf);
  if (rmeta == NULL) {
    GST_ELEMENT_ERROR (self, STREAM, FAILED,
        ("Failed to add analytics relation meta to buffer"), (NULL));
    return GST_FLOW_ERROR;
  }

  return gst_classifier_tensor_decoder_decode (self, buf, rmeta, tmeta);
}

View File

@ -0,0 +1,66 @@
/*
* GStreamer gstreamer-classifiertensordecoder
* Copyright (C) 2025 Collabora Ltd
* @author: Daniel Morin <daniel.morin@dmohub.org>
*
* gstclassifiertensordecoder.h
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CLASSIFIER_TENSOR_DECODER_H__
#define __GST_CLASSIFIER_TENSOR_DECODER_H__
#include <gst/gst.h>
#include <gst/base/gstbasetransform.h>
G_BEGIN_DECLS
/* GType accessor for the element, followed by the declaration of
 * GstClassifierTensorDecoder as a final type derived from GstBaseTransform. */
#define GST_TYPE_CLASSIFIER_TENSOR_DECODER (gst_classifier_tensor_decoder_get_type ())
G_DECLARE_FINAL_TYPE (GstClassifierTensorDecoder, gst_classifier_tensor_decoder,
GST, CLASSIFIER_TENSOR_DECODER, GstBaseTransform)
/* NOTE(review): the "Since:" tag below says 1.24 while the copyright is 2025;
 * confirm which release this element first ships in. */
/**
* GstClassifierTensorDecoder:
*
* @threshold: Class confidence threshold ("class-confidence-threshold" property)
* @labels_file: Path where to read class labels ("labels-file" property)
* @class_quark: Class labels quark representation, one #GQuark per label
* @softmax_res: Soft-max of output vector, one #gfloat per class
*
* Since: 1.24
*/
struct _GstClassifierTensorDecoder
{
/* Parent instance; must stay the first member */
GstBaseTransform basetransform;
gfloat threshold;
/* Owned string, released in finalize */
gchar *labels_file;
/* Both arrays are created when labels are loaded (NULL -> READY) and
 * released on READY -> NULL */
GArray *class_quark;
GArray *softmax_res;
};
/* Class structure holding only the parent class.
 * NOTE(review): the type is declared with G_DECLARE_FINAL_TYPE, which
 * presumably already provides the class structure typedef, so this struct
 * tag may be unused; adding the vmethods from the TODO would likely require
 * G_DECLARE_DERIVABLE_TYPE instead - confirm. */
struct _GstClassifierTensorDecoderClass
{
GstBaseTransformClass parent_class;
/* TODO: Add vmethod to allow overwriting: decode, postprocess, load_labels */
};
GST_ELEMENT_REGISTER_DECLARE (classifier_tensor_decoder)
G_END_DECLS
#endif /* __GST_CLASSIFIER_TENSOR_DECODER_H__ */

View File

@ -25,6 +25,7 @@
#endif
#include "gstssdobjectdetector.h"
#include "gstclassifiertensordecoder.h"
/**
* SECTION:plugin-tensordecoders
@ -38,6 +39,7 @@ plugin_init (GstPlugin * plugin)
{
gboolean ret = FALSE;
ret |= GST_ELEMENT_REGISTER (ssd_object_detector, plugin);
ret |= GST_ELEMENT_REGISTER (classifier_tensor_decoder, plugin);
return ret;
}

View File

@ -1,10 +1,12 @@
tensordecoders_sources = [
'gsttensordecoders.c',
'gstssdobjectdetector.c'
'gstssdobjectdetector.c',
'gstclassifiertensordecoder.c'
]
tensordecoders_headers = [
'gstssdobjectdetector.h',
'gstclassifiertensordecoder.h'
]
doc_sources = []