tensordecoder: Improve class retrieval

- Optimize retrieval of the class with maximum confidence. Avoid multiple
  passes.
- Add support for label files
This commit is contained in:
Daniel Morin 2025-02-13 16:59:29 -05:00 committed by Elias Rosendahl
parent 62eeb7e008
commit 01a504c310
2 changed files with 168 additions and 18 deletions

View File

@ -57,6 +57,7 @@
#include "gstyolotensordecoder.h"
#include <gst/analytics/analytics.h>
#include <gio/gio.h>
#include <math.h>
@ -81,7 +82,8 @@ enum
PROP_IOU_THRESH,
PROP_MAX_DETECTION,
PROP_MASK_TENSOR_NAME,
PROP_LOGITS_TENSOR_NAME
PROP_LOGITS_TENSOR_NAME,
PROP_LABEL_FILE
};
/* For debug purpose */
@ -150,6 +152,7 @@ static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans,
static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
GstAnalyticsRelationMeta * rmeta);
static void gst_yolo_tensor_decoder_finalize (GObject * object);
G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
GST_TYPE_BASE_TRANSFORM);
@ -168,6 +171,7 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
/* Set GObject vmethod to get and set property */
gobject_class->set_property = gst_yolo_tensor_decoder_set_property;
gobject_class->get_property = gst_yolo_tensor_decoder_get_property;
gobject_class->finalize = gst_yolo_tensor_decoder_finalize;
/* Define GstYoloTensorDecoder properties using GObject properties
* interface.*/
@ -224,6 +228,18 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
G_PARAM_STATIC_STRINGS)));
/**
* GstYoloTensorDecoder:label-file
*
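* Path to a text file holding one class label per line; the label on line N
* (zero-based) is used for class index N.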
*
* Since: 1.26
*/
g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE,
g_param_spec_string ("label-file",
"Label file", "Label file", NULL, (GParamFlags)
(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
/* Element description. */
gst_element_class_set_static_metadata (element_class, "yolotensordecoder",
"TensorDecoder/Video",
@ -277,6 +293,17 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
}
/* GObject finalize vmethod: releases the label-file path and the parsed
 * label array held by the decoder, then chains up to the parent class. */
static void
gst_yolo_tensor_decoder_finalize (GObject * object)
{
  GstYoloTensorDecoder *decoder = GST_YOLO_TENSOR_DECODER (object);
  GObjectClass *parent_class =
      G_OBJECT_CLASS (gst_yolo_tensor_decoder_parent_class);

  /* Both members may still be NULL if no label file was ever set. */
  g_clear_pointer (&decoder->labels, g_array_unref);
  g_free (decoder->label_file);

  parent_class->finalize (object);
}
static void
gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self)
{
@ -314,11 +341,59 @@ gst_yolo_tensor_decoder_stop (GstBaseTransform * trans)
return TRUE;
}
static GArray *
read_labels (const char *labels_file)
{
GArray *array;
GFile *file = g_file_new_for_path (labels_file);
GFileInputStream *file_stream;
GDataInputStream *data_stream;
GError *error = NULL;
gchar *line;
file_stream = g_file_read (file, NULL, &error);
g_object_unref (file);
if (!file_stream) {
GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message);
g_clear_error (&error);
return NULL;
}
data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream));
g_object_unref (file_stream);
array = g_array_new (FALSE, FALSE, sizeof (GQuark));
while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL,
&error))) {
GQuark label = g_quark_from_string (line);
g_array_append_val (array, label);
g_free (line);
}
g_object_unref (data_stream);
if (error) {
GST_WARNING ("Could not open file %s: %s", labels_file, error->message);
g_array_free (array, TRUE);
g_clear_error (&error);
return NULL;
}
if (array->len == 0) {
g_array_free (array, TRUE);
return NULL;
}
return array;
}
static void
gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
const GValue * value, GParamSpec * pspec)
{
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
const gchar *filename;
switch (prop_id) {
case PROP_BOX_CONFI_THRESH:
@ -351,6 +426,22 @@ gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
GST_OBJECT_UNLOCK (self);
break;
case PROP_LABEL_FILE:
{
  GArray *labels;

  filename = g_value_get_string (value);

  if (filename == NULL) {
    /* Unsetting the property drops any previously loaded labels instead
     * of handing a NULL path to read_labels(). */
    g_clear_pointer (&self->label_file, g_free);
    g_clear_pointer (&self->labels, g_array_unref);
    break;
  }

  labels = read_labels (filename);
  if (labels) {
    /* Only replace the current labels once the new file loaded fine. */
    g_free (self->label_file);
    self->label_file = g_strdup (filename);
    g_clear_pointer (&self->labels, g_array_unref);
    self->labels = labels;
  } else {
    /* read_labels() also fails on unreadable or empty files, not only on
     * missing ones. */
    GST_WARNING_OBJECT (self, "Could not load labels from '%s'", filename);
  }
  /* Without this break every set fell through to the default case and
   * emitted an invalid-property-id warning. */
  break;
}
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -386,6 +477,9 @@ gst_yolo_tensor_decoder_get_property (GObject * object, guint prop_id,
g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
GST_OBJECT_UNLOCK (self);
break;
case PROP_LABEL_FILE:
g_value_set_string (value, self->label_file);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -479,8 +573,7 @@ gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
* downstream.
*/
static GstFlowReturn
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans,
GstBuffer * buf)
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf)
{
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
GstTensor *masks_tensor, *logits_tensor;
@ -656,13 +749,15 @@ gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
* specified by range.
*/
static gfloat
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer range)
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c,
const ConfidenceRange * c_range, gsize * max_class_ofs)
{
ConfidenceRange c_range = *((ConfidenceRange *) range);
gfloat max_val = 0.0;
for (gsize i = c_range.start; i <= c_range.end; i += c_range.step) {
if (*(c + i) > max_val)
for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) {
if (*(c + i) > max_val) {
max_val = *(c + i);
*max_class_ofs = i;
}
}
return max_val;
}
@ -672,14 +767,29 @@ gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer ran
* by offset.
*/
static gint
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range)
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
gpointer range)
{
ConfidenceRange *c_range = (ConfidenceRange *) range;
const gfloat *candidate1 = *((gfloat **) c1);
const gfloat *candidate2 = *((gfloat **) c2);
const gfloat max_c1_confi =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate1, range);
const gfloat max_c2_confi =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate2, range);
gfloat max_c1_confi;
gfloat max_c2_confi;
gsize offset;
if (candidate1[c_range->start] <= -1.0) {
offset = (gsize) (-candidate1[c_range->start]);
max_c1_confi = candidate1[offset];
} else {
max_c1_confi = candidate1[c_range->start];
}
if (candidate2[c_range->start] <= -1.0) {
offset = (gsize) (-candidate2[c_range->start]);
max_c2_confi = candidate2[offset];
} else {
max_c2_confi = candidate2[c_range->start];
}
return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
}
@ -745,7 +855,7 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
/*guint batch_size = masks_tensor->dims[0]; */
/*guint num_masks = masks_tensor->dims[1]; */
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
gfloat *candidate, **candidates, iou, *data_logits;
gfloat *candidate, **candidates, iou, *data_logits, confid = -1.0;
gboolean rv, keep;
gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
gsize m0_offset;
@ -755,9 +865,11 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
GstAnalyticsSegmentationMtd seg_mtd;
guint8 *mask_data;
ConfidenceRange c_range;
gsize max_class_offset = 0, class_index;
GQuark class_quark = OOI_CLASS_ID;
/* Retrieve memory at index 0 and map it in READ mode */
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE);
g_assert (rv);
/* Retrieve memory at index 0 from logits_tensor in READ mode */
@ -829,17 +941,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
#define BB_W(candidate) candidate[w_offset]
#define BB_H(candidate) candidate[h_offset]
candidate = (gfloat *) map_info_masks.data;;
candidate = (gfloat *) map_info_masks.data;
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
/* Yolo have multiple class, so maximum confidence level across all class is used
* to evaluate the relevance of the candidate. Here we filter candidates
* based on their class confidence level.*/
gfloat max_confidence =
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range);
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range,
&max_class_offset);
if (max_confidence > self->cls_confi_thresh
&& gst_yolo_tensor_decoder_decode_valid_bb (self,
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
BB_H (candidate))) {
/* We need a way to keep track of the class with maximum confidence. At
* this level we're operating on a large number of candidates. Candidates
* will be sorted and filtered later on. Here we use an in-place method
* to store the offset of the class with the highest confidence level. If
* the class with the highest confidence level is the first one we keep its
* value as-is, otherwise we overwrite the first class confidence level
* with the negated offset of the class with maximum confidence. */
if (max_class_offset != c_range.start) {
candidate[c_range.start] = -(float) (max_class_offset);
}
g_ptr_array_add (sel_candidates, candidate);
GST_TRACE_OBJECT (self,
"%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
@ -906,11 +1031,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
g_ptr_array_add (selected, candidate);
region_ids[1] = selected->len;
if (self->labels) {
if (candidate[c_range.start] <= -1.0) {
/* Max class is not the first one and `candidate[c_range.start]`
* contain -offset to the class with maximum confidence */
max_class_offset = (gsize) (-candidate[c_range.start]);
confid = candidate[max_class_offset];
/* Set overwritten confidence to 0 to avoid incorrect interpretation */
candidate[c_range.start] = 0.0;
class_index = (max_class_offset - c_range.start) / c_range.step;
} else {
confid = candidate[c_range.start];
class_index = 0;
}
if (class_index < self->labels->len)
class_quark = g_array_index (self->labels, GQuark, class_index);
}
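/* We add the analytics-objectdetection-meta to the buffer. When labels
* are loaded, the confidence of the best class is reported, along with
* its label when one exists for that class index; without labels the
* generic class id is used and the confidence stays at -1.0. */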
gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID,
bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd);
gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark,
bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd);
bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;

View File

@ -89,6 +89,12 @@ struct _GstYoloTensorDecoder
/* BufferPool for mask */
GstBufferPool *mask_pool;
/* Labels file */
gchar *label_file;
/* Labels */
GArray *labels;
};
struct _GstYoloTensorDecoderClass