tensordecoder: Improve class retrieval
- Optimize the way to retrieve the class with maximum confidence. Avoid multiple passes. - Add support for label files
This commit is contained in:
parent
62eeb7e008
commit
01a504c310
subprojects/gst-plugins-bad/gst/tensordecoders
@ -57,6 +57,7 @@
|
|||||||
#include "gstyolotensordecoder.h"
|
#include "gstyolotensordecoder.h"
|
||||||
|
|
||||||
#include <gst/analytics/analytics.h>
|
#include <gst/analytics/analytics.h>
|
||||||
|
#include <gio/gio.h>
|
||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
@ -81,7 +82,8 @@ enum
|
|||||||
PROP_IOU_THRESH,
|
PROP_IOU_THRESH,
|
||||||
PROP_MAX_DETECTION,
|
PROP_MAX_DETECTION,
|
||||||
PROP_MASK_TENSOR_NAME,
|
PROP_MASK_TENSOR_NAME,
|
||||||
PROP_LOGITS_TENSOR_NAME
|
PROP_LOGITS_TENSOR_NAME,
|
||||||
|
PROP_LABEL_FILE
|
||||||
};
|
};
|
||||||
|
|
||||||
/* For debug purpose */
|
/* For debug purpose */
|
||||||
@ -150,6 +152,7 @@ static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans,
|
|||||||
static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder
|
static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder
|
||||||
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
|
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
|
||||||
GstAnalyticsRelationMeta * rmeta);
|
GstAnalyticsRelationMeta * rmeta);
|
||||||
|
static void gst_yolo_tensor_decoder_finalize (GObject * object);
|
||||||
|
|
||||||
G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
|
G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
|
||||||
GST_TYPE_BASE_TRANSFORM);
|
GST_TYPE_BASE_TRANSFORM);
|
||||||
@ -168,6 +171,7 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
|||||||
/* Set GObject vmethod to get and set property */
|
/* Set GObject vmethod to get and set property */
|
||||||
gobject_class->set_property = gst_yolo_tensor_decoder_set_property;
|
gobject_class->set_property = gst_yolo_tensor_decoder_set_property;
|
||||||
gobject_class->get_property = gst_yolo_tensor_decoder_get_property;
|
gobject_class->get_property = gst_yolo_tensor_decoder_get_property;
|
||||||
|
gobject_class->finalize = gst_yolo_tensor_decoder_finalize;
|
||||||
|
|
||||||
/* Define GstYoloTensorDecoder properties using GObject properties
|
/* Define GstYoloTensorDecoder properties using GObject properties
|
||||||
* interface.*/
|
* interface.*/
|
||||||
@ -224,6 +228,18 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
|||||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
|
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
|
||||||
G_PARAM_STATIC_STRINGS)));
|
G_PARAM_STATIC_STRINGS)));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GstYoloTensorDecoder:label-file
|
||||||
|
*
|
||||||
|
* Label file
|
||||||
|
*
|
||||||
|
* Since: 1.26
|
||||||
|
*/
|
||||||
|
g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE,
|
||||||
|
g_param_spec_string ("label-file",
|
||||||
|
"Label file", "Label file", NULL, (GParamFlags)
|
||||||
|
(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||||
|
|
||||||
/* Element description. */
|
/* Element description. */
|
||||||
gst_element_class_set_static_metadata (element_class, "yolotensordecoder",
|
gst_element_class_set_static_metadata (element_class, "yolotensordecoder",
|
||||||
"TensorDecoder/Video",
|
"TensorDecoder/Video",
|
||||||
@ -277,6 +293,17 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
|||||||
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
|
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
gst_yolo_tensor_decoder_finalize (GObject * object)
|
||||||
|
{
|
||||||
|
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
|
||||||
|
|
||||||
|
g_free (self->label_file);
|
||||||
|
g_clear_pointer (&self->labels, g_array_unref);
|
||||||
|
|
||||||
|
G_OBJECT_CLASS (gst_yolo_tensor_decoder_parent_class)->finalize (object);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self)
|
gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self)
|
||||||
{
|
{
|
||||||
@ -314,11 +341,59 @@ gst_yolo_tensor_decoder_stop (GstBaseTransform * trans)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static GArray *
|
||||||
|
read_labels (const char *labels_file)
|
||||||
|
{
|
||||||
|
GArray *array;
|
||||||
|
GFile *file = g_file_new_for_path (labels_file);
|
||||||
|
GFileInputStream *file_stream;
|
||||||
|
GDataInputStream *data_stream;
|
||||||
|
GError *error = NULL;
|
||||||
|
gchar *line;
|
||||||
|
|
||||||
|
file_stream = g_file_read (file, NULL, &error);
|
||||||
|
g_object_unref (file);
|
||||||
|
if (!file_stream) {
|
||||||
|
GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message);
|
||||||
|
g_clear_error (&error);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream));
|
||||||
|
g_object_unref (file_stream);
|
||||||
|
|
||||||
|
array = g_array_new (FALSE, FALSE, sizeof (GQuark));
|
||||||
|
|
||||||
|
while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL,
|
||||||
|
&error))) {
|
||||||
|
GQuark label = g_quark_from_string (line);
|
||||||
|
g_array_append_val (array, label);
|
||||||
|
g_free (line);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_object_unref (data_stream);
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
GST_WARNING ("Could not open file %s: %s", labels_file, error->message);
|
||||||
|
g_array_free (array, TRUE);
|
||||||
|
g_clear_error (&error);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (array->len == 0) {
|
||||||
|
g_array_free (array, TRUE);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
|
gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
|
||||||
const GValue * value, GParamSpec * pspec)
|
const GValue * value, GParamSpec * pspec)
|
||||||
{
|
{
|
||||||
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
|
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
|
||||||
|
const gchar *filename;
|
||||||
|
|
||||||
switch (prop_id) {
|
switch (prop_id) {
|
||||||
case PROP_BOX_CONFI_THRESH:
|
case PROP_BOX_CONFI_THRESH:
|
||||||
@ -351,6 +426,22 @@ gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
|
|||||||
self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
|
self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
|
||||||
GST_OBJECT_UNLOCK (self);
|
GST_OBJECT_UNLOCK (self);
|
||||||
break;
|
break;
|
||||||
|
case PROP_LABEL_FILE:
|
||||||
|
{
|
||||||
|
GArray *labels;
|
||||||
|
|
||||||
|
filename = g_value_get_string (value);
|
||||||
|
labels = read_labels (filename);
|
||||||
|
|
||||||
|
if (labels) {
|
||||||
|
g_free (self->label_file);
|
||||||
|
self->label_file = g_strdup (filename);
|
||||||
|
g_clear_pointer (&self->labels, g_array_unref);
|
||||||
|
self->labels = labels;
|
||||||
|
} else {
|
||||||
|
GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||||
break;
|
break;
|
||||||
@ -386,6 +477,9 @@ gst_yolo_tensor_decoder_get_property (GObject * object, guint prop_id,
|
|||||||
g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
|
g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
|
||||||
GST_OBJECT_UNLOCK (self);
|
GST_OBJECT_UNLOCK (self);
|
||||||
break;
|
break;
|
||||||
|
case PROP_LABEL_FILE:
|
||||||
|
g_value_set_string (value, self->label_file);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||||
break;
|
break;
|
||||||
@ -479,8 +573,7 @@ gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
|
|||||||
* downstream.
|
* downstream.
|
||||||
*/
|
*/
|
||||||
static GstFlowReturn
|
static GstFlowReturn
|
||||||
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans,
|
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf)
|
||||||
GstBuffer * buf)
|
|
||||||
{
|
{
|
||||||
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
|
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
|
||||||
GstTensor *masks_tensor, *logits_tensor;
|
GstTensor *masks_tensor, *logits_tensor;
|
||||||
@ -656,13 +749,15 @@ gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
|
|||||||
* specified by range.
|
* specified by range.
|
||||||
*/
|
*/
|
||||||
static gfloat
|
static gfloat
|
||||||
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer range)
|
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c,
|
||||||
|
const ConfidenceRange * c_range, gsize * max_class_ofs)
|
||||||
{
|
{
|
||||||
ConfidenceRange c_range = *((ConfidenceRange *) range);
|
|
||||||
gfloat max_val = 0.0;
|
gfloat max_val = 0.0;
|
||||||
for (gsize i = c_range.start; i <= c_range.end; i += c_range.step) {
|
for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) {
|
||||||
if (*(c + i) > max_val)
|
if (*(c + i) > max_val) {
|
||||||
max_val = *(c + i);
|
max_val = *(c + i);
|
||||||
|
*max_class_ofs = i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return max_val;
|
return max_val;
|
||||||
}
|
}
|
||||||
@ -672,14 +767,29 @@ gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer ran
|
|||||||
* by offset.
|
* by offset.
|
||||||
*/
|
*/
|
||||||
static gint
|
static gint
|
||||||
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range)
|
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
|
||||||
|
gpointer range)
|
||||||
{
|
{
|
||||||
|
ConfidenceRange *c_range = (ConfidenceRange *) range;
|
||||||
const gfloat *candidate1 = *((gfloat **) c1);
|
const gfloat *candidate1 = *((gfloat **) c1);
|
||||||
const gfloat *candidate2 = *((gfloat **) c2);
|
const gfloat *candidate2 = *((gfloat **) c2);
|
||||||
const gfloat max_c1_confi =
|
gfloat max_c1_confi;
|
||||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate1, range);
|
gfloat max_c2_confi;
|
||||||
const gfloat max_c2_confi =
|
gsize offset;
|
||||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate2, range);
|
|
||||||
|
if (candidate1[c_range->start] <= -1.0) {
|
||||||
|
offset = (gsize) (-candidate1[c_range->start]);
|
||||||
|
max_c1_confi = candidate1[offset];
|
||||||
|
} else {
|
||||||
|
max_c1_confi = candidate1[c_range->start];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (candidate2[c_range->start] <= -1.0) {
|
||||||
|
offset = (gsize) (-candidate2[c_range->start]);
|
||||||
|
max_c2_confi = candidate2[offset];
|
||||||
|
} else {
|
||||||
|
max_c2_confi = candidate2[c_range->start];
|
||||||
|
}
|
||||||
|
|
||||||
return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
|
return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
|
||||||
}
|
}
|
||||||
@ -745,7 +855,7 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
|||||||
/*guint batch_size = masks_tensor->dims[0]; */
|
/*guint batch_size = masks_tensor->dims[0]; */
|
||||||
/*guint num_masks = masks_tensor->dims[1]; */
|
/*guint num_masks = masks_tensor->dims[1]; */
|
||||||
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
|
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
|
||||||
gfloat *candidate, **candidates, iou, *data_logits;
|
gfloat *candidate, **candidates, iou, *data_logits, confid = -1.0;
|
||||||
gboolean rv, keep;
|
gboolean rv, keep;
|
||||||
gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
|
gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
|
||||||
gsize m0_offset;
|
gsize m0_offset;
|
||||||
@ -755,9 +865,11 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
|||||||
GstAnalyticsSegmentationMtd seg_mtd;
|
GstAnalyticsSegmentationMtd seg_mtd;
|
||||||
guint8 *mask_data;
|
guint8 *mask_data;
|
||||||
ConfidenceRange c_range;
|
ConfidenceRange c_range;
|
||||||
|
gsize max_class_offset = 0, class_index;
|
||||||
|
GQuark class_quark = OOI_CLASS_ID;
|
||||||
|
|
||||||
/* Retrieve memory at index 0 and map it in READ mode */
|
/* Retrieve memory at index 0 and map it in READWRITE mode */
|
||||||
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
|
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE);
|
||||||
g_assert (rv);
|
g_assert (rv);
|
||||||
|
|
||||||
/* Retrieve memory at index 0 from logits_tensor in READ mode */
|
/* Retrieve memory at index 0 from logits_tensor in READ mode */
|
||||||
@ -829,17 +941,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
|||||||
#define BB_W(candidate) candidate[w_offset]
|
#define BB_W(candidate) candidate[w_offset]
|
||||||
#define BB_H(candidate) candidate[h_offset]
|
#define BB_H(candidate) candidate[h_offset]
|
||||||
|
|
||||||
candidate = (gfloat *) map_info_masks.data;;
|
candidate = (gfloat *) map_info_masks.data;
|
||||||
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
|
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
|
||||||
/* Yolo have multiple class, so maximum confidence level across all class is used
|
/* Yolo have multiple class, so maximum confidence level across all class is used
|
||||||
* to evaluate the relevance of the candidate. Here we filter candidates
|
* to evaluate the relevance of the candidate. Here we filter candidates
|
||||||
* based on their class confidence level.*/
|
* based on their class confidence level.*/
|
||||||
gfloat max_confidence =
|
gfloat max_confidence =
|
||||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range);
|
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range,
|
||||||
|
&max_class_offset);
|
||||||
if (max_confidence > self->cls_confi_thresh
|
if (max_confidence > self->cls_confi_thresh
|
||||||
&& gst_yolo_tensor_decoder_decode_valid_bb (self,
|
&& gst_yolo_tensor_decoder_decode_valid_bb (self,
|
||||||
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
|
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
|
||||||
BB_H (candidate))) {
|
BB_H (candidate))) {
|
||||||
|
|
||||||
|
/* We need a way to keep track of the class with maximum confidence. At
|
||||||
|
* this level we're operating on a large number of candidate. Candidates
|
||||||
|
* will be sorted and filtered later on. Here we use an in-place method
|
||||||
|
* to store the offset of the class with highest confidence level. If
|
||||||
|
* the class with highest confidence level is the first one we keep its
|
||||||
|
* value as-is, otherwise we overwrite the first class confidence level
|
||||||
|
* with the value of the -offset of the class with maximum confidence. */
|
||||||
|
if (max_class_offset != c_range.start) {
|
||||||
|
candidate[c_range.start] = -(float) (max_class_offset);
|
||||||
|
}
|
||||||
|
|
||||||
g_ptr_array_add (sel_candidates, candidate);
|
g_ptr_array_add (sel_candidates, candidate);
|
||||||
GST_TRACE_OBJECT (self,
|
GST_TRACE_OBJECT (self,
|
||||||
"%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
|
"%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
|
||||||
@ -906,11 +1031,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
|||||||
g_ptr_array_add (selected, candidate);
|
g_ptr_array_add (selected, candidate);
|
||||||
region_ids[1] = selected->len;
|
region_ids[1] = selected->len;
|
||||||
|
|
||||||
|
if (self->labels) {
|
||||||
|
if (candidate[c_range.start] <= -1.0) {
|
||||||
|
/* Max class is not the first one and `candidate[c_range.start]`
|
||||||
|
* contain -offset to the class with maximum confidence */
|
||||||
|
max_class_offset = (gsize) (-candidate[c_range.start]);
|
||||||
|
confid = candidate[max_class_offset];
|
||||||
|
|
||||||
|
/* Set overwritten confidence to 0 to avoid incorrect interpretation */
|
||||||
|
candidate[c_range.start] = 0.0;
|
||||||
|
class_index = (max_class_offset - c_range.start) / c_range.step;
|
||||||
|
} else {
|
||||||
|
confid = candidate[c_range.start];
|
||||||
|
class_index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (class_index < self->labels->len)
|
||||||
|
class_quark = g_array_index (self->labels, GQuark, class_index);
|
||||||
|
}
|
||||||
|
|
||||||
/* We add the analytics-objectdetection-meta to the buffer. Since
|
/* We add the analytics-objectdetection-meta to the buffer. Since
|
||||||
* there's only one class the class confidence level is set to -1.0
|
* there's only one class the class confidence level is set to -1.0
|
||||||
* as it's deemed not important. */
|
* as it's deemed not important. */
|
||||||
gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID,
|
gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark,
|
||||||
bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd);
|
bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd);
|
||||||
|
|
||||||
bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
|
bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
|
||||||
bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;
|
bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;
|
||||||
|
@ -89,6 +89,12 @@ struct _GstYoloTensorDecoder
|
|||||||
|
|
||||||
/* BufferPool for mask */
|
/* BufferPool for mask */
|
||||||
GstBufferPool *mask_pool;
|
GstBufferPool *mask_pool;
|
||||||
|
|
||||||
|
/* Labels file */
|
||||||
|
gchar *label_file;
|
||||||
|
|
||||||
|
/* Labels */
|
||||||
|
GArray *labels;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct _GstYoloTensorDecoderClass
|
struct _GstYoloTensorDecoderClass
|
||||||
|
Loading…
x
Reference in New Issue
Block a user