tensordecoder: Improve class retrieval
- Optimize the way the class with maximum confidence is retrieved, avoiding multiple passes. - Add support for label files
This commit is contained in:
parent
62eeb7e008
commit
01a504c310
@ -57,6 +57,7 @@
|
||||
#include "gstyolotensordecoder.h"
|
||||
|
||||
#include <gst/analytics/analytics.h>
|
||||
#include <gio/gio.h>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
@ -81,7 +82,8 @@ enum
|
||||
PROP_IOU_THRESH,
|
||||
PROP_MAX_DETECTION,
|
||||
PROP_MASK_TENSOR_NAME,
|
||||
PROP_LOGITS_TENSOR_NAME
|
||||
PROP_LOGITS_TENSOR_NAME,
|
||||
PROP_LABEL_FILE
|
||||
};
|
||||
|
||||
/* For debug purpose */
|
||||
@ -150,6 +152,7 @@ static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans,
|
||||
static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder
|
||||
* self, GstTensor * masks_tensor, GstTensor * logits_tensor,
|
||||
GstAnalyticsRelationMeta * rmeta);
|
||||
static void gst_yolo_tensor_decoder_finalize (GObject * object);
|
||||
|
||||
G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder,
|
||||
GST_TYPE_BASE_TRANSFORM);
|
||||
@ -168,6 +171,7 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
||||
/* Set GObject vmethod to get and set property */
|
||||
gobject_class->set_property = gst_yolo_tensor_decoder_set_property;
|
||||
gobject_class->get_property = gst_yolo_tensor_decoder_get_property;
|
||||
gobject_class->finalize = gst_yolo_tensor_decoder_finalize;
|
||||
|
||||
/* Define GstYoloTensorDecoder properties using GObject properties
|
||||
* interface.*/
|
||||
@ -224,6 +228,18 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT |
|
||||
G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
/**
|
||||
* GstYoloTensorDecoder:label-file
|
||||
*
|
||||
* Label file
|
||||
*
|
||||
* Since: 1.26
|
||||
*/
|
||||
g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE,
|
||||
g_param_spec_string ("label-file",
|
||||
"Label file", "Label file", NULL, (GParamFlags)
|
||||
(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
/* Element description. */
|
||||
gst_element_class_set_static_metadata (element_class, "yolotensordecoder",
|
||||
"TensorDecoder/Video",
|
||||
@ -277,6 +293,17 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass)
|
||||
g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_yolo_tensor_decoder_finalize (GObject * object)
|
||||
{
|
||||
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
|
||||
|
||||
g_free (self->label_file);
|
||||
g_clear_pointer (&self->labels, g_array_unref);
|
||||
|
||||
G_OBJECT_CLASS (gst_yolo_tensor_decoder_parent_class)->finalize (object);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self)
|
||||
{
|
||||
@ -314,11 +341,59 @@ gst_yolo_tensor_decoder_stop (GstBaseTransform * trans)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static GArray *
|
||||
read_labels (const char *labels_file)
|
||||
{
|
||||
GArray *array;
|
||||
GFile *file = g_file_new_for_path (labels_file);
|
||||
GFileInputStream *file_stream;
|
||||
GDataInputStream *data_stream;
|
||||
GError *error = NULL;
|
||||
gchar *line;
|
||||
|
||||
file_stream = g_file_read (file, NULL, &error);
|
||||
g_object_unref (file);
|
||||
if (!file_stream) {
|
||||
GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message);
|
||||
g_clear_error (&error);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream));
|
||||
g_object_unref (file_stream);
|
||||
|
||||
array = g_array_new (FALSE, FALSE, sizeof (GQuark));
|
||||
|
||||
while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL,
|
||||
&error))) {
|
||||
GQuark label = g_quark_from_string (line);
|
||||
g_array_append_val (array, label);
|
||||
g_free (line);
|
||||
}
|
||||
|
||||
g_object_unref (data_stream);
|
||||
|
||||
if (error) {
|
||||
GST_WARNING ("Could not open file %s: %s", labels_file, error->message);
|
||||
g_array_free (array, TRUE);
|
||||
g_clear_error (&error);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (array->len == 0) {
|
||||
g_array_free (array, TRUE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
static void
|
||||
gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
|
||||
const GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object);
|
||||
const gchar *filename;
|
||||
|
||||
switch (prop_id) {
|
||||
case PROP_BOX_CONFI_THRESH:
|
||||
@ -351,6 +426,22 @@ gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id,
|
||||
self->logits_tensor_id = g_quark_from_string (g_value_get_string (value));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_LABEL_FILE:
{
  /* Load the new labels first: the stored path and labels are only
   * replaced when loading succeeded, so a bad path leaves the previous
   * configuration untouched. */
  GArray *labels;

  filename = g_value_get_string (value);
  labels = read_labels (filename);

  if (labels) {
    g_free (self->label_file);
    self->label_file = g_strdup (filename);
    g_clear_pointer (&self->labels, g_array_unref);
    self->labels = labels;
  } else {
    GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename);
  }
  /* Without this break, control falls into the default label and every
   * valid write to label-file is reported as an invalid property id. */
  break;
}
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
@ -386,6 +477,9 @@ gst_yolo_tensor_decoder_get_property (GObject * object, guint prop_id,
|
||||
g_value_set_string (value, g_quark_to_string (self->logits_tensor_id));
|
||||
GST_OBJECT_UNLOCK (self);
|
||||
break;
|
||||
case PROP_LABEL_FILE:
|
||||
g_value_set_string (value, self->label_file);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
@ -479,8 +573,7 @@ gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps,
|
||||
* downstream.
|
||||
*/
|
||||
static GstFlowReturn
|
||||
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans,
|
||||
GstBuffer * buf)
|
||||
gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf)
|
||||
{
|
||||
GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans);
|
||||
GstTensor *masks_tensor, *logits_tensor;
|
||||
@ -656,13 +749,15 @@ gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset,
|
||||
* specified by range.
|
||||
*/
|
||||
static gfloat
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer range)
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c,
|
||||
const ConfidenceRange * c_range, gsize * max_class_ofs)
|
||||
{
|
||||
ConfidenceRange c_range = *((ConfidenceRange *) range);
|
||||
gfloat max_val = 0.0;
|
||||
for (gsize i = c_range.start; i <= c_range.end; i += c_range.step) {
|
||||
if (*(c + i) > max_val)
|
||||
for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) {
|
||||
if (*(c + i) > max_val) {
|
||||
max_val = *(c + i);
|
||||
*max_class_ofs = i;
|
||||
}
|
||||
}
|
||||
return max_val;
|
||||
}
|
||||
@ -672,14 +767,29 @@ gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer ran
|
||||
* by offset.
|
||||
*/
|
||||
static gint
|
||||
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range)
|
||||
gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2,
|
||||
gpointer range)
|
||||
{
|
||||
ConfidenceRange *c_range = (ConfidenceRange *) range;
|
||||
const gfloat *candidate1 = *((gfloat **) c1);
|
||||
const gfloat *candidate2 = *((gfloat **) c2);
|
||||
const gfloat max_c1_confi =
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate1, range);
|
||||
const gfloat max_c2_confi =
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate2, range);
|
||||
gfloat max_c1_confi;
|
||||
gfloat max_c2_confi;
|
||||
gsize offset;
|
||||
|
||||
if (candidate1[c_range->start] <= -1.0) {
|
||||
offset = (gsize) (-candidate1[c_range->start]);
|
||||
max_c1_confi = candidate1[offset];
|
||||
} else {
|
||||
max_c1_confi = candidate1[c_range->start];
|
||||
}
|
||||
|
||||
if (candidate2[c_range->start] <= -1.0) {
|
||||
offset = (gsize) (-candidate2[c_range->start]);
|
||||
max_c2_confi = candidate2[offset];
|
||||
} else {
|
||||
max_c2_confi = candidate2[c_range->start];
|
||||
}
|
||||
|
||||
return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? -1 : 0;
|
||||
}
|
||||
@ -745,7 +855,7 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
||||
/*guint batch_size = masks_tensor->dims[0]; */
|
||||
/*guint num_masks = masks_tensor->dims[1]; */
|
||||
GstMapInfo map_info_masks, map_info_logits, out_mask_info;
|
||||
gfloat *candidate, **candidates, iou, *data_logits;
|
||||
gfloat *candidate, **candidates, iou, *data_logits, confid = -1.0;
|
||||
gboolean rv, keep;
|
||||
gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4];
|
||||
gsize m0_offset;
|
||||
@ -755,9 +865,11 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
||||
GstAnalyticsSegmentationMtd seg_mtd;
|
||||
guint8 *mask_data;
|
||||
ConfidenceRange c_range;
|
||||
gsize max_class_offset = 0, class_index;
|
||||
GQuark class_quark = OOI_CLASS_ID;
|
||||
|
||||
/* Retrieve memory at index 0 and map it in READ mode */
|
||||
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ);
|
||||
rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE);
|
||||
g_assert (rv);
|
||||
|
||||
/* Retrieve memory at index 0 from logits_tensor in READ mode */
|
||||
@ -829,17 +941,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
||||
#define BB_W(candidate) candidate[w_offset]
|
||||
#define BB_H(candidate) candidate[h_offset]
|
||||
|
||||
candidate = (gfloat *) map_info_masks.data;;
|
||||
candidate = (gfloat *) map_info_masks.data;
|
||||
for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) {
|
||||
/* Yolo have multiple class, so maximum confidence level across all class is used
|
||||
* to evaluate the relevance of the candidate. Here we filter candidates
|
||||
* based on their class confidence level.*/
|
||||
gfloat max_confidence =
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range);
|
||||
gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range,
|
||||
&max_class_offset);
|
||||
if (max_confidence > self->cls_confi_thresh
|
||||
&& gst_yolo_tensor_decoder_decode_valid_bb (self,
|
||||
BB_X (candidate), BB_Y (candidate), BB_W (candidate),
|
||||
BB_H (candidate))) {
|
||||
|
||||
/* We need a way to keep track of the class with maximum confidence. At
|
||||
* this level we're operating on a large number of candidates. Candidates
|
||||
* will be sorted and filtered later on. Here we use an in-place method
|
||||
* to store the offset of the class with the highest confidence level. If
|
||||
* the class with the highest confidence level is the first one we keep its
|
||||
* value as-is, otherwise we overwrite the first class confidence level
|
||||
* with the negated offset of the class with maximum confidence. */
|
||||
if (max_class_offset != c_range.start) {
|
||||
candidate[c_range.start] = -(float) (max_class_offset);
|
||||
}
|
||||
|
||||
g_ptr_array_add (sel_candidates, candidate);
|
||||
GST_TRACE_OBJECT (self,
|
||||
"%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f",
|
||||
@ -906,11 +1031,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self,
|
||||
g_ptr_array_add (selected, candidate);
|
||||
region_ids[1] = selected->len;
|
||||
|
||||
if (self->labels) {
|
||||
if (candidate[c_range.start] <= -1.0) {
|
||||
/* Max class is not the first one and `candidate[c_range.start]`
|
||||
* contain -offset to the class with maximum confidence */
|
||||
max_class_offset = (gsize) (-candidate[c_range.start]);
|
||||
confid = candidate[max_class_offset];
|
||||
|
||||
/* Set overwritten confidence to 0 to avoid incorrect interpretation */
|
||||
candidate[c_range.start] = 0.0;
|
||||
class_index = (max_class_offset - c_range.start) / c_range.step;
|
||||
} else {
|
||||
confid = candidate[c_range.start];
|
||||
class_index = 0;
|
||||
}
|
||||
|
||||
if (class_index < self->labels->len)
|
||||
class_quark = g_array_index (self->labels, GQuark, class_index);
|
||||
}
|
||||
|
||||
/* We add the analytics-objectdetection-meta to the buffer. When a
|
||||
* label file is loaded, class_quark and confid describe the candidate's
|
||||
* best class; otherwise they keep their initial values (OOI_CLASS_ID, -1.0). */
|
||||
gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID,
|
||||
bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd);
|
||||
gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark,
|
||||
bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd);
|
||||
|
||||
bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x;
|
||||
bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y;
|
||||
|
@ -89,6 +89,12 @@ struct _GstYoloTensorDecoder
|
||||
|
||||
/* BufferPool for mask */
|
||||
GstBufferPool *mask_pool;
|
||||
|
||||
/* Labels file */
|
||||
gchar *label_file;
|
||||
|
||||
/* Labels */
|
||||
GArray *labels;
|
||||
};
|
||||
|
||||
struct _GstYoloTensorDecoderClass
|
||||
|
Loading…
x
Reference in New Issue
Block a user