diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.c b/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.c index b2943a450d..6fcf00b8b0 100644 --- a/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.c +++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.c @@ -57,6 +57,7 @@ #include "gstyolotensordecoder.h" #include +#include #include @@ -81,7 +82,8 @@ enum PROP_IOU_THRESH, PROP_MAX_DETECTION, PROP_MASK_TENSOR_NAME, - PROP_LOGITS_TENSOR_NAME + PROP_LOGITS_TENSOR_NAME, + PROP_LABEL_FILE }; /* For debug purpose */ @@ -150,6 +152,7 @@ static gboolean gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, static void gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self, GstTensor * masks_tensor, GstTensor * logits_tensor, GstAnalyticsRelationMeta * rmeta); +static void gst_yolo_tensor_decoder_finalize (GObject * object); G_DEFINE_TYPE (GstYoloTensorDecoder, gst_yolo_tensor_decoder, GST_TYPE_BASE_TRANSFORM); @@ -168,6 +171,7 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass) /* Set GObject vmethod to get and set property */ gobject_class->set_property = gst_yolo_tensor_decoder_set_property; gobject_class->get_property = gst_yolo_tensor_decoder_get_property; + gobject_class->finalize = gst_yolo_tensor_decoder_finalize; /* Define GstYoloTensorDecoder properties using GObject properties * interface.*/ @@ -224,6 +228,18 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass) (GParamFlags) (G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS))); + /** + * GstYoloTensorDecoder:label-file + * + * Label file + * + * Since: 1.26 + */ + g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE, + g_param_spec_string ("label-file", + "Label file", "Label file", NULL, (GParamFlags) + (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + /* Element description. 
*/ gst_element_class_set_static_metadata (element_class, "yolotensordecoder", "TensorDecoder/Video", @@ -277,6 +293,17 @@ gst_yolo_tensor_decoder_class_init (GstYoloTensorDecoderClass * klass) g_quark_from_static_string (GST_MODEL_YOLO_SEGMENTATION_LOGITS); } +static void +gst_yolo_tensor_decoder_finalize (GObject * object) +{ + GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (object); + + g_free (self->label_file); + g_clear_pointer (&self->labels, g_array_unref); + + G_OBJECT_CLASS (gst_yolo_tensor_decoder_parent_class)->finalize (object); +} + static void gst_yolo_tensor_decoder_init (GstYoloTensorDecoder * self) { @@ -314,11 +341,59 @@ gst_yolo_tensor_decoder_stop (GstBaseTransform * trans) return TRUE; } +static GArray * +read_labels (const char *labels_file) +{ + GArray *array; + GFile *file = g_file_new_for_path (labels_file); + GFileInputStream *file_stream; + GDataInputStream *data_stream; + GError *error = NULL; + gchar *line; + + file_stream = g_file_read (file, NULL, &error); + g_object_unref (file); + if (!file_stream) { + GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message); + g_clear_error (&error); + return NULL; + } + + data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream)); + g_object_unref (file_stream); + + array = g_array_new (FALSE, FALSE, sizeof (GQuark)); + + while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL, + &error))) { + GQuark label = g_quark_from_string (line); + g_array_append_val (array, label); + g_free (line); + } + + g_object_unref (data_stream); + + if (error) { + GST_WARNING ("Could not read file %s: %s", labels_file, error->message); + g_array_free (array, TRUE); + g_clear_error (&error); + return NULL; + } + + if (array->len == 0) { + g_array_free (array, TRUE); + return NULL; + } + + return array; +} + static void gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstYoloTensorDecoder *self = 
GST_YOLO_TENSOR_DECODER (object); + const gchar *filename; switch (prop_id) { case PROP_BOX_CONFI_THRESH: @@ -351,6 +426,23 @@ gst_yolo_tensor_decoder_set_property (GObject * object, guint prop_id, self->logits_tensor_id = g_quark_from_string (g_value_get_string (value)); GST_OBJECT_UNLOCK (self); break; + case PROP_LABEL_FILE: + { + GArray *labels; + + filename = g_value_get_string (value); + labels = filename ? read_labels (filename) : NULL; + + if (labels) { + g_free (self->label_file); + self->label_file = g_strdup (filename); + g_clear_pointer (&self->labels, g_array_unref); + self->labels = labels; + } else { + GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename); + } + } + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -386,6 +477,9 @@ gst_yolo_tensor_decoder_get_property (GObject * object, guint prop_id, g_value_set_string (value, g_quark_to_string (self->logits_tensor_id)); GST_OBJECT_UNLOCK (self); break; + case PROP_LABEL_FILE: + g_value_set_string (value, self->label_file); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -479,8 +573,7 @@ gst_yolo_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps, * downstream. */ static GstFlowReturn -gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans, - GstBuffer * buf) +gst_yolo_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf) { GstYoloTensorDecoder *self = GST_YOLO_TENSOR_DECODER (trans); GstTensor *masks_tensor, *logits_tensor; @@ -656,13 +749,15 @@ gst_yolo_tensor_decoder_iou (gfloat * c1, gfloat * c2, gsize * offset, * specified by range. 
*/ static gfloat -gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer range) +gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat * c, + const ConfidenceRange * c_range, gsize * max_class_ofs) { - ConfidenceRange c_range = *((ConfidenceRange *) range); gfloat max_val = 0.0; - for (gsize i = c_range.start; i <= c_range.end; i += c_range.step) { - if (*(c + i) > max_val) + for (gsize i = c_range->start; i <= c_range->end; i += c_range->step) { + if (*(c + i) > max_val) { max_val = *(c + i); + *max_class_ofs = i; + } } return max_val; } @@ -672,14 +767,29 @@ gst_yolo_tensor_decoder_find_max_class_confidence (const gfloat *c, gpointer ran * by offset. */ static gint -gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, gpointer range) +gst_yolo_tensor_decoder_sort_candidates (gconstpointer c1, gconstpointer c2, + gpointer range) { + ConfidenceRange *c_range = (ConfidenceRange *) range; const gfloat *candidate1 = *((gfloat **) c1); const gfloat *candidate2 = *((gfloat **) c2); - const gfloat max_c1_confi = - gst_yolo_tensor_decoder_find_max_class_confidence (candidate1, range); - const gfloat max_c2_confi = - gst_yolo_tensor_decoder_find_max_class_confidence (candidate2, range); + gfloat max_c1_confi; + gfloat max_c2_confi; + gsize offset; + + if (candidate1[c_range->start] <= -1.0) { + offset = (gsize) (-candidate1[c_range->start]); + max_c1_confi = candidate1[offset]; + } else { + max_c1_confi = candidate1[c_range->start]; + } + + if (candidate2[c_range->start] <= -1.0) { + offset = (gsize) (-candidate2[c_range->start]); + max_c2_confi = candidate2[offset]; + } else { + max_c2_confi = candidate2[c_range->start]; + } return max_c1_confi < max_c2_confi ? 1 : max_c1_confi > max_c2_confi ? 
-1 : 0; } @@ -745,7 +855,7 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self, /*guint batch_size = masks_tensor->dims[0]; */ /*guint num_masks = masks_tensor->dims[1]; */ GstMapInfo map_info_masks, map_info_logits, out_mask_info; - gfloat *candidate, **candidates, iou, *data_logits; + gfloat *candidate, **candidates, iou, *data_logits, confid = -1.0; gboolean rv, keep; gsize offset, x_offset, y_offset, w_offset, h_offset, offsets[4]; gsize m0_offset; @@ -755,9 +865,11 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self, GstAnalyticsSegmentationMtd seg_mtd; guint8 *mask_data; ConfidenceRange c_range; + gsize max_class_offset = 0, class_index; + GQuark class_quark = OOI_CLASS_ID; /* Retrieve memory at index 0 and map it in READ mode */ - rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READ); + rv = gst_buffer_map (masks_tensor->data, &map_info_masks, GST_MAP_READWRITE); g_assert (rv); /* Retrieve memory at index 0 from logits_tensor in READ mode */ @@ -829,17 +941,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self, #define BB_W(candidate) candidate[w_offset] #define BB_H(candidate) candidate[h_offset] - candidate = (gfloat *) map_info_masks.data;; + candidate = (gfloat *) map_info_masks.data; for (gsize c_idx = 0; c_idx < masks_tensor->dims[2]; c_idx++) { /* Yolo have multiple class, so maximum confidence level across all class is used * to evaluate the relevance of the candidate. Here we filter candidates * based on their class confidence level.*/ gfloat max_confidence = - gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range); + gst_yolo_tensor_decoder_find_max_class_confidence (candidate, &c_range, + &max_class_offset); if (max_confidence > self->cls_confi_thresh && gst_yolo_tensor_decoder_decode_valid_bb (self, BB_X (candidate), BB_Y (candidate), BB_W (candidate), BB_H (candidate))) { + + /* We need a way to keep track of the class with maximum confidence. 
At + * this level we're operating on a large number of candidates. Candidates + * will be sorted and filtered later on. Here we use an inplace method + * to store the offset of the class with highest confidence level. If + * the class with highest confidence level is the first one we keep its + * value as-is, otherwise we overwrite the first class confidence level + * with the value of the -offset of the class with maximum confidence. */ + if (max_class_offset != c_range.start) { + candidate[c_range.start] = -(float) (max_class_offset); + } + g_ptr_array_add (sel_candidates, candidate); GST_TRACE_OBJECT (self, "%lu: x,y=(%f;%f) w,h=(%f;%f), s=%f c=%f", @@ -906,11 +1031,30 @@ gst_yolo_tensor_decoder_decode_masks_f32 (GstYoloTensorDecoder * self, g_ptr_array_add (selected, candidate); region_ids[1] = selected->len; + if (self->labels) { + if (candidate[c_range.start] <= -1.0) { + /* Max class is not the first one and `candidate[c_range.start]` + * contains -offset to the class with maximum confidence */ + max_class_offset = (gsize) (-candidate[c_range.start]); + confid = candidate[max_class_offset]; + + /* Set overwritten confidence to 0 to avoid incorrect interpretation */ + candidate[c_range.start] = 0.0; + class_index = (max_class_offset - c_range.start) / c_range.step; + } else { + confid = candidate[c_range.start]; + class_index = 0; + } + + class_quark = (class_index < self->labels->len) ? + g_array_index (self->labels, GQuark, class_index) : OOI_CLASS_ID; + } + /* We add the analytics-objectdetection-meta to the buffer. Since * there's only one class the class confidence level is set to -1.0 * as it's deemed not important. 
*/ - gst_analytics_relation_meta_add_od_mtd (rmeta, OOI_CLASS_ID, - bb1.x, bb1.y, bb1.w, bb1.h, -1.0, &od_mtd); + gst_analytics_relation_meta_add_od_mtd (rmeta, class_quark, + bb1.x, bb1.y, bb1.w, bb1.h, confid, &od_mtd); bb_mask.x = self->bb2mask_gain * bb1.x + self->mask_roi.x; bb_mask.y = self->bb2mask_gain * bb1.y + self->mask_roi.y; diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.h b/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.h index a6c6a34767..95e0424259 100644 --- a/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.h +++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gstyolotensordecoder.h @@ -89,6 +89,12 @@ struct _GstYoloTensorDecoder /* BufferPool for mask */ GstBufferPool *mask_pool; + + /* Path of the labels file that was last loaded successfully */ + gchar *label_file; + + /* Class labels as an array of GQuark, one per line of the labels file */ + GArray *labels; }; struct _GstYoloTensorDecoderClass