From 244dd01b22eb5b0f1dcc9a36222f2171214f1482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Cr=C3=AAte?= Date: Mon, 6 Jan 2025 13:29:28 -0600 Subject: [PATCH] fastsamtensordecoder: Set mask resolution based on model output --- .../tensordecoders/gstfastsamtensordecoder.c | 103 +++++------------- 1 file changed, 29 insertions(+), 74 deletions(-) diff --git a/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c index 7b029eb2d1..6f52c16303 100644 --- a/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c +++ b/subprojects/gst-plugins-bad/gst/tensordecoders/gstfastsamtensordecoder.c @@ -132,7 +132,7 @@ static void gst_fastsam_tensor_decoder_set_property (GObject * object, static void gst_fastsam_tensor_decoder_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); -static void gst_fastsam_tensor_decoder_finalize (GObject * object); +static gboolean gst_fastsam_tensor_decoder_stop (GstBaseTransform * trans); static GstFlowReturn gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans, GstBuffer * buf); @@ -160,9 +160,6 @@ gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass) gobject_class->set_property = gst_fastsam_tensor_decoder_set_property; gobject_class->get_property = gst_fastsam_tensor_decoder_get_property; - /* Set GObject vmethod finalize */ - gobject_class->finalize = gst_fastsam_tensor_decoder_finalize; - /* Define GstFastSAMTensorDecoder properties using GObject properties * interface.*/ g_object_class_install_property (G_OBJECT_CLASS (klass), @@ -254,6 +251,10 @@ gst_fastsam_tensor_decoder_class_init (GstFastSAMTensorDecoderClass * klass) basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_fastsam_tensor_decoder_set_caps); + /* Set GstBaseTransform vmethod stop */ + basetransform_class->stop = gst_fastsam_tensor_decoder_stop; + + /* Calculate the class id placeholder (also a 
quark) that will be set on all * OD analytics-meta. */ OOI_CLASS_ID = g_quark_from_static_string ("FastSAM-None"); @@ -277,32 +278,31 @@ gst_fastsam_tensor_decoder_init (GstFastSAMTensorDecoder * self) self->max_detection = DEFAULT_MAX_DETECTION; self->sel_candidates = NULL; self->selected = NULL; - self->mask_w = 256; - self->mask_h = 256; - self->mask_length = self->mask_w * self->mask_h; + self->mask_w = 0; + self->mask_h = 0; + self->mask_length = 0; memset (&self->mask_roi, 0, sizeof (BBox)); self->mask_pool = NULL; gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (self), FALSE); } -static void -gst_fastsam_tensor_decoder_finalize (GObject * object) +static gboolean +gst_fastsam_tensor_decoder_stop (GstBaseTransform * trans) { - GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (object); + GstFastSAMTensorDecoder *self = GST_FASTSAM_TENSOR_DECODER (trans); - if (self->sel_candidates) { - g_ptr_array_unref (g_steal_pointer (&self->sel_candidates)); - } + self->mask_w = 0; + self->mask_h = 0; + self->mask_length = 0; - if (self->selected) { - g_ptr_array_unref (g_steal_pointer (&self->selected)); - } + g_clear_pointer (&self->sel_candidates, g_ptr_array_unref); + g_clear_pointer (&self->selected, g_ptr_array_unref); + if (self->mask_pool) + gst_buffer_pool_set_active (self->mask_pool, FALSE); - if (self->mask_pool) { - gst_object_unref (self->mask_pool); - } + g_clear_object (&self->mask_pool); - G_OBJECT_CLASS (gst_fastsam_tensor_decoder_parent_class)->finalize (object); + return TRUE; } static void @@ -458,46 +458,6 @@ gst_fastsam_tensor_decoder_set_caps (GstBaseTransform * trans, GstCaps * incaps, return FALSE; } - /* The masks need to be cropped to fit the SAR of the image. */ - /* TODO: We're reconstructing the transformation that was done on the - * original image based on the assumption that the complete image without - * deformation would be analyzed. 
This assumption is not alway true and - * we should try to find a way to convey this transformation information - * and retrieve from here to know the transformation that need to be done - * on the mask.*/ - - if (self->video_info.width > self->video_info.height) { - self->bb2mask_gain = ((gfloat) self->mask_w) / self->video_info.width; - self->mask_roi.x = 0; - self->mask_roi.w = self->mask_w; - self->mask_roi.h = ((gfloat) self->bb2mask_gain) * self->video_info.height; - self->mask_roi.y = (self->mask_h - self->mask_roi.h) / 2; - } else { - self->bb2mask_gain = ((gfloat) self->mask_h) / self->video_info.height; - self->mask_roi.y = 0; - self->mask_roi.h = self->mask_h; - self->mask_roi.w = self->bb2mask_gain * self->video_info.width; - self->mask_roi.x = (self->mask_w - self->mask_roi.w) / 2; - } - - if (self->mask_pool == NULL) { - GstVideoInfo minfo; - GstCaps *caps; - gst_video_info_init (&minfo); - gst_video_info_set_format (&minfo, GST_VIDEO_FORMAT_GRAY8, 256, 256); - caps = gst_video_info_to_caps (&minfo); - self->mask_pool = gst_video_buffer_pool_new (); - GstStructure *config = gst_buffer_pool_get_config (self->mask_pool); - gst_buffer_pool_config_set_params (config, caps, self->mask_length, 0, 0); - gst_buffer_pool_config_add_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_META); - g_return_val_if_fail (gst_buffer_pool_set_config (self->mask_pool, config), - FALSE); - g_return_val_if_fail (gst_buffer_pool_set_active (self->mask_pool, TRUE), - FALSE); - gst_caps_unref (caps); - } - return TRUE; } @@ -522,10 +482,10 @@ gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans, &logits_tensor)) return GST_FLOW_OK; - if (masks_tensor->num_dims < 3) { + if (masks_tensor->num_dims != 3) { GST_ELEMENT_ERROR (self, STREAM, DECODE, (NULL), - ("Masks tensor must have at least 3 dimensions," - "but only has %zu", masks_tensor->num_dims)); + ("Masks tensor must have 3 dimensions but has %zu", + masks_tensor->num_dims)); return GST_FLOW_ERROR; } @@ -603,15 
+563,10 @@ gst_fastsam_tensor_decoder_transform_ip (GstBaseTransform * trans, * retrieve an analytics-relation-meta if it exist or create one if it * does not exist. */ rmeta = gst_buffer_add_analytics_relation_meta_full (buf, &rmeta_init_params); - g_return_val_if_fail (rmeta != NULL, GST_FLOW_ERROR); + g_assert (rmeta != NULL); /* Decode masks_tensor and attach the information in a structured way - * to rmeta. - * TODO: I think we need to send both tensors masks and logits - * to gst_fastsam_tensor_decoder_decode_masks_f32 since both are - * required simultanously to extract the segmentation. If this is the case - * we probably should rename gst_fastsam_tensor_decoder_decode_masks_f32 to - * gst_fastsam_tensor_decoder_decode_f32. */ + * to rmeta. */ gst_fastsam_tensor_decoder_decode_masks_f32 (self, masks_tensor, logits_tensor, rmeta); @@ -802,8 +757,8 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self, self->selected = selected; } else { /* Reset lengths when we re-use arrays */ - sel_candidates->len = 0; - selected->len = 0; + g_ptr_array_set_size (sel_candidates, 0); + g_ptr_array_set_size (selected, 0); } /* masks_tensor->dims[2] contain the number of candidates. Let's call the @@ -941,8 +896,8 @@ gst_fastsam_tensor_decoder_decode_masks_f32 (GstFastSAMTensorDecoder * self, for (gint mx = bb_mask.x; mx < MX_MAX; mx++, i++) { float sum = 0.0f; j = my * self->mask_w + mx; - for (gint k = 0; k < 32; ++k) { - GST_TRACE_OBJECT (self, "protos data at (%d, %d) is %f", j, k, + for (gsize k = 0; k < logits_tensor->dims[1]; ++k) { + GST_TRACE_OBJECT (self, "protos data at (%d, %zu) is %f", j, k, data_logits[k * self->mask_length + j]); sum += MASK_X (candidate, k) * data_logits[k * self->mask_length + j];