/* * GStreamer gstreamer-ssdobjectdetector * Copyright (C) 2021 Collabora Ltd. * * gstssdobjectdetector.c * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /** * SECTION:element-ssdobjectdetector * @short_description: Detect objects in video buffers using SSD neural network * * This element can parse per-buffer inference tensor meta data generated by an upstream * inference element * * * ## Example launch command: * * Test image file, model file (SSD) and label file can be found here : * https://gitlab.collabora.com/gstreamer/onnx-models * * GST_DEBUG=ssdobjectdetector:5 \ * gst-launch-1.0 multifilesrc location=onnx-models/images/bus.jpg ! \ * jpegdec ! videoconvert ! onnxinference execution-provider=cpu model-file=onnx-models/models/ssd_mobilenet_v1_coco.onnx ! \ * ssdobjectdetector label-file=onnx-models/labels/COCO_classes.txt ! videoconvert ! autovideosink * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "gstssdobjectdetector.h" #include #include #include #include /* Object detection tensor id strings */ #define GST_MODEL_OBJECT_DETECTOR_BOXES "ssd-mobilenet-v1-variant-1-out-boxes" #define GST_MODEL_OBJECT_DETECTOR_SCORES "ssd-mobilenet-v1-variant-1-out-scores" #define GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS "generic-variant-1-out-count" #define GST_MODEL_OBJECT_DETECTOR_CLASSES "ssd-mobilenet-v1-variant-1-out-classes" GST_DEBUG_CATEGORY_STATIC (ssd_object_detector_debug); #define GST_CAT_DEFAULT ssd_object_detector_debug GST_ELEMENT_REGISTER_DEFINE (ssd_object_detector, "ssdobjectdetector", GST_RANK_PRIMARY, GST_TYPE_SSD_OBJECT_DETECTOR); /* GstSsdObjectDetector properties */ enum { PROP_0, PROP_LABEL_FILE, PROP_SCORE_THRESHOLD, PROP_SIZE_THRESHOLD }; #define GST_SSD_OBJECT_DETECTOR_DEFAULT_SCORE_THRESHOLD 0.3f /* 0 to 1 */ #define GST_SSD_OBJECT_DETECTOR_DEFAULT_SIZE_THRESHOLD 0.9f /* 0 to 1 */ static GstStaticPadTemplate gst_ssd_object_detector_src_template = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-raw") ); static GstStaticPadTemplate gst_ssd_object_detector_sink_template = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-raw") ); static void gst_ssd_object_detector_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec); static void gst_ssd_object_detector_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static void gst_ssd_object_detector_finalize (GObject * object); static GstFlowReturn gst_ssd_object_detector_transform_ip (GstBaseTransform * trans, GstBuffer * buf); static gboolean gst_ssd_object_detector_process (GstBaseTransform * trans, GstBuffer * buf); static gboolean gst_ssd_object_detector_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps); G_DEFINE_TYPE (GstSsdObjectDetector, gst_ssd_object_detector, GST_TYPE_BASE_TRANSFORM); static void gst_ssd_object_detector_class_init (GstSsdObjectDetectorClass * klass) { GObjectClass *gobject_class = (GObjectClass *) klass; GstElementClass *element_class = (GstElementClass *) klass; GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass; GST_DEBUG_CATEGORY_INIT (ssd_object_detector_debug, "ssdobjectdetector", 0, "ssdobjectdetector"); gobject_class->set_property = gst_ssd_object_detector_set_property; gobject_class->get_property = gst_ssd_object_detector_get_property; gobject_class->finalize = gst_ssd_object_detector_finalize; /** * GstSsdObjectDetector:label-file * * Label file * * Since: 1.24 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LABEL_FILE, g_param_spec_string ("label-file", "Label file", "Label file", NULL, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstSsdObjectDetector:score-threshold * * Threshold for deciding when to remove boxes based on score * * Since: 1.24 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_SCORE_THRESHOLD, g_param_spec_float ("score-threshold", "Score threshold", "Threshold for deciding when to remove boxes based on score", 0.0, 1.0, GST_SSD_OBJECT_DETECTOR_DEFAULT_SCORE_THRESHOLD, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstSsdObjectDetector:size-threshold * * Threshold for deciding when to remove boxes based on proportion of the image * * Since: 1.26 */ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_SIZE_THRESHOLD, g_param_spec_float ("size-threshold", "Size threshold", "Threshold for deciding when to remove boxes based on proportion of the image", 0.0, 1.0, GST_SSD_OBJECT_DETECTOR_DEFAULT_SIZE_THRESHOLD, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); gst_element_class_set_static_metadata (element_class, "objectdetector", "Tensordecoder/Video", "Apply tensor output from inference to detect objects in video frames", "Aaron Boxer , Marcus Edel "); gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_ssd_object_detector_sink_template)); gst_element_class_add_pad_template (element_class, gst_static_pad_template_get (&gst_ssd_object_detector_src_template)); basetransform_class->transform_ip = GST_DEBUG_FUNCPTR (gst_ssd_object_detector_transform_ip); basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_ssd_object_detector_set_caps); } static void gst_ssd_object_detector_init (GstSsdObjectDetector * self) { self->size_threshold = GST_SSD_OBJECT_DETECTOR_DEFAULT_SIZE_THRESHOLD; self->score_threshold = GST_SSD_OBJECT_DETECTOR_DEFAULT_SCORE_THRESHOLD; } static void gst_ssd_object_detector_finalize (GObject * object) { GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (object); g_free (self->label_file); g_clear_pointer (&self->labels, g_array_unref); G_OBJECT_CLASS (gst_ssd_object_detector_parent_class)->finalize (object); } static GArray * read_labels (const char *labels_file) { GArray *array; GFile *file = g_file_new_for_path (labels_file); GFileInputStream *file_stream; GDataInputStream *data_stream; GError *error = NULL; gchar *line; file_stream = g_file_read (file, NULL, &error); g_object_unref (file); if (!file_stream) { GST_WARNING ("Could not open file %s: %s\n", labels_file, error->message); g_clear_error (&error); return NULL; } data_stream = g_data_input_stream_new (G_INPUT_STREAM (file_stream)); g_object_unref (file_stream); array = g_array_new (FALSE, FALSE, sizeof (GQuark)); while ((line = g_data_input_stream_read_line (data_stream, NULL, NULL, &error))) { GQuark label = g_quark_from_string (line); g_array_append_val (array, label); g_free (line); } g_object_unref (data_stream); if (error) { GST_WARNING ("Could not open file %s: %s", labels_file, error->message); g_array_free (array, TRUE); g_clear_error (&error); return NULL; } if (array->len == 0) { g_array_free (array, TRUE); return NULL; } return array; } static void gst_ssd_object_detector_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (object); const gchar *filename; switch (prop_id) { case PROP_LABEL_FILE: { GArray *labels; filename = g_value_get_string (value); labels = read_labels (filename); if (labels) { g_free (self->label_file); self->label_file = g_strdup (filename); g_clear_pointer (&self->labels, g_array_unref); self->labels = labels; } else { GST_WARNING_OBJECT (self, "Label file '%s' not found!", filename); } } break; case PROP_SCORE_THRESHOLD: GST_OBJECT_LOCK (self); self->score_threshold = g_value_get_float (value); GST_OBJECT_UNLOCK (self); break; case PROP_SIZE_THRESHOLD: GST_OBJECT_LOCK (self); self->size_threshold = g_value_get_float (value); GST_OBJECT_UNLOCK (self); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_ssd_object_detector_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (object); switch (prop_id) { case PROP_LABEL_FILE: g_value_set_string (value, self->label_file); break; case PROP_SCORE_THRESHOLD: GST_OBJECT_LOCK (self); g_value_set_float (value, self->score_threshold); GST_OBJECT_UNLOCK (self); break; case PROP_SIZE_THRESHOLD: GST_OBJECT_LOCK (self); g_value_set_float (value, self->size_threshold); GST_OBJECT_UNLOCK (self); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static gboolean gst_ssd_object_detector_get_tensors (GstSsdObjectDetector * object_detector, GstBuffer * buf, const GstTensor ** classes_tensor, const GstTensor ** numdetect_tensor, const GstTensor ** scores_tensor, const GstTensor ** boxes_tensor) { GstMeta *meta = NULL; gpointer iter_state = NULL; static const gsize BOXES_DIMS[] = { 1, G_MAXSIZE, 4 }; static const gsize NUM_DETECT_DIMS[] = { 1 }; static const gsize SCORES_CLASSES_DIMS[] = { 1, G_MAXSIZE }; if (!gst_buffer_get_meta (buf, GST_TENSOR_META_API_TYPE)) { GST_DEBUG_OBJECT (object_detector, "missing tensor meta from buffer %" GST_PTR_FORMAT, buf); return FALSE; } // find object detector meta while ((meta = gst_buffer_iterate_meta_filtered (buf, &iter_state, GST_TENSOR_META_API_TYPE))) { GstTensorMeta *tmeta = (GstTensorMeta *) meta; *boxes_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES), GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3, BOXES_DIMS); if (*boxes_tensor == NULL) *boxes_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_BOXES), GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 3, BOXES_DIMS); if (*boxes_tensor == NULL) continue; *scores_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES), GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, SCORES_CLASSES_DIMS); if (*scores_tensor == NULL) *scores_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_SCORES), GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, SCORES_CLASSES_DIMS); if (*scores_tensor == NULL) continue; *numdetect_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS), GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1, NUM_DETECT_DIMS); if (*numdetect_tensor == NULL) *numdetect_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_NUM_DETECTIONS), GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 1, NUM_DETECT_DIMS); if (*numdetect_tensor == NULL) continue; *classes_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES), GST_TENSOR_DATA_TYPE_FLOAT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, SCORES_CLASSES_DIMS); if (*classes_tensor == NULL) *classes_tensor = gst_tensor_meta_get_typed_tensor (tmeta, g_quark_from_static_string (GST_MODEL_OBJECT_DETECTOR_CLASSES), GST_TENSOR_DATA_TYPE_UINT32, GST_TENSOR_DIM_ORDER_ROW_MAJOR, 2, SCORES_CLASSES_DIMS); return TRUE; } return FALSE; } static gboolean gst_ssd_object_detector_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps) { GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (trans); if (!gst_video_info_from_caps (&self->video_info, incaps)) { GST_ERROR_OBJECT (self, "Failed to parse caps"); return FALSE; } return TRUE; } static GstFlowReturn gst_ssd_object_detector_transform_ip (GstBaseTransform * trans, GstBuffer * buf) { if (!gst_base_transform_is_passthrough (trans)) { if (!gst_ssd_object_detector_process (trans, buf)) { GST_ELEMENT_ERROR (trans, STREAM, FAILED, (NULL), ("ssd object detection failed")); return GST_FLOW_ERROR; } } return GST_FLOW_OK; } #define DEFINE_GET_FUNC(TYPE, MAX) \ static gboolean \ get_ ## TYPE ## _at_index (const GstTensor *tensor, GstMapInfo *map, \ guint index, TYPE * out) \ { \ switch (tensor->data_type) { \ case GST_TENSOR_DATA_TYPE_FLOAT32: { \ float *f = (float *) map->data; \ if (sizeof(*f) * (index + 1) > map->size) \ return FALSE; \ *out = f[index]; \ break; \ } \ case GST_TENSOR_DATA_TYPE_UINT32: { \ guint32 *u = (guint32 *) map->data; \ if (sizeof(*u) * (index + 1) > map->size) \ return FALSE; \ *out = u[index]; \ break; \ } \ default: \ GST_ERROR ("Only float32 and int32 tensors are understood"); \ return FALSE; \ } \ return TRUE; \ } DEFINE_GET_FUNC (guint32, UINT32_MAX); DEFINE_GET_FUNC (float, FLOAT_MAX); #undef DEFINE_GET_FUNC static void extract_bounding_boxes (GstSsdObjectDetector * self, gsize w, gsize h, GstAnalyticsRelationMeta * rmeta, const GstTensor * classes_tensor, const GstTensor * numdetect_tensor, const GstTensor * scores_tensor, const GstTensor * boxes_tensor) { GstMapInfo boxes_map = GST_MAP_INFO_INIT; GstMapInfo numdetect_map = GST_MAP_INFO_INIT; GstMapInfo scores_map = GST_MAP_INFO_INIT; GstMapInfo classes_map = GST_MAP_INFO_INIT; guint num_detections = 0; if (numdetect_tensor == NULL || scores_tensor == NULL || boxes_tensor == NULL) { GST_WARNING ("Missing tensor data expected for SSD model"); return; } if (!gst_buffer_map (numdetect_tensor->data, &numdetect_map, GST_MAP_READ)) { GST_ERROR_OBJECT (self, "Failed to map numdetect tensor memory"); goto cleanup; } if (!gst_buffer_map (boxes_tensor->data, &boxes_map, GST_MAP_READ)) { GST_ERROR_OBJECT (self, "Failed to map boxes tensor memory"); goto cleanup; } if (!gst_buffer_map (scores_tensor->data, &scores_map, GST_MAP_READ)) { GST_ERROR_OBJECT (self, "Failed to map scores tensor memory"); goto cleanup; } if (classes_tensor && !gst_buffer_map (classes_tensor->data, &classes_map, GST_MAP_READ)) { GST_DEBUG_OBJECT (self, "Failed to map classes tensor memory"); goto cleanup; } if (!get_guint32_at_index (numdetect_tensor, &numdetect_map, 0, &num_detections)) { GST_ERROR_OBJECT (self, "Failed to get the number of detections"); goto cleanup; } GST_LOG_OBJECT (self, "Model claims %u detections", num_detections); num_detections = MIN (num_detections, scores_tensor->dims[1]); num_detections = MIN (num_detections, boxes_tensor->dims[1]); if (classes_tensor) num_detections = MIN (num_detections, classes_tensor->dims[1]); GST_LOG_OBJECT (self, "Model really has %u detections" " (%zu scores, %zu boxes, %zu classes)", num_detections, scores_tensor->dims[1], boxes_tensor->dims[1], classes_tensor ? classes_tensor->dims[1] : 0); for (int i = 0; i < num_detections; i++) { float score; float x, y, bwidth, bheight; gint x_i, y_i, bwidth_i, bheight_i; guint32 bclass; GQuark label = 0; GstAnalyticsODMtd odmtd; if (!get_float_at_index (scores_tensor, &scores_map, i, &score)) continue; GST_LOG_OBJECT (self, "Detection %u score is %f", i, score); if (score < self->score_threshold) continue; if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4, &y)) continue; if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 1, &x)) continue; if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 2, &bheight)) continue; if (!get_float_at_index (boxes_tensor, &boxes_map, i * 4 + 3, &bwidth)) continue; if (CLAMP (bwidth, 0, 1) * CLAMP (bheight, 0, 1) > self->size_threshold) { GST_LOG_OBJECT (self, "Object at (%fx%f)=%f > %f, skipping", CLAMP (bwidth, 0, 1), CLAMP (bheight, 0, 1), CLAMP (bwidth, 0, 1) * CLAMP (bheight, 0, 1), self->size_threshold); continue; } if (self->labels && classes_map.memory && get_guint32_at_index (classes_tensor, &classes_map, i, &bclass)) { if (bclass < self->labels->len) label = g_array_index (self->labels, GQuark, bclass); } x_i = x * w; y_i = y * h; bheight_i = (bheight * h) - y_i; bwidth_i = (bwidth * w) - x_i; if (gst_analytics_relation_meta_add_od_mtd (rmeta, label, x_i, y_i, bwidth_i, bheight_i, score, &odmtd)) GST_DEBUG_OBJECT (self, "Object detected with label : %s, score: %f, bound box: %dx%d at (%d,%d)", g_quark_to_string (label), score, bwidth_i, bheight_i, x_i, y_i); else GST_WARNING_OBJECT (self, "Could not add detection to meta"); } cleanup: if (numdetect_map.memory) gst_buffer_unmap (numdetect_tensor->data, &numdetect_map); if (classes_map.memory) gst_buffer_unmap (classes_tensor->data, &classes_map); if (scores_map.memory) gst_buffer_unmap (scores_tensor->data, &scores_map); if (boxes_map.memory) gst_buffer_unmap (boxes_tensor->data, &boxes_map); } static gboolean gst_ssd_object_detector_process (GstBaseTransform * trans, GstBuffer * buf) { GstSsdObjectDetector *self = GST_SSD_OBJECT_DETECTOR (trans); GstAnalyticsRelationMeta *rmeta; const GstTensor *classes_tensor = NULL; const GstTensor *numdetect_tensor = NULL; const GstTensor *scores_tensor = NULL; const GstTensor *boxes_tensor = NULL; // get all tensor metas if (!gst_ssd_object_detector_get_tensors (self, buf, &classes_tensor, &numdetect_tensor, &scores_tensor, &boxes_tensor)) { GST_WARNING_OBJECT (trans, "missing tensor meta"); return TRUE; } else { rmeta = gst_buffer_add_analytics_relation_meta (buf); g_assert (rmeta); } extract_bounding_boxes (self, self->video_info.width, self->video_info.height, rmeta, classes_tensor, numdetect_tensor, scores_tensor, boxes_tensor); return TRUE; }