diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12compositor.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12compositor.cpp index f588ee360a..4ea8060dc2 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12compositor.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12compositor.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -172,10 +173,12 @@ enum PROP_ADAPTER, PROP_BACKGROUND, PROP_IGNORE_INACTIVE_PADS, + PROP_ASYNC_DEPTH, }; #define DEFAULT_ADAPTER -1 #define DEFAULT_BACKGROUND GST_D3D12_COMPOSITOR_BACKGROUND_CHECKER +#define DEFAULT_ASYNC_DEPTH 0 static const D3D12_RENDER_TARGET_BLEND_DESC g_blend_source = { TRUE, @@ -527,7 +530,6 @@ struct BackgroundRender bool is_valid = false; guint64 fence_val = 0; }; -/* *INDENT-ON* */ struct ClearColor { @@ -556,8 +558,8 @@ struct GStD3D12CompositorPrivate /* black/white/transparent */ ClearColor clear_color[3]; GstD3D12FenceDataPool *fence_data_pool; - std::vector < D3D12_CPU_DESCRIPTOR_HANDLE > rtv_handles; - std::queue < guint64 > scheduled; + std::vector rtv_handles; + std::queue scheduled; GstVideoInfo negotiated_info; @@ -568,7 +570,9 @@ struct GStD3D12CompositorPrivate /* properties */ gint adapter = DEFAULT_ADAPTER; GstD3D12CompositorBackground background = DEFAULT_BACKGROUND; + std::atomic async_depth = { DEFAULT_ASYNC_DEPTH }; }; +/* *INDENT-ON* */ struct _GstD3D12Compositor { @@ -1308,6 +1312,13 @@ gst_d3d12_compositor_class_init (GstD3D12CompositorClass * klass) "Avoid timing out waiting for inactive pads", FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property (object_class, PROP_ASYNC_DEPTH, + g_param_spec_uint ("async-depth", "Async Depth", + "Number of in-flight GPU commands which can be scheduled without " + "synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH, + (GParamFlags) (GST_PARAM_MUTABLE_PLAYING | + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + element_class->request_new_pad = GST_DEBUG_FUNCPTR (gst_d3d12_compositor_request_new_pad); element_class->release_pad = @@ -1389,6 +1400,9 @@ gst_d3d12_compositor_set_property (GObject * object, gst_aggregator_set_ignore_inactive_pads (GST_AGGREGATOR (object), g_value_get_boolean (value)); break; + case PROP_ASYNC_DEPTH: + priv->async_depth = g_value_get_uint (value); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -1414,6 +1428,9 @@ gst_d3d12_compositor_get_property (GObject * object, g_value_set_boolean (value, gst_aggregator_get_ignore_inactive_pads (GST_AGGREGATOR (object))); break; + case PROP_ASYNC_DEPTH: + g_value_set_uint (value, priv->async_depth); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -2355,25 +2372,6 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg, return GST_FLOW_ERROR; } - auto completed = gst_d3d12_device_get_completed_value (self->device, - D3D12_COMMAND_LIST_TYPE_DIRECT); - while (!priv->scheduled.empty ()) { - if (priv->scheduled.front () > completed) - break; - - priv->scheduled.pop (); - } - - /* avoid too large buffering */ - if (priv->scheduled.size () > 2) { - auto fence_to_wait = priv->scheduled.front (); - priv->scheduled.pop (); - GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT, - fence_to_wait); - gst_d3d12_device_fence_wait (self->device, - D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); - } - if (!gst_d3d12_compositor_draw_background (self)) { GST_ERROR_OBJECT (self, "Couldn't draw background"); return GST_FLOW_ERROR; @@ -2429,6 +2427,26 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg, return ret; priv->scheduled.push (fence_val); + + auto completed = gst_d3d12_device_get_completed_value (self->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + while (!priv->scheduled.empty ()) { + if (priv->scheduled.front () > completed) + break; + + priv->scheduled.pop (); + } + + auto async_depth = priv->async_depth.load (); + if (async_depth > 0 && priv->scheduled.size () > async_depth) { + auto fence_to_wait = priv->scheduled.front (); + priv->scheduled.pop (); + GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT, + fence_to_wait); + gst_d3d12_device_fence_wait (self->device, + D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); + } + if (priv->generated_output_buf != outbuf) { GstVideoFrame out_frame, in_frame; if (!gst_video_frame_map (&in_frame, &vagg->info, diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12convert.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12convert.cpp index c83a5bc240..aca2f16ce6 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12convert.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12convert.cpp @@ -28,6 +28,7 @@ #include #include #include +#include /* *INDENT-OFF* */ using namespace Microsoft::WRL; @@ -65,6 +66,7 @@ enum PROP_VIDEO_DIRECTION, PROP_GAMMA_MODE, PROP_PRIMARIES_MODE, + PROP_ASYNC_DEPTH, }; #define DEFAULT_ADD_BORDERS TRUE @@ -72,8 +74,7 @@ enum #define DEFAULT_GAMMA_MODE GST_VIDEO_GAMMA_MODE_NONE #define DEFAULT_PRIMARIES_MODE GST_VIDEO_PRIMARIES_MODE_NONE #define DEFAULT_SAMPLING_METHOD GST_D3D12_SAMPLING_METHOD_BILINEAR - -#define ASYNC_DEPTH 2 +#define DEFAULT_ASYNC_DEPTH 0 /* *INDENT-OFF* */ struct ConvertContext @@ -154,6 +155,8 @@ struct GstD3D12ConvertPrivate /* method previously selected and used for negotiation */ GstVideoOrientationMethod active_method = GST_VIDEO_ORIENTATION_IDENTITY; + std::atomic async_depth = { DEFAULT_ASYNC_DEPTH }; + std::mutex lock; }; /* *INDENT-ON* */ @@ -252,6 +255,13 @@ gst_d3d12_convert_class_init (GstD3D12ConvertClass * klass) DEFAULT_PRIMARIES_MODE, (GParamFlags) (GST_PARAM_MUTABLE_PLAYING | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property (object_class, PROP_ASYNC_DEPTH, + g_param_spec_uint ("async-depth", "Async Depth", + "Number of in-flight GPU commands which can be scheduled without " + "synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH, + (GParamFlags) (GST_PARAM_MUTABLE_PLAYING | + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + gst_element_class_add_static_pad_template (element_class, &sink_template); gst_element_class_add_static_pad_template (element_class, &src_template); @@ -425,6 +435,7 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { auto self = GST_D3D12_CONVERT (object); + auto priv = self->priv; switch (prop_id) { case PROP_SAMPLING_METHOD: @@ -449,6 +460,9 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id, gst_d3d12_convert_set_primaries_mode (self, (GstVideoPrimariesMode) g_value_get_enum (value)); break; + case PROP_ASYNC_DEPTH: + priv->async_depth = g_value_get_uint (value); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -482,6 +496,9 @@ gst_d3d12_convert_get_property (GObject * object, guint prop_id, case PROP_PRIMARIES_MODE: g_value_set_enum (value, priv->primaries_mode); break; + case PROP_ASYNC_DEPTH: + g_value_set_uint (value, priv->async_depth); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -1971,22 +1988,6 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf, "src-height", (gint) in_rect.bottom - in_rect.top, nullptr); } - auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device, - D3D12_COMMAND_LIST_TYPE_DIRECT); - while (!priv->ctx->scheduled.empty ()) { - if (priv->ctx->scheduled.front () > completed) - break; - - priv->ctx->scheduled.pop (); - } - - if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) { - auto fence_to_wait = priv->ctx->scheduled.front (); - priv->ctx->scheduled.pop (); - gst_d3d12_device_fence_wait (priv->ctx->device, - D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); - } - GstD3D12CommandAllocator *gst_ca; if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) { GST_ERROR_OBJECT (self, "Couldn't acquire command allocator"); @@ -2057,5 +2058,22 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf, priv->ctx->scheduled.push (priv->ctx->fence_val); + auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + while (!priv->ctx->scheduled.empty ()) { + if (priv->ctx->scheduled.front () > completed) + break; + + priv->ctx->scheduled.pop (); + } + + auto async_depth = priv->async_depth.load (); + if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) { + auto fence_to_wait = priv->ctx->scheduled.front (); + priv->ctx->scheduled.pop (); + gst_d3d12_device_fence_wait (priv->ctx->device, + D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); + } + return GST_FLOW_OK; } diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12testsrc.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12testsrc.cpp index b811cbac3d..9f7801d5bd 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12testsrc.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12testsrc.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include /* *INDENT-OFF* */ @@ -120,13 +121,13 @@ enum PROP_PATTERN, PROP_ALPHA, PROP_ALPHA_MODE, + PROP_ASYNC_DEPTH, }; #define DEFAULT_ADAPTER -1 #define DEFAULT_PATTERN GST_D3D12_TEST_SRC_SMPTE #define DEFAULT_ALPHA 1.0f - -#define ASYNC_DEPTH 2 +#define DEFAULT_ASYNC_DEPTH 0 struct ColorValue { @@ -321,6 +322,7 @@ struct GstD3D12TestSrcPrivate gint64 accum_frames = 0; GstClockTime accum_rtime = 0; GstClockTime running_time = 0; + std::atomic async_depth = { DEFAULT_ASYNC_DEPTH }; }; /* *INDENT-ON* */ @@ -1387,6 +1389,12 @@ gst_d3d12_test_src_class_init (GstD3D12TestSrcClass * klass) 0, 1, DEFAULT_ALPHA, (GParamFlags) (G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property (object_class, PROP_ASYNC_DEPTH, + g_param_spec_uint ("async-depth", "Async Depth", + "Number of in-flight GPU commands which can be scheduled without " + "synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH, + (GParamFlags) (GST_PARAM_MUTABLE_PLAYING | + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); element_class->set_context = GST_DEBUG_FUNCPTR (gst_d3d12_test_src_set_context); @@ -1456,6 +1464,9 @@ gst_d3d12_test_src_set_property (GObject * object, guint prop_id, case PROP_ALPHA: priv->alpha = g_value_get_float (value); break; + case PROP_ASYNC_DEPTH: + priv->async_depth = g_value_get_uint (value); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -1482,6 +1493,9 @@ gst_d3d12_test_src_get_property (GObject * object, guint prop_id, case PROP_ALPHA: g_value_set_float (value, priv->alpha); break; + case PROP_ASYNC_DEPTH: + g_value_set_uint (value, priv->async_depth); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -2166,22 +2180,6 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset, if (ret != GST_FLOW_OK) return ret; - auto completed = gst_d3d12_device_get_completed_value (self->device, - D3D12_COMMAND_LIST_TYPE_DIRECT); - while (!priv->ctx->scheduled.empty ()) { - if (priv->ctx->scheduled.front () > completed) - break; - - priv->ctx->scheduled.pop (); - } - - if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) { - auto fence_to_wait = priv->ctx->scheduled.front (); - priv->ctx->scheduled.pop (); - gst_d3d12_device_fence_wait (self->device, - D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); - } - GstD3D12CommandAllocator *gst_ca; if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) { GST_ERROR_OBJECT (self, "Couldn't acquire command allocator"); @@ -2264,6 +2262,23 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset, priv->ctx->scheduled.push (priv->ctx->fence_val); + auto completed = gst_d3d12_device_get_completed_value (self->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + while (!priv->ctx->scheduled.empty ()) { + if (priv->ctx->scheduled.front () > completed) + break; + + priv->ctx->scheduled.pop (); + } + + auto async_depth = priv->async_depth.load (); + if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) { + auto fence_to_wait = priv->ctx->scheduled.front (); + priv->ctx->scheduled.pop (); + gst_d3d12_device_fence_wait (self->device, + D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); + } + if (priv->downstream_supports_d3d12) { buffer = convert_buffer; convert_buffer = nullptr;