From 9914ff9b4c14343856e25e7b2b341551c6c22ec8 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Mon, 19 Dec 2022 18:41:46 +0900 Subject: [PATCH] nvdec: Don't use default CUDA stream NVDEC launches a CUDA kernel function (ConvertNV12BLtoNV12 or so) when CuvidMapVideoFrame() is called, which seems to be NVDEC's internal post-processing kernel function, maybe to convert tiled YUV to linear YUV format or something similar. A problem if we don't pass a CUDA stream to the CuvidMapVideoFrame() call is that NVDEC's internal kernel function will use the default CUDA stream. Then lots of the other CUDA API calls will be blocked/serialized. To avoid the unnecessary blocking, we should pass our own CUDA stream object to the CuvidMapVideoFrame() call. Part-of: --- subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c | 2 ++ subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c | 1 + 2 files changed, 3 insertions(+) diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c index ff73773d6f..8c259cf5d7 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c @@ -1167,6 +1167,7 @@ copy_video_frame_to_gl_textures (GstGLContext * context, proc_params.progressive_frame = dispinfo->progressive_frame; proc_params.top_field_first = dispinfo->top_field_first; proc_params.unpaired_field = dispinfo->repeat_first_field == -1; + proc_params.output_stream = nvdec->cuda_stream; data->ret = TRUE; @@ -1309,6 +1310,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec, params.second_field = dispinfo->repeat_first_field + 1; params.top_field_first = dispinfo->top_field_first; params.unpaired_field = dispinfo->repeat_first_field < 0; + params.output_stream = nvdec->cuda_stream; if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder, dispinfo->picture_index, &dptr, &pitch, &params))) { diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c 
b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c index d39938c1c0..e9c7baf585 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c @@ -397,6 +397,7 @@ gst_nv_decoder_frame_map (GstNvDecoderFrame * frame) /* TODO: check interlaced */ params.progressive_frame = 1; + params.output_stream = self->cuda_stream; if (frame->mapped) { GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame);