From 03425bc7022b0a5dba6322b0c67005b1b292204c Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Wed, 8 Feb 2023 02:25:35 +0900 Subject: [PATCH] nvdecoder: Add support for CUDA zero-copy in stateless decoder Wrap mapped decoder output surface using GstCudaMemory and output without any copy operation. Also, for application to be able to control the number of zero-copyable output surfaces, "num-output-surfaces" property is added. Part-of: --- .../sys/nvcodec/gstnvav1dec.cpp | 180 ++++-- .../sys/nvcodec/gstnvdecobject.cpp | 513 ++++++++++++++++++ .../sys/nvcodec/gstnvdecobject.h | 91 ++++ .../sys/nvcodec/gstnvdecoder.cpp | 484 +++++++---------- .../sys/nvcodec/gstnvdecoder.h | 44 +- .../sys/nvcodec/gstnvh264dec.cpp | 172 ++++-- .../sys/nvcodec/gstnvh265dec.cpp | 163 ++++-- .../sys/nvcodec/gstnvvp8dec.cpp | 167 ++++-- .../sys/nvcodec/gstnvvp9dec.cpp | 168 ++++-- .../gst-plugins-bad/sys/nvcodec/meson.build | 1 + 10 files changed, 1459 insertions(+), 524 deletions(-) create mode 100644 subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp create mode 100644 subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.h diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp index 5b8ce5e19f..d09cd0e544 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp @@ -69,6 +69,8 @@ typedef struct _GstNvAV1Dec guint max_height; guint bitdepth; guint8 film_grain_params_present; + + guint num_output_surfaces; } GstNvAV1Dec; typedef struct _GstNvAV1DecClass @@ -81,14 +83,19 @@ enum { PROP_0, PROP_CUDA_DEVICE_ID, + PROP_NUM_OUTPUT_SURFACES, }; +#define DEFAULT_NUM_OUTPUT_SURFACES 0 + static GTypeClass *parent_class = nullptr; #define GST_NV_AV1_DEC(object) ((GstNvAV1Dec *) (object)) #define GST_NV_AV1_DEC_GET_CLASS(object) \ (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvAV1DecClass)) +static void gst_nv_av1_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); static void gst_nv_av1_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); @@ -96,11 +103,14 @@ static void gst_nv_av1_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_av1_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_av1_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); +static gboolean gst_nv_av1_dec_sink_event (GstVideoDecoder * decoder, + GstEvent * event); static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size); @@ -128,6 +138,7 @@ gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); GstAV1DecoderClass *av1decoder_class = GST_AV1_DECODER_CLASS (klass); + object_class->set_property = gst_nv_av1_dec_set_property; object_class->get_property = gst_nv_av1_dec_get_property; g_object_class_install_property (object_class, PROP_CUDA_DEVICE_ID, @@ -135,6 +146,23 @@ gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | 
G_PARAM_STATIC_STRINGS))); + /** + * GstNvAV1Dec:num-output-surfaces: + * + * The number of output surfaces (0 = auto). This property will be used to + * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter + * in case of CUDA output mode + * + * Since: 1.24 + */ + g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, + g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", + "Maximum number of output surfaces simultaneously mapped in CUDA " + "output mode (0 = auto)", + 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, + (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | + G_PARAM_STATIC_STRINGS))); + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); @@ -151,10 +179,12 @@ gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_close); + decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_src_query); + decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_sink_event); av1decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_sequence); @@ -183,18 +213,39 @@ gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, static void gst_nv_av1_dec_init (GstNvAV1Dec * self) { + self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; +} + +static void +gst_nv_av1_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (object); + + switch (prop_id) { + case PROP_NUM_OUTPUT_SURFACES: + self->num_output_surfaces = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } } static void gst_nv_av1_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { + GstNvAV1Dec *self = GST_NV_AV1_DEC (object); GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; + case PROP_NUM_OUTPUT_SURFACES: + g_value_set_uint (value, self->num_output_surfaces); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -279,6 +330,20 @@ gst_nv_av1_dec_close (GstVideoDecoder * decoder) return TRUE; } +static gboolean +gst_nv_av1_dec_stop (GstVideoDecoder * decoder) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + gboolean ret; + + ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); + + if (self->decoder) + gst_nv_decoder_reset (self->decoder); + + return ret; +} + static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder) { @@ -328,6 +393,29 @@ gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } +static gboolean +gst_nv_av1_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + + if (!self->decoder) + goto done; + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_FLUSH_START: + gst_nv_decoder_set_flushing (self->decoder, TRUE); + break; + case GST_EVENT_FLUSH_STOP: + gst_nv_decoder_set_flushing (self->decoder, FALSE); + break; + default: + break; + } + 
+done: + return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); +} + static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size) @@ -389,11 +477,13 @@ gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, } gst_video_info_set_format (&info, - out_format, self->max_width, self->max_height); + out_format, GST_ROUND_UP_2 (self->max_width), + GST_ROUND_UP_2 (self->max_height)); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_AV1, &info, self->max_width, self->max_height, self->bitdepth, - max_dpb_size, self->film_grain_params_present ? TRUE : FALSE)) { + max_dpb_size, self->film_grain_params_present ? TRUE : FALSE, + self->num_output_surfaces)) { GST_ERROR_OBJECT (self, "Failed to create decoder"); return GST_FLOW_NOT_NEGOTIATED; } @@ -412,35 +502,33 @@ gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; + GstFlowReturn ret; - nv_frame = gst_nv_decoder_new_frame (self->decoder); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "No available decoder frame"); - return GST_FLOW_ERROR; - } + ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); + if (ret != GST_FLOW_OK) + return ret; GST_LOG_OBJECT (self, - "New decoder frame %p (index %d)", nv_frame, nv_frame->index); + "New decoder surface %p (index %d)", surface, surface->index); gst_av1_picture_set_user_data (picture, - nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } -static GstNvDecoderFrame * -gst_nv_av1_dec_get_decoder_frame_from_picture (GstNvAV1Dec * self, +static GstNvDecSurface * +gst_nv_av1_dec_get_decoder_surface_from_picture (GstNvAV1Dec * self, GstAV1Picture * picture) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; - frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture); + surface = (GstNvDecSurface *) gst_av1_picture_get_user_data (picture); + if (!surface) + GST_DEBUG_OBJECT (self, "current picture does not have decoder surface"); - if (!frame) - GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); - - return frame; + return surface; } static GstAV1Picture * @@ -448,13 +536,13 @@ gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; GstAV1Picture *new_picture; - nv_frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture); + surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, picture); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "Parent picture does not have decoder frame"); + if (!surface) { + GST_ERROR_OBJECT (self, "Parent picture does not have decoder surface"); return nullptr; } @@ -462,8 +550,8 @@ gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder, new_picture->frame_hdr = picture->frame_hdr; gst_av1_picture_set_user_data (new_picture, - gst_nv_decoder_frame_ref (nv_frame), - (GDestroyNotify) gst_nv_decoder_frame_unref); + gst_nv_dec_surface_ref (surface), + (GDestroyNotify) gst_nv_dec_surface_unref); return new_picture; } @@ -504,13 +592,13 @@ gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, const GstAV1LoopFilterParams *lp = &frame_hdr->loop_filter_params; const GstAV1LoopRestorationParams *lrp = 
&frame_hdr->loop_restoration_params; const GstAV1FilmGrainParams *fgp = &frame_hdr->film_grain_params; - GstNvDecoderFrame *frame; - GstNvDecoderFrame *other_frame; + GstNvDecSurface *surface; + GstNvDecSurface *other_surface; GstAV1Picture *other_pic; guint i, j; - frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture); - if (!frame) { + surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, picture); + if (!surface) { GST_ERROR_OBJECT (self, "Decoder frame is unavailable"); return GST_FLOW_ERROR; } @@ -519,13 +607,13 @@ gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, params->PicWidthInMbs = GST_ROUND_UP_16 (frame_hdr->frame_width) >> 4; params->FrameHeightInMbs = GST_ROUND_UP_16 (frame_hdr->frame_height) >> 4; - params->CurrPicIdx = frame->index; + params->CurrPicIdx = surface->index; params->intra_pic_flag = frame_hdr->frame_is_intra; av1_params->width = frame_hdr->frame_width; av1_params->height = frame_hdr->frame_height; av1_params->frame_offset = frame_hdr->order_hint; - av1_params->decodePicIdx = frame->decode_frame_index; + av1_params->decodePicIdx = surface->decode_frame_index; /* sequence header */ av1_params->profile = seq_hdr->seq_profile; @@ -675,14 +763,14 @@ gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, other_pic = dpb->pic_list[i]; if (other_pic) { - other_frame = - gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic); - if (!other_frame) { + other_surface = + gst_nv_av1_dec_get_decoder_surface_from_picture (self, other_pic); + if (!other_surface) { GST_ERROR_OBJECT (self, "reference frame is unavailable"); return GST_FLOW_ERROR; } - ref_idx = other_frame->decode_frame_index; + ref_idx = other_surface->decode_frame_index; } av1_params->ref_frame_map[i] = ref_idx; @@ -711,10 +799,10 @@ gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, other_pic = dpb->pic_list[ref_idx]; if (other_pic) { - other_frame = - gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic); + other_surface = + gst_nv_av1_dec_get_decoder_surface_from_picture (self, other_pic); - av1_params->ref_frame[i].index = other_frame->decode_frame_index; + av1_params->ref_frame[i].index = other_surface->decode_frame_index; av1_params->ref_frame[i].width = other_pic->frame_hdr.frame_width; av1_params->ref_frame[i].height = other_pic->frame_hdr.frame_height; } else { @@ -835,7 +923,7 @@ gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, GstAV1Picture * picture) params->nNumSlices = self->num_tiles; params->pSliceDataOffsets = self->tile_offsets; - ret = gst_nv_decoder_decode_picture (self->decoder, params); + ret = gst_nv_decoder_decode (self->decoder, params); if (!ret) { GST_ERROR_OBJECT (self, "Failed to decode picture"); @@ -851,21 +939,21 @@ gst_nv_av1_dec_output_picture (GstAV1Decoder * decoder, { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); - GstNvDecoderFrame *decoder_frame; + GstNvDecSurface *surface; + GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p", picture); - decoder_frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture); - if (!decoder_frame) { + surface = (GstNvDecSurface *) gst_av1_picture_get_user_data (picture); + if (!surface) { GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); goto error; } - if (!gst_nv_decoder_finish_frame (self->decoder, vdec, picture->discont_state, - decoder_frame, &frame->output_buffer)) { - GST_ERROR_OBJECT (self, 
"Failed to handle output picture"); + ret = gst_nv_decoder_finish_surface (self->decoder, + vdec, picture->discont_state, surface, &frame->output_buffer); + if (ret != GST_FLOW_OK) goto error; - } gst_av1_picture_unref (picture); @@ -875,7 +963,7 @@ error: gst_video_decoder_drop_frame (vdec, frame); gst_av1_picture_unref (picture); - return GST_FLOW_ERROR; + return ret; } static guint diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp new file mode 100644 index 0000000000..22f31ecf18 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp @@ -0,0 +1,513 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gstnvdecobject.h" +#include +#include +#include +#include +#include +#include +#include + +extern "C" +{ + GST_DEBUG_CATEGORY_EXTERN (gst_nv_decoder_debug); +} + +#define GST_CAT_DEFAULT gst_nv_decoder_debug + +GST_DEFINE_MINI_OBJECT_TYPE (GstNvDecSurface, gst_nv_dec_surface); +static GstNvDecSurface *gst_nv_dec_surface_new (void); + +/* *INDENT-OFF* */ +struct GstNvDecOutput +{ + GstNvDecObject *self = nullptr; + CUdeviceptr devptr = 0; +}; + +struct GstNvDecObjectPrivate +{ + std::vector < GstNvDecSurface * >surface_queue; + std::map < CUdeviceptr, GstMemory *> output_map; + std::map < CUdeviceptr, GstMemory *> free_output_map; + + std::mutex lock; + std::condition_variable cond; +}; +/* *INDENT-ON* */ + +struct _GstNvDecObject +{ + GstObject parent; + + GstNvDecObjectPrivate *priv; + + CUvideodecoder handle; + CUVIDDECODECREATEINFO create_info; + + GstVideoInfo video_info; + + GstCudaContext *context; + + gboolean flushing; + + guint pool_size; + guint num_mapped; + gboolean alloc_aux_frame; +}; + +static void gst_nv_dec_object_finalize (GObject * object); + +#define gst_nv_dec_object_parent_class parent_class +G_DEFINE_TYPE (GstNvDecObject, gst_nv_dec_object, GST_TYPE_OBJECT); + +static void +gst_nv_dec_object_class_init (GstNvDecObjectClass * klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + + object_class->finalize = gst_nv_dec_object_finalize; +} + +static void +gst_nv_dec_object_init (GstNvDecObject * self) +{ + self->priv = new GstNvDecObjectPrivate (); +} + +static void +gst_nv_dec_object_finalize (GObject * object) +{ + GstNvDecObject *self = GST_NV_DEC_OBJECT (object); + GstNvDecObjectPrivate *priv = self->priv; + + GST_DEBUG_OBJECT (self, "Finalize"); + + gst_cuda_context_push (self->context); + /* *INDENT-OFF* */ + for (auto it : priv->surface_queue) + gst_nv_dec_surface_unref (it); + + /* *INDENT-OFF* */ + for (auto it : priv->free_output_map) + gst_memory_unref (it.second); + /* *INDENT-ON* */ + + delete self->priv; + + CuvidDestroyDecoder 
(self->handle); + gst_cuda_context_pop (nullptr); + + gst_object_unref (self->context); + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + +GstNvDecObject * +gst_nv_dec_object_new (GstCudaContext * context, + CUVIDDECODECREATEINFO * create_info, const GstVideoInfo * video_info, + gboolean alloc_aux_frame) +{ + GstNvDecObject *self; + CUresult ret; + CUvideodecoder handle = nullptr; + guint pool_size; + + if (!gst_cuda_context_push (context)) { + GST_ERROR_OBJECT (context, "Failed to push context"); + return nullptr; + } + + ret = CuvidCreateDecoder (&handle, create_info); + gst_cuda_context_pop (nullptr); + + if (!gst_cuda_result (ret)) { + GST_ERROR_OBJECT (context, "Could not create decoder instance"); + return nullptr; + } + + pool_size = create_info->ulNumDecodeSurfaces; + if (alloc_aux_frame) + pool_size /= 2; + + self = (GstNvDecObject *) + g_object_new (GST_TYPE_NV_DEC_OBJECT, nullptr); + gst_object_ref_sink (self); + self->context = (GstCudaContext *) gst_object_ref (context); + self->handle = handle; + self->create_info = *create_info; + self->video_info = *video_info; + self->pool_size = pool_size; + + for (guint i = 0; i < pool_size; i++) { + GstNvDecSurface *surf = gst_nv_dec_surface_new (); + + surf->index = i; + + /* [0, pool_size - 1]: output picture + * [pool_size, pool_size * 2 - 1]: decoder output without film-grain, + * used for reference picture */ + if (alloc_aux_frame) + surf->decode_frame_index = i + pool_size; + else + surf->decode_frame_index = i; + + self->priv->surface_queue.push_back (surf); + } + + return self; +} + +void +gst_nv_dec_object_set_flushing (GstNvDecObject * object, gboolean flushing) +{ + GstNvDecObjectPrivate *priv = object->priv; + std::lock_guard < std::mutex > lk (priv->lock); + object->flushing = flushing; + priv->cond.notify_all (); +} + +static gboolean +gst_nv_dec_object_unmap_surface_unlocked (GstNvDecObject * self, + GstNvDecSurface * surface) +{ + gboolean ret = TRUE; + + if (!gst_cuda_result (CuvidUnmapVideoFrame (self->handle, surface->devptr))) { + GST_ERROR_OBJECT (self, "Couldn't unmap surface %d", surface->index); + ret = FALSE; + } else { + surface->devptr = 0; + self->num_mapped--; + + GST_LOG_OBJECT (self, "Surface %d is unmapped, num-mapped %d", + surface->index, self->num_mapped); + } + self->priv->cond.notify_all (); + + return ret; +} + +GstFlowReturn +gst_nv_dec_object_acquire_surface (GstNvDecObject * object, + GstNvDecSurface ** surface) +{ + GstNvDecObjectPrivate *priv = object->priv; + GstNvDecSurface *surf = nullptr; + std::unique_lock < std::mutex > lk (priv->lock); + + do { + if (object->flushing) { + GST_DEBUG_OBJECT (object, "We are flushing"); + return GST_FLOW_FLUSHING; + } + + if (!priv->surface_queue.empty ()) { + surf = priv->surface_queue[0]; + priv->surface_queue.erase (priv->surface_queue.begin ()); + break; + } + + GST_LOG_OBJECT (object, "No available surface, waiting for release"); + priv->cond.wait (lk); + } while (true); + + g_assert (surf); + g_assert (!surf->object); + + surf->object = (GstNvDecObject *) gst_object_ref (object); + + *surface = surf; + + return GST_FLOW_OK; +} + +gboolean +gst_nv_dec_object_decode (GstNvDecObject * object, CUVIDPICPARAMS * params) +{ + gboolean ret = TRUE; + + GST_LOG_OBJECT (object, "picture index: %u", params->CurrPicIdx); + + if (!gst_cuda_context_push (object->context)) { + GST_ERROR_OBJECT (object, "Failed to push CUDA context"); + return FALSE; + } + + if (!gst_cuda_result (CuvidDecodePicture (object->handle, params))) { + GST_ERROR_OBJECT (object, 
"Failed to decode picture"); + ret = FALSE; + } + + if (!gst_cuda_context_pop (nullptr)) + GST_WARNING_OBJECT (object, "Failed to pop CUDA context"); + + return ret; +} + +GstFlowReturn +gst_nv_dec_object_map_surface (GstNvDecObject * object, + GstNvDecSurface * surface, GstCudaStream * stream) +{ + GstNvDecObjectPrivate *priv = object->priv; + + if (surface->devptr) { + GST_ERROR_OBJECT (object, "Mapped Surface %d was not cleared", + surface->index); + return GST_FLOW_ERROR; + } + + std::unique_lock < std::mutex > lk (priv->lock); + do { + if (object->flushing) { + GST_DEBUG_OBJECT (object, "We are flushing"); + return GST_FLOW_FLUSHING; + } + + if (object->num_mapped < (guint) object->create_info.ulNumOutputSurfaces) { + CUVIDPROCPARAMS params = { 0 }; + + params.progressive_frame = 1; + params.output_stream = gst_cuda_stream_get_handle (stream); + + if (!gst_cuda_result (CuvidMapVideoFrame (object->handle, surface->index, + &surface->devptr, &surface->pitch, ¶ms))) { + GST_ERROR_OBJECT (object, "Couldn't map picture"); + return GST_FLOW_ERROR; + } + + object->num_mapped++; + GST_LOG_OBJECT (object, "Surface %d is mapped, num-mapped %d", + surface->index, object->num_mapped); + break; + } + + GST_LOG_OBJECT (object, "No available output surface, waiting for release"); + priv->cond.wait (lk); + } while (true); + + return GST_FLOW_OK; +} + +gboolean +gst_nv_dec_object_unmap_surface (GstNvDecObject * object, + GstNvDecSurface * surface) +{ + GstNvDecObjectPrivate *priv = object->priv; + std::lock_guard < std::mutex > lk (priv->lock); + + return gst_nv_dec_object_unmap_surface_unlocked (object, surface); +} + +static gboolean +gst_nv_dec_output_release (GstCudaMemory * mem) +{ + GstNvDecOutput *output = (GstNvDecOutput *) + gst_cuda_memory_get_user_data (mem); + GstNvDecObject *self = output->self; + GstNvDecObjectPrivate *priv = self->priv; + + GST_LOG_OBJECT (self, "Release memory %p", mem); + + gst_memory_ref (GST_MEMORY_CAST (mem)); + GST_MINI_OBJECT_CAST (mem)->dispose = nullptr; + + output->self = nullptr; + + { + std::lock_guard < std::mutex > lk (priv->lock); + + self->num_mapped--; + gst_cuda_context_push (self->context); + if (!gst_cuda_result (CuvidUnmapVideoFrame (self->handle, output->devptr))) { + GST_ERROR_OBJECT (self, "Couldn't unmap frame"); + } else { + GST_LOG_OBJECT (self, "Exported surface is freed, num-mapped %d", + self->num_mapped); + } + gst_cuda_context_pop (nullptr); + + priv->free_output_map[output->devptr] = GST_MEMORY_CAST (mem); + priv->cond.notify_all (); + } + + gst_object_unref (self); + + return FALSE; +} + +static void +gst_nv_dec_output_free (GstNvDecOutput * output) +{ + delete output; +} + +GstFlowReturn +gst_nv_dec_object_export_surface (GstNvDecObject * object, + GstNvDecSurface * surface, GstCudaStream * stream, GstMemory ** memory) +{ + GstNvDecObjectPrivate *priv = object->priv; + GstVideoInfo info; + gsize offset; + GstMemory *mem = nullptr; + GstNvDecOutput *output; + + if (!surface->devptr) { + GST_ERROR_OBJECT (object, "Surface %d is not mapped", surface->index); + return GST_FLOW_ERROR; + } + + GST_LOG_OBJECT (object, "Exporting surface %d", surface->index); + + offset = surface->pitch * object->create_info.ulTargetHeight; + + info = object->video_info; + switch (GST_VIDEO_INFO_FORMAT (&info)) { + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P016_LE: + info.stride[0] = surface->pitch; + info.stride[1] = surface->pitch; + info.offset[0] = 0; + info.offset[1] = offset; + info.size = offset + offset / 2; 
+ break; + case GST_VIDEO_FORMAT_Y444: + case GST_VIDEO_FORMAT_Y444_16LE: + info.stride[0] = surface->pitch; + info.stride[1] = surface->pitch; + info.stride[2] = surface->pitch; + info.offset[0] = 0; + info.offset[1] = offset; + info.offset[2] = offset * 2; + info.size = offset * 3; + break; + default: + GST_ERROR_OBJECT (object, "Unexpected format %s", + gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&info))); + return GST_FLOW_ERROR; + } + + std::unique_lock < std::mutex > lk (priv->lock); + auto output_iter = priv->output_map.find (surface->devptr); + if (output_iter != priv->output_map.end ()) + mem = output_iter->second; + + if (mem) { + do { + if (object->flushing) { + GST_DEBUG_OBJECT (object, "We are flushing"); + return GST_FLOW_FLUSHING; + } + + auto iter = priv->free_output_map.find (surface->devptr); + if (iter != priv->free_output_map.end ()) { + priv->free_output_map.erase (iter); + break; + } + + GST_LOG_OBJECT (object, "Waiting for output release"); + priv->cond.wait (lk); + } while (true); + } + + if (!mem) { + output = new GstNvDecOutput (); + output->devptr = surface->devptr; + + GST_LOG_OBJECT (object, "New output, allocating memory"); + + mem = gst_cuda_allocator_alloc_wrapped (nullptr, object->context, + stream, &info, output->devptr, output, + (GDestroyNotify) gst_nv_dec_output_free); + + priv->output_map[output->devptr] = mem; + } else { + GST_LOG_OBJECT (object, "Reuse memory"); + } + + GST_MINI_OBJECT_CAST (mem)->dispose = + (GstMiniObjectDisposeFunction) gst_nv_dec_output_release; + + output = (GstNvDecOutput *) + gst_cuda_memory_get_user_data (GST_CUDA_MEMORY_CAST (mem)); + + g_assert (!output->self); + + output->self = (GstNvDecObject *) gst_object_ref (object); + surface->devptr = 0; + + *memory = mem; + + return GST_FLOW_OK; +} + +static gboolean +gst_nv_dec_surface_dispose (GstNvDecSurface * surf) +{ + GstNvDecObject *object; + GstNvDecObjectPrivate *priv; + + if (!surf->object) + return TRUE; + + object = (GstNvDecObject *) g_steal_pointer (&surf->object); + priv = object->priv; + + /* Back to surface queue */ + gst_nv_dec_surface_ref (surf); + + /* *INDENT-OFF* */ + { + std::lock_guard < std::mutex > lk (priv->lock); + /* Keep sorted order */ + priv->surface_queue.insert ( + std::upper_bound (priv->surface_queue.begin (), + priv->surface_queue.end(), surf, + [] (const GstNvDecSurface * a, const GstNvDecSurface * b) + { + return a->index < b->index; + }), surf); + priv->cond.notify_all (); + } + /* *INDENT-ON* */ + + gst_object_unref (object); + + return FALSE; +} + +static GstNvDecSurface * +gst_nv_dec_surface_new (void) +{ + GstNvDecSurface *surf = g_new0 (GstNvDecSurface, 1); + + gst_mini_object_init (GST_MINI_OBJECT_CAST (surf), + 0, GST_TYPE_NV_DEC_SURFACE, nullptr, + (GstMiniObjectDisposeFunction) gst_nv_dec_surface_dispose, + (GstMiniObjectFreeFunction) g_free); + + return surf; +} diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.h new file mode 100644 index 0000000000..f33e154aa4 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.h @@ -0,0 +1,91 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include +#include +#include +#include "gstcuvidloader.h" + +G_BEGIN_DECLS + +#define GST_TYPE_NV_DEC_OBJECT (gst_nv_dec_object_get_type()) +G_DECLARE_FINAL_TYPE (GstNvDecObject, + gst_nv_dec_object, GST, NV_DEC_OBJECT, GstObject); + +#define GST_TYPE_NV_DEC_SURFACE (gst_nv_dec_surface_get_type()) +typedef struct _GstNvDecSurface GstNvDecSurface; + +struct _GstNvDecSurface +{ + GstMiniObject parent; + + GstNvDecObject *object; + + gint index; + gint decode_frame_index; + + CUdeviceptr devptr; + guint pitch; +}; + +GstNvDecObject * gst_nv_dec_object_new (GstCudaContext * context, + CUVIDDECODECREATEINFO * create_info, + const GstVideoInfo * video_info, + gboolean alloc_aux_frame); + +void gst_nv_dec_object_set_flushing (GstNvDecObject * object, + gboolean flushing); + +GstFlowReturn gst_nv_dec_object_acquire_surface (GstNvDecObject * object, + GstNvDecSurface ** surface); + +gboolean gst_nv_dec_object_decode (GstNvDecObject * object, + CUVIDPICPARAMS * params); + +GstFlowReturn gst_nv_dec_object_map_surface (GstNvDecObject * object, + GstNvDecSurface * surface, + GstCudaStream * stream); + +gboolean gst_nv_dec_object_unmap_surface (GstNvDecObject * object, + GstNvDecSurface * surface); + +GstFlowReturn gst_nv_dec_object_export_surface (GstNvDecObject * object, + GstNvDecSurface * surface, + GstCudaStream * stream, + GstMemory ** memory); + +GType gst_nv_dec_surface_get_type (void); + +static inline GstNvDecSurface * +gst_nv_dec_surface_ref (GstNvDecSurface * surface) +{ + return (GstNvDecSurface *) + gst_mini_object_ref (GST_MINI_OBJECT_CAST (surface)); +} + +static inline void +gst_nv_dec_surface_unref (GstNvDecSurface * surface) +{ + gst_mini_object_unref (GST_MINI_OBJECT_CAST (surface)); +} + +G_END_DECLS + diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp index 12a925e129..83d2c7b9dc 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp @@ -67,11 +67,6 @@ extern "C" #define SUPPORTED_GL_APIS (GstGLAPI) (GST_GL_API_OPENGL | GST_GL_API_OPENGL3) #endif -typedef struct _GstNvDecoderFrameInfo -{ - gboolean available; -} GstNvDecoderFrameInfo; - typedef enum { GST_NV_DECODER_OUTPUT_TYPE_SYSTEM = 0, @@ -83,18 +78,21 @@ typedef enum struct _GstNvDecoder { GstObject parent; + + GstNvDecObject *object; GstCudaContext *context; GstCudaStream *stream; - CUvideodecoder decoder_handle; - - GstNvDecoderFrameInfo *frame_pool; - guint pool_size; - gboolean alloc_aux_frame; GstVideoInfo info; GstVideoInfo coded_info; + CUVIDDECODECREATEINFO create_info; + gboolean alloc_aux_frame; gboolean configured; + guint downstream_min_buffers; + guint num_output_surfaces; + + GMutex lock; /* For OpenGL interop. 
*/ GstObject *gl_display; @@ -105,7 +103,8 @@ struct _GstNvDecoder }; static void gst_nv_decoder_dispose (GObject * object); -static void gst_nv_decoder_reset (GstNvDecoder * self); +static void gst_nv_decoder_finalize (GObject * object); +static void gst_nv_decoder_reset_unlocked (GstNvDecoder * self); #define parent_class gst_nv_decoder_parent_class G_DEFINE_TYPE (GstNvDecoder, gst_nv_decoder, GST_TYPE_OBJECT); @@ -116,11 +115,13 @@ gst_nv_decoder_class_init (GstNvDecoderClass * klass) GObjectClass *gobject_class = G_OBJECT_CLASS (klass); gobject_class->dispose = gst_nv_decoder_dispose; + gobject_class->finalize = gst_nv_decoder_finalize; } static void gst_nv_decoder_init (GstNvDecoder * self) { + g_mutex_init (&self->lock); } static void @@ -128,7 +129,7 @@ gst_nv_decoder_dispose (GObject * object) { GstNvDecoder *self = GST_NV_DECODER (object); - gst_nv_decoder_reset (self); + gst_nv_decoder_reset_unlocked (self); gst_clear_cuda_stream (&self->stream); gst_clear_object (&self->context); @@ -139,6 +140,16 @@ gst_nv_decoder_dispose (GObject * object) G_OBJECT_CLASS (parent_class)->dispose (object); } +static void +gst_nv_decoder_finalize (GObject * object) +{ + GstNvDecoder *self = GST_NV_DECODER (object); + + g_mutex_clear (&self->lock); + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + static cudaVideoChromaFormat chroma_format_from_video_format (GstVideoFormat format) { @@ -185,19 +196,6 @@ output_format_from_video_format (GstVideoFormat format) return cudaVideoSurfaceFormat_NV12; } -static gboolean -gst_nv_decoder_prepare_frame_pool (GstNvDecoder * self, guint pool_size) -{ - self->frame_pool = g_new (GstNvDecoderFrameInfo, pool_size); - - for (guint i = 0; i < pool_size; i++) - self->frame_pool[i].available = TRUE; - - self->pool_size = pool_size; - - return TRUE; -} - GstNvDecoder * gst_nv_decoder_new (GstCudaContext * context) { @@ -227,29 +225,27 @@ gst_nv_decoder_is_configured (GstNvDecoder * decoder) } static void -gst_nv_decoder_reset (GstNvDecoder * self) +gst_nv_decoder_reset_unlocked (GstNvDecoder * self) { - g_clear_pointer (&self->frame_pool, g_free); + if (self->object) + gst_nv_dec_object_set_flushing (self->object, TRUE); - if (self->decoder_handle) { - gst_cuda_context_push (self->context); - CuvidDestroyDecoder (self->decoder_handle); - gst_cuda_context_pop (nullptr); - self->decoder_handle = nullptr; - } + gst_clear_object (&self->object); self->output_type = GST_NV_DECODER_OUTPUT_TYPE_SYSTEM; self->configured = FALSE; + self->downstream_min_buffers = 0; + self->num_output_surfaces = 0; } gboolean gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, GstVideoInfo * info, gint coded_width, gint coded_height, - guint coded_bitdepth, guint pool_size, gboolean alloc_aux_frame) + guint coded_bitdepth, guint pool_size, gboolean alloc_aux_frame, + guint num_output_surfaces) { CUVIDDECODECREATEINFO create_info = { 0, }; GstVideoFormat format; - gboolean ret; guint alloc_size; g_return_val_if_fail (GST_IS_NV_DECODER (decoder), FALSE); @@ -260,7 +256,9 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, g_return_val_if_fail (coded_bitdepth >= 8, FALSE); g_return_val_if_fail (pool_size > 0, FALSE); - gst_nv_decoder_reset (decoder); + g_mutex_lock (&decoder->lock); + gst_nv_decoder_reset_unlocked (decoder); + g_mutex_unlock (&decoder->lock); decoder->info = *info; gst_video_info_set_format (&decoder->coded_info, GST_VIDEO_INFO_FORMAT (info), @@ -273,15 +271,15 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec 
codec, /* Need pool size * 2 for decode-only (used for reference) frame * and output frame, AV1 film grain case for example */ + decoder->alloc_aux_frame = alloc_aux_frame; if (alloc_aux_frame) { alloc_size = pool_size * 2; } else { alloc_size = pool_size; } - decoder->alloc_aux_frame = alloc_aux_frame; + decoder->num_output_surfaces = num_output_surfaces; - /* FIXME: check aligned resolution or actual coded resolution */ create_info.ulWidth = GST_VIDEO_INFO_WIDTH (&decoder->coded_info); create_info.ulHeight = GST_VIDEO_INFO_HEIGHT (&decoder->coded_info); create_info.ulNumDecodeSurfaces = alloc_size; @@ -300,7 +298,7 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, create_info.ulTargetWidth = GST_VIDEO_INFO_WIDTH (info); create_info.ulTargetHeight = GST_VIDEO_INFO_HEIGHT (info); - /* we always copy decoded picture to output buffer */ + /* Will be updated on negotiate() */ create_info.ulNumOutputSurfaces = 1; create_info.target_rect.left = 0; @@ -308,191 +306,56 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, create_info.target_rect.right = GST_VIDEO_INFO_WIDTH (info); create_info.target_rect.bottom = GST_VIDEO_INFO_HEIGHT (info); - if (!gst_cuda_context_push (decoder->context)) { - GST_ERROR_OBJECT (decoder, "Failed to lock CUDA context"); - return FALSE; - } - - ret = gst_cuda_result (CuvidCreateDecoder (&decoder->decoder_handle, - &create_info)); - gst_cuda_context_pop (nullptr); - - if (!ret) { - GST_ERROR_OBJECT (decoder, "Cannot create decoder instance"); - return FALSE; - } - - if (!gst_nv_decoder_prepare_frame_pool (decoder, pool_size)) { - GST_ERROR_OBJECT (decoder, "Cannot prepare internal surface buffer pool"); - gst_nv_decoder_reset (decoder); - return FALSE; - } - + decoder->create_info = create_info; decoder->configured = TRUE; return TRUE; } -GstNvDecoderFrame * -gst_nv_decoder_new_frame (GstNvDecoder * decoder) +GstFlowReturn +gst_nv_decoder_acquire_surface (GstNvDecoder * decoder, + GstNvDecSurface ** surface) { - GstNvDecoderFrame *frame; - gint index_to_use = -1; + g_return_val_if_fail (GST_IS_NV_DECODER (decoder), GST_FLOW_ERROR); - g_return_val_if_fail (GST_IS_NV_DECODER (decoder), nullptr); - - for (guint i = 0; i < decoder->pool_size; i++) { - if (decoder->frame_pool[i].available) { - decoder->frame_pool[i].available = FALSE; - index_to_use = (gint) i; - break; - } - } - - if (index_to_use < 0) { - GST_ERROR_OBJECT (decoder, "No available frame"); - return nullptr; - } - - frame = g_new0 (GstNvDecoderFrame, 1); - frame->index = index_to_use; - frame->decode_frame_index = index_to_use; - frame->decoder = (GstNvDecoder *) gst_object_ref (decoder); - frame->ref_count = 1; - if (decoder->alloc_aux_frame) { - /* [0, pool_size - 1]: output picture - * [pool_size, pool_size * 2 - 1]: decoder output without film-grain, - * used for reference picture */ - frame->decode_frame_index = index_to_use + decoder->pool_size; - } - - GST_LOG_OBJECT (decoder, "New frame %p (index %d)", frame, frame->index); - - return frame; -} - -/* must be called with gst_cuda_context_push */ -static gboolean -gst_nv_decoder_frame_map (GstNvDecoderFrame * frame, GstCudaStream * stream) -{ - GstNvDecoder *self; - CUVIDPROCPARAMS params = { 0 }; - - g_return_val_if_fail (frame != nullptr, FALSE); - g_return_val_if_fail (frame->index >= 0, FALSE); - g_return_val_if_fail (GST_IS_NV_DECODER (frame->decoder), FALSE); - - self = frame->decoder; - - /* TODO: check interlaced */ - params.progressive_frame = 1; - params.output_stream = 
gst_cuda_stream_get_handle (stream); - - if (frame->mapped) { - GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame); - return TRUE; - } - - if (!gst_cuda_result (CuvidMapVideoFrame (self->decoder_handle, - frame->index, &frame->devptr, &frame->pitch, ¶ms))) { - GST_ERROR_OBJECT (self, "Cannot map picture"); - return FALSE; - } - - frame->mapped = TRUE; - - return TRUE; -} - -/* must be called with gst_cuda_context_push */ -static void -gst_nv_decoder_frame_unmap (GstNvDecoderFrame * frame) -{ - GstNvDecoder *self; - - g_return_if_fail (frame != nullptr); - g_return_if_fail (frame->index >= 0); - g_return_if_fail (GST_IS_NV_DECODER (frame->decoder)); - - self = frame->decoder; - - if (!frame->mapped) { - GST_WARNING_OBJECT (self, "Frame %p is not mapped", frame); - return; - } - - if (!gst_cuda_result (CuvidUnmapVideoFrame (self->decoder_handle, - frame->devptr))) { - GST_ERROR_OBJECT (self, "Cannot unmap picture"); - } - - frame->mapped = FALSE; -} - -GstNvDecoderFrame * -gst_nv_decoder_frame_ref (GstNvDecoderFrame * frame) -{ - g_assert (frame != nullptr); - - g_atomic_int_add (&frame->ref_count, 1); - - return frame; -} - -void -gst_nv_decoder_frame_unref (GstNvDecoderFrame * frame) -{ - GstNvDecoder *self; - - g_assert (frame != nullptr); - - if (g_atomic_int_dec_and_test (&frame->ref_count)) { - GST_LOG ("Free frame %p (index %d)", frame, frame->index); - - if (frame->decoder) { - self = frame->decoder; - if (frame->mapped && gst_cuda_context_push (self->context)) { - gst_nv_decoder_frame_unmap (frame); - gst_cuda_context_pop (nullptr); - } - - if ((guint) frame->index < self->pool_size) { - self->frame_pool[frame->index].available = TRUE; + if (!decoder->object) { + if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_CUDA) { + if (decoder->num_output_surfaces == 0 || + decoder->num_output_surfaces < decoder->downstream_min_buffers) { + /* Auto mode or user specified num-output-surfaces value is too small */ + decoder->create_info.ulNumOutputSurfaces = + decoder->downstream_min_buffers + 2; } else { - GST_WARNING_OBJECT (self, - "Frame %p has invalid index %d", frame, frame->index); + /* Otherwise use user provided value */ + decoder->create_info.ulNumOutputSurfaces = decoder->num_output_surfaces; } - gst_object_unref (self); + GST_INFO_OBJECT (decoder, "Updating ulNumOutputSurfaces to %u, " + "user requested %u, min-downstream %u", + (guint) decoder->create_info.ulNumOutputSurfaces, + decoder->num_output_surfaces, decoder->downstream_min_buffers); } - g_free (frame); + g_mutex_lock (&decoder->lock); + decoder->object = gst_nv_dec_object_new (decoder->context, + &decoder->create_info, &decoder->info, decoder->alloc_aux_frame); + g_mutex_unlock (&decoder->lock); + if (!decoder->object) { + GST_ERROR_OBJECT (decoder, "Couldn't create decoder object"); + return GST_FLOW_ERROR; + } } + + return gst_nv_dec_object_acquire_surface (decoder->object, surface); } gboolean -gst_nv_decoder_decode_picture (GstNvDecoder * decoder, CUVIDPICPARAMS * params) +gst_nv_decoder_decode (GstNvDecoder * decoder, CUVIDPICPARAMS * params) { - GstCudaContext *ctx = decoder->context; - gboolean ret = TRUE; + g_return_val_if_fail (GST_IS_NV_DECODER (decoder), FALSE); + g_return_val_if_fail (decoder->object != nullptr, FALSE); - GST_LOG_OBJECT (decoder, "picture index: %u", params->CurrPicIdx); - - if (!gst_cuda_context_push (ctx)) { - GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); - return FALSE; - } - - if (!gst_cuda_result (CuvidDecodePicture (decoder->decoder_handle, params))) { - 
GST_ERROR_OBJECT (decoder, "Failed to decode picture"); - ret = FALSE; - } - - if (!gst_cuda_context_pop (nullptr)) { - GST_WARNING_OBJECT (decoder, "Failed to pop CUDA context"); - } - - return ret; + return gst_nv_dec_object_decode (decoder->object, params); } #ifdef HAVE_NVCODEC_GST_GL @@ -580,7 +443,7 @@ typedef struct { GstNvDecoder *self; gboolean ret; - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; GstBuffer *output_buffer; } GstNvDecoderCopyToGLData; @@ -589,7 +452,7 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context, GstNvDecoderCopyToGLData * data) { GstNvDecoder *self = data->self; - GstNvDecoderFrame *frame = data->frame; + GstNvDecSurface *surface = data->surface; GstCudaGraphicsResource **resources; guint num_resources; guint i; @@ -602,6 +465,12 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context, num_resources = gst_buffer_n_memory (data->output_buffer); resources = g_newa (GstCudaGraphicsResource *, num_resources); + if (!gst_cuda_context_push (self->context)) { + GST_WARNING_OBJECT (self, "Failed to push CUDA context"); + data->ret = FALSE; + return; + } + for (i = 0; i < num_resources; i++) { GstMemory *mem; @@ -618,14 +487,8 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context, GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD); } - if (!gst_cuda_context_push (self->context)) { - GST_WARNING_OBJECT (self, "Failed to push CUDA context"); - data->ret = FALSE; - return; - } - copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; - copy_params.srcPitch = frame->pitch; + copy_params.srcPitch = surface->pitch; copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE; for (i = 0; i < num_resources; i++) { @@ -652,8 +515,8 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context, copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) * GST_VIDEO_INFO_COMP_PSTRIDE (info, i); - copy_params.srcDevice = frame->devptr + - (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&self->info)); + copy_params.srcDevice = surface->devptr + + (i * surface->pitch * GST_VIDEO_INFO_HEIGHT (&self->info)); copy_params.dstDevice = dst_ptr; copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); @@ -676,12 +539,12 @@ unmap_video_frame: static gboolean gst_nv_decoder_copy_frame_to_gl (GstNvDecoder * decoder, - GstGLContext * context, GstNvDecoderFrame * frame, GstBuffer * buffer) + GstGLContext * context, GstNvDecSurface * surface, GstBuffer * buffer) { GstNvDecoderCopyToGLData data; data.self = decoder; - data.frame = frame; + data.surface = surface; data.output_buffer = buffer; gst_gl_context_thread_add (context, @@ -695,7 +558,7 @@ gst_nv_decoder_copy_frame_to_gl (GstNvDecoder * decoder, static gboolean gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder, - GstNvDecoderFrame * frame, GstBuffer * buffer) + GstNvDecSurface * surface, GstBuffer * buffer) { GstVideoFrame video_frame; CUDA_MEMCPY2D copy_params = { 0, }; @@ -708,21 +571,15 @@ gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder, return FALSE; } - if (!gst_cuda_context_push (decoder->context)) { - GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); - gst_video_frame_unmap (&video_frame); - return FALSE; - } - copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; - copy_params.srcPitch = frame->pitch; + copy_params.srcPitch = surface->pitch; copy_params.dstMemoryType = CU_MEMORYTYPE_HOST; copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0) * GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0); for (guint i = 0; i < 
GST_VIDEO_FRAME_N_PLANES (&video_frame); i++) { - copy_params.srcDevice = frame->devptr + - (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); + copy_params.srcDevice = surface->devptr + + (i * surface->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i); @@ -738,8 +595,6 @@ gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder, ret = TRUE; done: - gst_cuda_context_pop (nullptr); - gst_video_frame_unmap (&video_frame); GST_LOG_OBJECT (decoder, "Copy frame to system ret %d", ret); @@ -749,8 +604,7 @@ done: static gboolean gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, - GstNvDecoderFrame * frame, GstBuffer * buffer, GstCudaStream * stream, - gboolean need_sync) + GstNvDecSurface * surface, GstBuffer * buffer, GstCudaStream * stream) { CUDA_MEMCPY2D copy_params = { 0, }; GstMemory *mem; @@ -771,19 +625,13 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, return FALSE; } - if (!gst_cuda_context_push (decoder->context)) { - gst_video_frame_unmap (&video_frame); - GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); - return FALSE; - } - copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; - copy_params.srcPitch = frame->pitch; + copy_params.srcPitch = surface->pitch; copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE; for (guint i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) { - copy_params.srcDevice = frame->devptr + - (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); + copy_params.srcDevice = surface->devptr + + (i * surface->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); copy_params.dstDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); @@ -798,89 +646,115 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, } /* Don't sync if we are using downstream memory's stream */ - if (need_sync) - gst_cuda_result (CuStreamSynchronize (stream_handle)); + if (!stream) + gst_cuda_result (CuStreamSynchronize (nullptr)); + else + GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); ret = TRUE; done: gst_video_frame_unmap (&video_frame); - gst_cuda_context_pop (nullptr); GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret); return ret; } -gboolean -gst_nv_decoder_finish_frame (GstNvDecoder * decoder, GstVideoDecoder * videodec, - GstVideoCodecState * input_state, GstNvDecoderFrame * frame, - GstBuffer ** buffer) +GstFlowReturn +gst_nv_decoder_finish_surface (GstNvDecoder * decoder, + GstVideoDecoder * videodec, GstVideoCodecState * input_state, + GstNvDecSurface * surface, GstBuffer ** buffer) { GstBuffer *outbuf = nullptr; gboolean ret = FALSE; GstCudaStream *stream; - GstCudaStream *mem_stream = nullptr; - gboolean need_sync = TRUE; + GstFlowReturn flow_ret; g_return_val_if_fail (GST_IS_NV_DECODER (decoder), GST_FLOW_ERROR); g_return_val_if_fail (GST_IS_VIDEO_DECODER (videodec), GST_FLOW_ERROR); - g_return_val_if_fail (frame != nullptr, GST_FLOW_ERROR); + g_return_val_if_fail (decoder->object != nullptr, GST_FLOW_ERROR); + g_return_val_if_fail (surface != nullptr, GST_FLOW_ERROR); g_return_val_if_fail (buffer != nullptr, GST_FLOW_ERROR); if (input_state) { if (!gst_nv_decoder_negotiate (decoder, videodec, input_state)) { GST_ERROR_OBJECT (videodec, "Couldn't re-negotiate with updated state"); - return FALSE; + return 
GST_FLOW_NOT_NEGOTIATED; } } + if (!gst_cuda_context_push (decoder->context)) { + GST_ERROR_OBJECT (decoder, "Couldn't push context"); + return GST_FLOW_ERROR; + } + + stream = decoder->stream; + flow_ret = gst_nv_dec_object_map_surface (decoder->object, surface, stream); + if (flow_ret != GST_FLOW_OK) { + gst_cuda_context_pop (nullptr); + return flow_ret; + } + + if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_CUDA && + (guint) decoder->create_info.ulNumOutputSurfaces >= + decoder->downstream_min_buffers) { + GstMemory *mem; + GstCudaMemory *cmem; + GstBuffer *buf; + GstVideoInfo *info = &decoder->info; + + flow_ret = gst_nv_dec_object_export_surface (decoder->object, + surface, stream, &mem); + if (flow_ret != GST_FLOW_OK) { + GST_WARNING_OBJECT (decoder, "Couldn't export surface"); + gst_nv_dec_object_unmap_surface (decoder->object, surface); + gst_cuda_context_pop (nullptr); + return flow_ret; + } + + gst_cuda_context_pop (nullptr); + + GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); + + if (stream) + GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); + + buf = gst_buffer_new (); + cmem = GST_CUDA_MEMORY_CAST (mem); + gst_buffer_append_memory (buf, mem); + gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE, + GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info), + GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info), + cmem->info.offset, cmem->info.stride); + + *buffer = buf; + return GST_FLOW_OK; + } + outbuf = gst_video_decoder_allocate_output_buffer (videodec); if (!outbuf) { GST_ERROR_OBJECT (videodec, "Couldn't allocate output buffer"); - return FALSE; - } - - stream = decoder->stream; - if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_CUDA) { - GstCudaMemory *cmem = (GstCudaMemory *) gst_buffer_peek_memory (outbuf, 0); - - /* Use downstream CUDA stream if available */ - mem_stream = gst_cuda_memory_get_stream (cmem); - if (mem_stream) { - need_sync = FALSE; - stream = mem_stream; - } - } - - if (!gst_cuda_context_push (decoder->context)) { - GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); - goto error; - } - - if (!gst_nv_decoder_frame_map (frame, stream)) { - GST_ERROR_OBJECT (decoder, "Couldn't map frame"); + gst_nv_dec_object_unmap_surface (decoder->object, surface); gst_cuda_context_pop (nullptr); - goto error; + return GST_FLOW_ERROR; } - gst_cuda_context_pop (nullptr); - switch (decoder->output_type) { case GST_NV_DECODER_OUTPUT_TYPE_SYSTEM: - ret = gst_nv_decoder_copy_frame_to_system (decoder, frame, outbuf); + ret = gst_nv_decoder_copy_frame_to_system (decoder, surface, outbuf); break; #ifdef HAVE_NVCODEC_GST_GL case GST_NV_DECODER_OUTPUT_TYPE_GL: g_assert (decoder->gl_context != nullptr); ret = gst_nv_decoder_copy_frame_to_gl (decoder, - GST_GL_CONTEXT (decoder->gl_context), frame, outbuf); + GST_GL_CONTEXT (decoder->gl_context), surface, outbuf); break; #endif case GST_NV_DECODER_OUTPUT_TYPE_CUDA: ret = gst_nv_decoder_copy_frame_to_cuda (decoder, - frame, outbuf, stream, need_sync); + surface, outbuf, stream); break; default: g_assert_not_reached (); @@ -896,11 +770,10 @@ gst_nv_decoder_finish_frame (GstNvDecoder * decoder, GstVideoDecoder * videodec, "Couldn't copy frame to GL memory, fallback to system memory"); decoder->output_type = GST_NV_DECODER_OUTPUT_TYPE_SYSTEM; - ret = gst_nv_decoder_copy_frame_to_system (decoder, frame, outbuf); + ret = gst_nv_decoder_copy_frame_to_system (decoder, surface, outbuf); } - gst_cuda_context_push (decoder->context); - 
gst_nv_decoder_frame_unmap (frame); + gst_nv_dec_object_unmap_surface (decoder->object, surface); gst_cuda_context_pop (nullptr); if (!ret) { @@ -910,11 +783,12 @@ gst_nv_decoder_finish_frame (GstNvDecoder * decoder, GstVideoDecoder * videodec, *buffer = outbuf; - return TRUE; + return GST_FLOW_OK; error: + gst_nv_dec_object_unmap_surface (decoder->object, surface); gst_clear_buffer (&outbuf); - return FALSE; + return GST_FLOW_ERROR; } typedef enum @@ -1577,9 +1451,10 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query) { GstCaps *outcaps; GstBufferPool *pool = nullptr; - guint n, size, min, max; + guint n, size, min = 0, max = 0; GstVideoInfo vinfo = { 0, }; GstStructure *config; + GstCudaStream *stream; gst_query_parse_allocation (query, &outcaps, nullptr); n = gst_query_get_n_allocation_pools (query); @@ -1598,10 +1473,26 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query) if (outcaps) gst_video_info_from_caps (&vinfo, outcaps); size = (guint) vinfo.size; - min = max = 0; } config = gst_buffer_pool_get_config (pool); + stream = gst_buffer_pool_config_get_cuda_stream (config); + if (stream) { + GST_DEBUG_OBJECT (decoder, "Downstream CUDA stream is available"); + gst_clear_cuda_stream (&decoder->stream); + decoder->stream = stream; + } else if (decoder->stream) { + GST_DEBUG_OBJECT (decoder, + "Downstream CUDA stream is not available, use ours"); + gst_buffer_pool_config_set_cuda_stream (config, decoder->stream); + } + + decoder->downstream_min_buffers = min; + GST_DEBUG_OBJECT (decoder, "Downstream min buffers %d", min); + + /* Since we don't use downstream buffer pool, pre-allocation is unnecessary */ + min = 0; + gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_set_config (pool, config); @@ -1701,3 +1592,20 @@ gst_nv_decoder_decide_allocation (GstNvDecoder * decoder, return ret; } + +void +gst_nv_decoder_set_flushing (GstNvDecoder * decoder, gboolean flushing) +{ + g_mutex_lock (&decoder->lock); + if (decoder->object) + gst_nv_dec_object_set_flushing (decoder->object, flushing); + g_mutex_unlock (&decoder->lock); +} + +void +gst_nv_decoder_reset (GstNvDecoder * decoder) +{ + g_mutex_lock (&decoder->lock); + gst_nv_decoder_reset_unlocked (decoder); + g_mutex_unlock (&decoder->lock); +} diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h index bc10d5e612..0ebecd6d91 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h @@ -24,6 +24,7 @@ #include #include #include "gstcuvidloader.h" +#include "gstnvdecobject.h" G_BEGIN_DECLS @@ -31,24 +32,6 @@ G_BEGIN_DECLS G_DECLARE_FINAL_TYPE (GstNvDecoder, gst_nv_decoder, GST, NV_DECODER, GstObject); -typedef struct _GstNvDecoderFrame -{ - /* CUVIDPICPARAMS::CurrPicIdx */ - gint index; - guintptr devptr; - guint pitch; - - gboolean mapped; - - /* Extra frame allocated for AV1 film grain */ - gint decode_frame_index; - - /*< private >*/ - GstNvDecoder *decoder; - - gint ref_count; -} GstNvDecoderFrame; - typedef struct _GstNvDecoderClassData { GstCaps *sink_caps; @@ -67,22 +50,25 @@ gboolean gst_nv_decoder_configure (GstNvDecoder * decoder, gint coded_height, guint coded_bitdepth, guint pool_size, - gboolean alloc_aux_frame); + gboolean alloc_aux_frame, + guint num_output_surfaces); -GstNvDecoderFrame * gst_nv_decoder_new_frame (GstNvDecoder * 
decoder); +GstFlowReturn gst_nv_decoder_acquire_surface (GstNvDecoder * decoder, + GstNvDecSurface ** surface); -GstNvDecoderFrame * gst_nv_decoder_frame_ref (GstNvDecoderFrame * frame); +gboolean gst_nv_decoder_decode (GstNvDecoder * decoder, + CUVIDPICPARAMS * params); -void gst_nv_decoder_frame_unref (GstNvDecoderFrame * frame); +GstFlowReturn gst_nv_decoder_finish_surface (GstNvDecoder * decoder, + GstVideoDecoder * videodec, + GstVideoCodecState * input_state, + GstNvDecSurface *surface, + GstBuffer ** buffer); -gboolean gst_nv_decoder_decode_picture (GstNvDecoder * decoder, - CUVIDPICPARAMS * params); +void gst_nv_decoder_set_flushing (GstNvDecoder * decoder, + gboolean flushing); -gboolean gst_nv_decoder_finish_frame (GstNvDecoder * decoder, - GstVideoDecoder * videodec, - GstVideoCodecState * input_state, - GstNvDecoderFrame *frame, - GstBuffer ** buffer); +void gst_nv_decoder_reset (GstNvDecoder * decoder); /* utils for class registration */ gboolean gst_nv_decoder_check_device_caps (CUcontext cuda_ctx, diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp index 3110598fb6..42858f35f0 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp @@ -125,6 +125,8 @@ typedef struct _GstNvH264Dec gboolean interlaced; GArray *ref_list; + + guint num_output_surfaces; } GstNvH264Dec; typedef struct _GstNvH264DecClass @@ -137,8 +139,11 @@ enum { PROP_0, PROP_CUDA_DEVICE_ID, + PROP_NUM_OUTPUT_SURFACES, }; +#define DEFAULT_NUM_OUTPUT_SURFACES 0 + static GTypeClass *parent_class = nullptr; #define GST_NV_H264_DEC(object) ((GstNvH264Dec *) (object)) @@ -146,6 +151,8 @@ static GTypeClass *parent_class = nullptr; (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvH264DecClass)) static void gst_nv_h264_decoder_dispose (GObject * object); +static void gst_nv_h264_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); static void gst_nv_h264_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); @@ -153,11 +160,14 @@ static void gst_nv_h264_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_h264_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_h264_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_h264_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_h264_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_h264_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_h264_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); +static gboolean gst_nv_h264_dec_sink_event (GstVideoDecoder * decoder, + GstEvent * event); /* GstH264Decoder */ static GstFlowReturn gst_nv_h264_dec_new_sequence (GstH264Decoder * decoder, @@ -190,6 +200,7 @@ gst_nv_h264_dec_class_init (GstNvH264DecClass * klass, GstH264DecoderClass *h264decoder_class = GST_H264_DECODER_CLASS (klass); object_class->dispose = gst_nv_h264_decoder_dispose; + object_class->set_property = gst_nv_h264_dec_set_property; object_class->get_property = gst_nv_h264_dec_get_property; /** @@ -204,6 +215,23 @@ gst_nv_h264_dec_class_init (GstNvH264DecClass * klass, "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + /** + * GstNvH264SLDec:num-output-surfaces: + * + * The number of output surfaces (0 = auto). 
This property will be used to + * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter + * in case of CUDA output mode + * + * Since: 1.24 + */ + g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, + g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", + "Maximum number of output surfaces simultaneously mapped in CUDA " + "output mode (0 = auto)", + 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, + (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | + G_PARAM_STATIC_STRINGS))); + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); @@ -221,10 +249,12 @@ gst_nv_h264_dec_class_init (GstNvH264DecClass * klass, decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_close); + decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_src_query); + decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_sink_event); h264decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_h264_dec_new_sequence); @@ -257,6 +287,8 @@ gst_nv_h264_dec_init (GstNvH264Dec * self) sizeof (GstH264Picture *), 16); g_array_set_clear_func (self->ref_list, (GDestroyNotify) gst_clear_h264_picture); + + self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; } static void @@ -269,16 +301,36 @@ gst_nv_h264_decoder_dispose (GObject * object) G_OBJECT_CLASS (parent_class)->dispose (object); } +static void +gst_nv_h264_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvH264Dec *self = GST_NV_H264_DEC (object); + + switch (prop_id) { + case PROP_NUM_OUTPUT_SURFACES: + self->num_output_surfaces = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + static void gst_nv_h264_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { + GstNvH264Dec *self = GST_NV_H264_DEC (object); GstNvH264DecClass *klass = GST_NV_H264_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; + case PROP_NUM_OUTPUT_SURFACES: + g_value_set_uint (value, self->num_output_surfaces); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -362,6 +414,20 @@ gst_nv_h264_dec_close (GstVideoDecoder * decoder) return TRUE; } +static gboolean +gst_nv_h264_dec_stop (GstVideoDecoder * decoder) +{ + GstNvH264Dec *self = GST_NV_H264_DEC (decoder); + gboolean ret; + + ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); + + if (self->decoder) + gst_nv_decoder_reset (self->decoder); + + return ret; +} + static gboolean gst_nv_h264_dec_negotiate (GstVideoDecoder * decoder) { @@ -413,6 +479,29 @@ gst_nv_h264_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } +static gboolean +gst_nv_h264_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) +{ + GstNvH264Dec *self = GST_NV_H264_DEC (decoder); + + if (!self->decoder) + goto done; + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_FLUSH_START: + gst_nv_decoder_set_flushing (self->decoder, TRUE); + break; + case GST_EVENT_FLUSH_STOP: + 
gst_nv_decoder_set_flushing (self->decoder, FALSE); + break; + default: + break; + } + +done: + return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); +} + static GstFlowReturn gst_nv_h264_dec_new_sequence (GstH264Decoder * decoder, const GstH264SPS * sps, gint max_dpb_size) @@ -492,7 +581,8 @@ gst_nv_h264_dec_new_sequence (GstH264Decoder * decoder, const GstH264SPS * sps, return GST_FLOW_NOT_NEGOTIATED; } - gst_video_info_set_format (&info, out_format, self->width, self->height); + gst_video_info_set_format (&info, out_format, GST_ROUND_UP_2 (self->width), + GST_ROUND_UP_2 (self->height)); if (self->interlaced) GST_VIDEO_INFO_INTERLACE_MODE (&info) = GST_VIDEO_INTERLACE_MODE_MIXED; @@ -500,7 +590,7 @@ gst_nv_h264_dec_new_sequence (GstH264Decoder * decoder, const GstH264SPS * sps, /* FIXME: add support cudaVideoCodec_H264_SVC and cudaVideoCodec_H264_MVC */ if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_H264, &info, self->coded_width, self->coded_height, - self->bitdepth, max_dpb_size, FALSE)) { + self->bitdepth, max_dpb_size, FALSE, self->num_output_surfaces)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } @@ -521,19 +611,18 @@ gst_nv_h264_dec_new_picture (GstH264Decoder * decoder, GstVideoCodecFrame * frame, GstH264Picture * picture) { GstNvH264Dec *self = GST_NV_H264_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; + GstFlowReturn ret; - nv_frame = gst_nv_decoder_new_frame (self->decoder); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "No available decoder frame"); - return GST_FLOW_ERROR; - } + ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); + if (ret != GST_FLOW_OK) + return ret; GST_LOG_OBJECT (self, - "New decoder frame %p (index %d)", nv_frame, nv_frame->index); + "New decoder surface %p (index %d)", surface, surface->index); gst_h264_picture_set_user_data (picture, - nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } @@ -542,19 +631,19 @@ static GstFlowReturn gst_nv_h264_dec_new_field_picture (GstH264Decoder * decoder, GstH264Picture * first_field, GstH264Picture * second_field) { - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; - nv_frame = (GstNvDecoderFrame *) + surface = (GstNvDecSurface *) gst_h264_picture_get_user_data (first_field); - if (!nv_frame) { + if (!surface) { GST_ERROR_OBJECT (decoder, "No decoder frame in the first picture %p", first_field); return GST_FLOW_ERROR; } gst_h264_picture_set_user_data (second_field, - gst_nv_decoder_frame_ref (nv_frame), - (GDestroyNotify) gst_nv_decoder_frame_unref); + gst_nv_dec_surface_ref (surface), + (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } @@ -565,23 +654,22 @@ gst_nv_h264_dec_output_picture (GstH264Decoder * decoder, { GstNvH264Dec *self = GST_NV_H264_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); - GstNvDecoderFrame *decoder_frame; + GstNvDecSurface *surface; + GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p (poc %d)", picture, picture->pic_order_cnt); - decoder_frame = - (GstNvDecoderFrame *) gst_h264_picture_get_user_data (picture); - if (!decoder_frame) { - GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); + surface = (GstNvDecSurface *) gst_h264_picture_get_user_data (picture); + if (!surface) { + GST_ERROR_OBJECT (self, "No decoder surface in picture %p", picture); goto error; } - if (!gst_nv_decoder_finish_frame 
(self->decoder, vdec, picture->discont_state, - decoder_frame, &frame->output_buffer)) { - GST_ERROR_OBJECT (self, "Failed to handle output picture"); + ret = gst_nv_decoder_finish_surface (self->decoder, + vdec, picture->discont_state, surface, &frame->output_buffer); + if (ret != GST_FLOW_OK) goto error; - } if (picture->buffer_flags != 0) { gboolean interlaced = @@ -602,21 +690,20 @@ error: gst_h264_picture_unref (picture); gst_video_decoder_release_frame (vdec, frame); - return GST_FLOW_ERROR; + return ret; } -static GstNvDecoderFrame * -gst_nv_h264_dec_get_decoder_frame_from_picture (GstNvH264Dec * self, +static GstNvDecSurface * +gst_nv_h264_dec_get_decoder_surface_from_picture (GstNvH264Dec * self, GstH264Picture * picture) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; - frame = (GstNvDecoderFrame *) gst_h264_picture_get_user_data (picture); + surface = (GstNvDecSurface *) gst_h264_picture_get_user_data (picture); + if (!surface) + GST_DEBUG_OBJECT (self, "current picture does not have decoder surface"); - if (!frame) - GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); - - return frame; + return surface; } static void @@ -711,16 +798,16 @@ static void gst_nv_h264_dec_fill_dpb (GstNvH264Dec * self, GstH264Picture * ref, CUVIDH264DPBENTRY * dpb) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; dpb->not_existing = ref->nonexisting; dpb->PicIdx = -1; - frame = gst_nv_h264_dec_get_decoder_frame_from_picture (self, ref); - if (!frame) { + surface = gst_nv_h264_dec_get_decoder_surface_from_picture (self, ref); + if (!surface) { dpb->not_existing = 1; } else if (!dpb->not_existing) { - dpb->PicIdx = frame->index; + dpb->PicIdx = surface->index; } if (dpb->not_existing) @@ -778,16 +865,15 @@ gst_nv_h264_dec_start_picture (GstH264Decoder * decoder, const GstH264SliceHdr *slice_header = &slice->header; const GstH264SPS *sps; const GstH264PPS *pps; - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; GArray *ref_list = self->ref_list; guint i, ref_frame_idx; g_return_val_if_fail (slice_header->pps != nullptr, GST_FLOW_ERROR); g_return_val_if_fail (slice_header->pps->sequence != nullptr, GST_FLOW_ERROR); - frame = gst_nv_h264_dec_get_decoder_frame_from_picture (self, picture); - - if (!frame) { + surface = gst_nv_h264_dec_get_decoder_surface_from_picture (self, picture); + if (!surface) { GST_ERROR_OBJECT (self, "Couldn't get decoder frame frame picture %p", picture); return GST_FLOW_ERROR; @@ -805,7 +891,7 @@ gst_nv_h264_dec_start_picture (GstH264Decoder * decoder, } else { params->FrameHeightInMbs = sps->pic_height_in_map_units_minus1 + 1; } - params->CurrPicIdx = frame->index; + params->CurrPicIdx = surface->index; params->field_pic_flag = slice_header->field_pic_flag; params->bottom_field_flag = picture->field == GST_H264_PICTURE_FIELD_BOTTOM_FIELD; @@ -923,7 +1009,7 @@ gst_nv_h264_dec_end_picture (GstH264Decoder * decoder, GstH264Picture * picture) GST_LOG_OBJECT (self, "End picture, bitstream len: %" G_GSIZE_FORMAT ", num slices %d", self->bitstream_buffer_offset, self->num_slices); - ret = gst_nv_decoder_decode_picture (self->decoder, &self->params); + ret = gst_nv_decoder_decode (self->decoder, &self->params); if (!ret) { GST_ERROR_OBJECT (self, "Failed to decode picture"); diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp index a72d1fbfa5..b689661b0a 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp +++ 
b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp @@ -121,6 +121,8 @@ typedef struct _GstNvH265Dec guint coded_width, coded_height; guint bitdepth; guint chroma_format_idc; + + guint num_output_surfaces; } GstNvH265Dec; typedef struct _GstNvH265DecClass @@ -133,14 +135,19 @@ enum { PROP_0, PROP_CUDA_DEVICE_ID, + PROP_NUM_OUTPUT_SURFACES, }; +#define DEFAULT_NUM_OUTPUT_SURFACES 0 + static GTypeClass *parent_class = nullptr; #define GST_NV_H265_DEC(object) ((GstNvH265Dec *) (object)) #define GST_NV_H265_DEC_GET_CLASS(object) \ (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvH265DecClass)) +static void gst_nv_h265_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); static void gst_nv_h265_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); @@ -148,11 +155,14 @@ static void gst_nv_h265_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_h265_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_h265_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_h265_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_h265_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_h265_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_h265_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); +static gboolean gst_nv_h265_dec_sink_event (GstVideoDecoder * decoder, + GstEvent * event); /* GstH265Decoder */ static GstFlowReturn gst_nv_h265_dec_new_sequence (GstH265Decoder * decoder, @@ -181,6 +191,7 @@ gst_nv_h265_dec_class_init (GstNvH265DecClass * klass, GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); GstH265DecoderClass *h265decoder_class = GST_H265_DECODER_CLASS (klass); + object_class->set_property = gst_nv_h265_dec_set_property; object_class->get_property = gst_nv_h265_dec_get_property; /** @@ -195,6 +206,23 @@ gst_nv_h265_dec_class_init (GstNvH265DecClass * klass, "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + /** + * GstNvH265SLDec:num-output-surfaces: + * + * The number of output surfaces (0 = auto). 
This property will be used to + * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter + * in case of CUDA output mode + * + * Since: 1.24 + */ + g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, + g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", + "Maximum number of output surfaces simultaneously mapped in CUDA " + "output mode (0 = auto)", + 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, + (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | + G_PARAM_STATIC_STRINGS))); + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); @@ -212,10 +240,12 @@ gst_nv_h265_dec_class_init (GstNvH265DecClass * klass, decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_close); + decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_src_query); + decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_sink_event); h265decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_h265_dec_new_sequence); @@ -242,18 +272,39 @@ gst_nv_h265_dec_class_init (GstNvH265DecClass * klass, static void gst_nv_h265_dec_init (GstNvH265Dec * self) { + self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; +} + +static void +gst_nv_h265_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvH265Dec *self = GST_NV_H265_DEC (object); + + switch (prop_id) { + case PROP_NUM_OUTPUT_SURFACES: + self->num_output_surfaces = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } } static void gst_nv_h265_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { + GstNvH265Dec *self = GST_NV_H265_DEC (object); GstNvH265DecClass *klass = GST_NV_H265_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; + case PROP_NUM_OUTPUT_SURFACES: + g_value_set_uint (value, self->num_output_surfaces); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -321,6 +372,20 @@ gst_nv_h265_dec_close (GstVideoDecoder * decoder) return TRUE; } +static gboolean +gst_nv_h265_dec_stop (GstVideoDecoder * decoder) +{ + GstNvH265Dec *self = GST_NV_H265_DEC (decoder); + gboolean ret; + + ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); + + if (self->decoder) + gst_nv_decoder_reset (self->decoder); + + return ret; +} + static gboolean gst_nv_h265_dec_negotiate (GstVideoDecoder * decoder) { @@ -372,6 +437,29 @@ gst_nv_h265_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } +static gboolean +gst_nv_h265_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) +{ + GstNvH265Dec *self = GST_NV_H265_DEC (decoder); + + if (!self->decoder) + goto done; + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_FLUSH_START: + gst_nv_decoder_set_flushing (self->decoder, TRUE); + break; + case GST_EVENT_FLUSH_STOP: + gst_nv_decoder_set_flushing (self->decoder, FALSE); + break; + default: + break; + } + +done: + return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); +} + 
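The same reworked surface flow shows up in every stateless subclass this patch touches (H.264, H.265, VP8, VP9 and AV1): acquire a GstNvDecSurface instead of the old GstNvDecoderFrame, reference it by index in CUVIDPICPARAMS, submit with gst_nv_decoder_decode (), and produce the output buffer with gst_nv_decoder_finish_surface (). Condensed into one place, with error paths and codec-specific details trimmed, the per-picture sequence is roughly the sketch below (H.264 names are used only for illustration; self, params, picture and frame are the usual vfunc-local variables):

    /* new_picture(): grab a free decoder surface and tie it to the picture */
    GstNvDecSurface *surface;
    GstFlowReturn ret;

    ret = gst_nv_decoder_acquire_surface (self->decoder, &surface);
    if (ret != GST_FLOW_OK)
      return ret;                          /* e.g. flushing or error */

    gst_h264_picture_set_user_data (picture,
        surface, (GDestroyNotify) gst_nv_dec_surface_unref);

    /* start_picture(): current and reference pictures are addressed by index */
    self->params.CurrPicIdx = surface->index;

    /* end_picture(): hand the assembled CUVIDPICPARAMS to NVDEC */
    if (!gst_nv_decoder_decode (self->decoder, &self->params))
      return GST_FLOW_ERROR;

    /* output_picture(): wrap the decoded surface into a GstBuffer
     * (zero-copy GstCudaMemory when CUDA output mode is negotiated) */
    ret = gst_nv_decoder_finish_surface (self->decoder,
        GST_VIDEO_DECODER (self), picture->discont_state, surface,
        &frame->output_buffer);

Reference pictures follow the same idea: each one carries its surface as user data, and that surface->index is what ends up in the DPB entries (H.264/H.265), in AltRefIdx/GoldenRefIdx/LastRefIdx (VP8), or in ref_frame_map (VP9).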
static GstFlowReturn gst_nv_h265_dec_new_sequence (GstH265Decoder * decoder, const GstH265SPS * sps, gint max_dpb_size) @@ -450,11 +538,12 @@ gst_nv_h265_dec_new_sequence (GstH265Decoder * decoder, const GstH265SPS * sps, return GST_FLOW_NOT_NEGOTIATED; } - gst_video_info_set_format (&info, out_format, self->width, self->height); + gst_video_info_set_format (&info, out_format, GST_ROUND_UP_2 (self->width), + GST_ROUND_UP_2 (self->height)); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_HEVC, &info, self->coded_width, self->coded_height, - self->bitdepth, max_dpb_size, FALSE)) { + self->bitdepth, max_dpb_size, FALSE, self->num_output_surfaces)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } @@ -475,18 +564,18 @@ gst_nv_h265_dec_new_picture (GstH265Decoder * decoder, GstVideoCodecFrame * cframe, GstH265Picture * picture) { GstNvH265Dec *self = GST_NV_H265_DEC (decoder); - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; + GstFlowReturn ret; - frame = gst_nv_decoder_new_frame (self->decoder); - if (!frame) { - GST_ERROR_OBJECT (self, "No available decoder frame"); - return GST_FLOW_ERROR; - } + ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); + if (ret != GST_FLOW_OK) + return ret; - GST_LOG_OBJECT (self, "New decoder frame %p (index %d)", frame, frame->index); + GST_LOG_OBJECT (self, "New decoder surface %p (index %d)", + surface, surface->index); gst_h265_picture_set_user_data (picture, - frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } @@ -497,23 +586,22 @@ gst_nv_h265_dec_output_picture (GstH265Decoder * decoder, { GstNvH265Dec *self = GST_NV_H265_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); - GstNvDecoderFrame *decoder_frame; + GstNvDecSurface *surface; + GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p (poc %d)", picture, picture->pic_order_cnt); - decoder_frame = - (GstNvDecoderFrame *) gst_h265_picture_get_user_data (picture); - if (!decoder_frame) { - GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); + surface = (GstNvDecSurface *) gst_h265_picture_get_user_data (picture); + if (!surface) { + GST_ERROR_OBJECT (self, "No decoder surface in picture %p", picture); goto error; } - if (!gst_nv_decoder_finish_frame (self->decoder, vdec, picture->discont_state, - decoder_frame, &frame->output_buffer)) { - GST_ERROR_OBJECT (self, "Failed to handle output picture"); + ret = gst_nv_decoder_finish_surface (self->decoder, + vdec, picture->discont_state, surface, &frame->output_buffer); + if (ret != GST_FLOW_OK) goto error; - } gst_h265_picture_unref (picture); @@ -523,21 +611,20 @@ error: gst_video_decoder_drop_frame (vdec, frame); gst_h265_picture_unref (picture); - return GST_FLOW_ERROR; + return ret; } -static GstNvDecoderFrame * -gst_nv_h265_dec_get_decoder_frame_from_picture (GstNvH265Dec * self, +static GstNvDecSurface * +gst_nv_h265_dec_get_decoder_surface_from_picture (GstNvH265Dec * self, GstH265Picture * picture) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; - frame = (GstNvDecoderFrame *) gst_h265_picture_get_user_data (picture); - - if (!frame) + surface = (GstNvDecSurface *) gst_h265_picture_get_user_data (picture); + if (!surface) GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); - return frame; + return surface; } static void @@ -719,7 +806,7 @@ gst_nv_h265_dec_start_picture (GstH265Decoder * decoder, const 
GstH265SliceHdr *slice_header = &slice->header; const GstH265SPS *sps; const GstH265PPS *pps; - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; GArray *dpb_array; guint num_ref_pic; guint i, j, k; @@ -738,11 +825,10 @@ gst_nv_h265_dec_start_picture (GstH265Decoder * decoder, g_return_val_if_fail (slice_header->pps != nullptr, GST_FLOW_ERROR); g_return_val_if_fail (slice_header->pps->sps != nullptr, GST_FLOW_ERROR); - frame = gst_nv_h265_dec_get_decoder_frame_from_picture (self, picture); - - if (!frame) { + surface = gst_nv_h265_dec_get_decoder_surface_from_picture (self, picture); + if (!surface) { GST_ERROR_OBJECT (self, - "Couldn't get decoder frame frame picture %p", picture); + "Couldn't get decoder surface frame picture %p", picture); return GST_FLOW_ERROR; } @@ -754,7 +840,7 @@ gst_nv_h265_dec_start_picture (GstH265Decoder * decoder, /* FIXME: update sps/pps related params only when it's required */ params->PicWidthInMbs = sps->pic_width_in_luma_samples / 16; params->FrameHeightInMbs = sps->pic_height_in_luma_samples / 16; - params->CurrPicIdx = frame->index; + params->CurrPicIdx = surface->index; /* nBitstreamDataLen, pBitstreamData, nNumSlices and pSliceDataOffsets * will be set later */ @@ -804,7 +890,7 @@ gst_nv_h265_dec_start_picture (GstH265Decoder * decoder, num_ref_pic = 0; for (i = 0; i < dpb_array->len; i++) { GstH265Picture *other = g_array_index (dpb_array, GstH265Picture *, i); - GstNvDecoderFrame *other_frame; + GstNvDecSurface *other_surface; gint picture_index = -1; if (!other->ref) @@ -815,9 +901,10 @@ gst_nv_h265_dec_start_picture (GstH265Decoder * decoder, return GST_FLOW_ERROR; } - other_frame = gst_nv_h265_dec_get_decoder_frame_from_picture (self, other); - if (other_frame) - picture_index = other_frame->index; + other_surface = + gst_nv_h265_dec_get_decoder_surface_from_picture (self, other); + if (other_surface) + picture_index = other_surface->index; h265_params->RefPicIdx[num_ref_pic] = picture_index; h265_params->PicOrderCntVal[num_ref_pic] = other->pic_order_cnt; @@ -962,7 +1049,7 @@ gst_nv_h265_dec_end_picture (GstH265Decoder * decoder, GstH265Picture * picture) GST_LOG_OBJECT (self, "End picture, bitstream len: %" G_GSIZE_FORMAT ", num slices %d", self->bitstream_buffer_offset, self->num_slices); - ret = gst_nv_decoder_decode_picture (self->decoder, &self->params); + ret = gst_nv_decoder_decode (self->decoder, &self->params); if (!ret) { GST_ERROR_OBJECT (self, "Failed to decode picture"); diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp index 9ffadf35f5..ffe90453bf 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp @@ -53,6 +53,8 @@ typedef struct _GstNvVp8Dec CUVIDPICPARAMS params; guint width, height; + + guint num_output_surfaces; } GstNvVp8Dec; typedef struct _GstNvVp8DecClass @@ -65,14 +67,19 @@ enum { PROP_0, PROP_CUDA_DEVICE_ID, + PROP_NUM_OUTPUT_SURFACES, }; +#define DEFAULT_NUM_OUTPUT_SURFACES 0 + static GTypeClass *parent_class = nullptr; #define GST_NV_VP8_DEC(object) ((GstNvVp8Dec *) (object)) #define GST_NV_VP8_DEC_GET_CLASS(object) \ (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvVp8DecClass)) +static void gst_nv_vp8_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); static void gst_nv_vp8_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); @@ -80,11 +87,14 @@ static void 
gst_nv_vp8_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_vp8_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_vp8_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_vp8_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_vp8_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_vp8_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_vp8_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); +static gboolean gst_nv_vp8_dec_sink_event (GstVideoDecoder * decoder, + GstEvent * event); /* GstVp8Decoder */ static GstFlowReturn gst_nv_vp8_dec_new_sequence (GstVp8Decoder * decoder, @@ -107,6 +117,7 @@ gst_nv_vp8_dec_class_init (GstNvVp8DecClass * klass, GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); GstVp8DecoderClass *vp8decoder_class = GST_VP8_DECODER_CLASS (klass); + object_class->set_property = gst_nv_vp8_dec_set_property; object_class->get_property = gst_nv_vp8_dec_get_property; /** @@ -121,6 +132,23 @@ gst_nv_vp8_dec_class_init (GstNvVp8DecClass * klass, "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + /** + * GstNvVp8SLDec:num-output-surfaces: + * + * The number of output surfaces (0 = auto). This property will be used to + * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter + * in case of CUDA output mode + * + * Since: 1.24 + */ + g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, + g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", + "Maximum number of output surfaces simultaneously mapped in CUDA " + "output mode (0 = auto)", + 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, + (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | + G_PARAM_STATIC_STRINGS))); + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); @@ -138,10 +166,12 @@ gst_nv_vp8_dec_class_init (GstNvVp8DecClass * klass, decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_close); + decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_src_query); + decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_sink_event); vp8decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_vp8_dec_new_sequence); @@ -164,18 +194,39 @@ gst_nv_vp8_dec_class_init (GstNvVp8DecClass * klass, static void gst_nv_vp8_dec_init (GstNvVp8Dec * self) { + self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; +} + +static void +gst_nv_vp8_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvVp8Dec *self = GST_NV_VP8_DEC (object); + + switch (prop_id) { + case PROP_NUM_OUTPUT_SURFACES: + self->num_output_surfaces = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } } static void gst_nv_vp8_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { + GstNvVp8Dec *self = GST_NV_VP8_DEC (object); GstNvVp8DecClass *klass = GST_NV_VP8_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; + case 
PROP_NUM_OUTPUT_SURFACES: + g_value_set_uint (value, self->num_output_surfaces); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -237,6 +288,20 @@ gst_nv_vp8_dec_close (GstVideoDecoder * decoder) return TRUE; } +static gboolean +gst_nv_vp8_dec_stop (GstVideoDecoder * decoder) +{ + GstNvVp8Dec *self = GST_NV_VP8_DEC (decoder); + gboolean ret; + + ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); + + if (self->decoder) + gst_nv_decoder_reset (self->decoder); + + return ret; +} + static gboolean gst_nv_vp8_dec_negotiate (GstVideoDecoder * decoder) { @@ -288,6 +353,29 @@ gst_nv_vp8_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } +static gboolean +gst_nv_vp8_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) +{ + GstNvVp8Dec *self = GST_NV_VP8_DEC (decoder); + + if (!self->decoder) + goto done; + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_FLUSH_START: + gst_nv_decoder_set_flushing (self->decoder, TRUE); + break; + case GST_EVENT_FLUSH_STOP: + gst_nv_decoder_set_flushing (self->decoder, FALSE); + break; + default: + break; + } + +done: + return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); +} + static GstFlowReturn gst_nv_vp8_dec_new_sequence (GstVp8Decoder * decoder, const GstVp8FrameHdr * frame_hdr, gint max_dpb_size) @@ -313,11 +401,12 @@ gst_nv_vp8_dec_new_sequence (GstVp8Decoder * decoder, GstVideoInfo info; gst_video_info_set_format (&info, - GST_VIDEO_FORMAT_NV12, self->width, self->height); + GST_VIDEO_FORMAT_NV12, GST_ROUND_UP_2 (self->width), + GST_ROUND_UP_2 (self->height)); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_VP8, &info, self->width, self->height, 8, - max_dpb_size, FALSE)) { + max_dpb_size, FALSE, self->num_output_surfaces)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } @@ -344,35 +433,33 @@ gst_nv_vp8_dec_new_picture (GstVp8Decoder * decoder, GstVideoCodecFrame * frame, GstVp8Picture * picture) { GstNvVp8Dec *self = GST_NV_VP8_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; + GstFlowReturn ret; - nv_frame = gst_nv_decoder_new_frame (self->decoder); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "No available decoder frame"); - return GST_FLOW_ERROR; - } + ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); + if (ret != GST_FLOW_OK) + return ret; GST_LOG_OBJECT (self, - "New decoder frame %p (index %d)", nv_frame, nv_frame->index); + "New decoder frame %p (index %d)", surface, surface->index); gst_vp8_picture_set_user_data (picture, - nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } -static GstNvDecoderFrame * +static GstNvDecSurface * gst_nv_vp8_dec_get_decoder_frame_from_picture (GstNvVp8Dec * self, GstVp8Picture * picture) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; - frame = (GstNvDecoderFrame *) gst_vp8_picture_get_user_data (picture); + surface = (GstNvDecSurface *) gst_vp8_picture_get_user_data (picture); + if (!surface) + GST_DEBUG_OBJECT (self, "current picture does not have decoder surface"); - if (!frame) - GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); - - return frame; + return surface; } static GstFlowReturn @@ -381,14 +468,14 @@ gst_nv_vp8_dec_decode_picture (GstVp8Decoder * decoder, { GstNvVp8Dec *self = GST_NV_VP8_DEC (decoder); GstVp8FrameHdr *frame_hdr = 
&picture->frame_hdr; - GstNvDecoderFrame *frame; - GstNvDecoderFrame *other_frame; + GstNvDecSurface *surface; + GstNvDecSurface *other_surface; guint offset = 0; GST_LOG_OBJECT (self, "Decode picture, size %" G_GSIZE_FORMAT, picture->size); - frame = gst_nv_vp8_dec_get_decoder_frame_from_picture (self, picture); - if (!frame) { + surface = gst_nv_vp8_dec_get_decoder_frame_from_picture (self, picture); + if (!surface) { GST_ERROR_OBJECT (self, "Decoder frame is unavailable"); return GST_FLOW_ERROR; } @@ -398,49 +485,49 @@ gst_nv_vp8_dec_decode_picture (GstVp8Decoder * decoder, self->params.nNumSlices = 1; self->params.pSliceDataOffsets = &offset; - self->params.CurrPicIdx = frame->index; + self->params.CurrPicIdx = surface->index; self->params.CodecSpecific.vp8.first_partition_size = frame_hdr->first_part_size; if (decoder->alt_ref_picture) { - other_frame = + other_surface = gst_nv_vp8_dec_get_decoder_frame_from_picture (self, decoder->alt_ref_picture); - if (!other_frame) { + if (!other_surface) { GST_ERROR_OBJECT (self, "Couldn't get decoder frame for AltRef"); return GST_FLOW_ERROR; } - self->params.CodecSpecific.vp8.AltRefIdx = other_frame->index; + self->params.CodecSpecific.vp8.AltRefIdx = other_surface->index; } else { self->params.CodecSpecific.vp8.AltRefIdx = 0xff; } if (decoder->golden_ref_picture) { - other_frame = + other_surface = gst_nv_vp8_dec_get_decoder_frame_from_picture (self, decoder->golden_ref_picture); - if (!other_frame) { + if (!other_surface) { GST_ERROR_OBJECT (self, "Couldn't get decoder frame for GoldenRef"); return GST_FLOW_ERROR; } - self->params.CodecSpecific.vp8.GoldenRefIdx = other_frame->index; + self->params.CodecSpecific.vp8.GoldenRefIdx = other_surface->index; } else { self->params.CodecSpecific.vp8.GoldenRefIdx = 0xff; } if (decoder->last_picture) { - other_frame = + other_surface = gst_nv_vp8_dec_get_decoder_frame_from_picture (self, decoder->last_picture); - if (!other_frame) { + if (!other_surface) { GST_ERROR_OBJECT (self, "Couldn't get decoder frame for LastRef"); return GST_FLOW_ERROR; } - self->params.CodecSpecific.vp8.LastRefIdx = other_frame->index; + self->params.CodecSpecific.vp8.LastRefIdx = other_surface->index; } else { self->params.CodecSpecific.vp8.LastRefIdx = 0xff; } @@ -454,7 +541,7 @@ gst_nv_vp8_dec_decode_picture (GstVp8Decoder * decoder, parser->segmentation.segmentation_enabled ? 
parser->segmentation.update_segment_feature_data : 0; - if (!gst_nv_decoder_decode_picture (self->decoder, &self->params)) + if (!gst_nv_decoder_decode (self->decoder, &self->params)) return GST_FLOW_ERROR; return GST_FLOW_OK; @@ -466,21 +553,21 @@ gst_nv_vp8_dec_output_picture (GstVp8Decoder * decoder, { GstNvVp8Dec *self = GST_NV_VP8_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); - GstNvDecoderFrame *decoder_frame; + GstNvDecSurface *surface; + GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p", picture); - decoder_frame = (GstNvDecoderFrame *) gst_vp8_picture_get_user_data (picture); - if (!decoder_frame) { + surface = (GstNvDecSurface *) gst_vp8_picture_get_user_data (picture); + if (!surface) { GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); goto error; } - if (!gst_nv_decoder_finish_frame (self->decoder, vdec, picture->discont_state, - decoder_frame, &frame->output_buffer)) { - GST_ERROR_OBJECT (self, "Failed to handle output picture"); + ret = gst_nv_decoder_finish_surface (self->decoder, + vdec, picture->discont_state, surface, &frame->output_buffer); + if (ret != GST_FLOW_OK) goto error; - } gst_vp8_picture_unref (picture); @@ -490,7 +577,7 @@ error: gst_video_decoder_drop_frame (vdec, frame); gst_vp8_picture_unref (picture); - return GST_FLOW_ERROR; + return ret; } static guint diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp index 54bb1fc9b1..5c2dda64a1 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp @@ -54,6 +54,8 @@ typedef struct _GstNvVp9Dec guint width, height; GstVP9Profile profile; + + guint num_output_surfaces; } GstNvVp9Dec; typedef struct _GstNvVp9DecClass @@ -66,14 +68,19 @@ enum { PROP_0, PROP_CUDA_DEVICE_ID, + PROP_NUM_OUTPUT_SURFACES, }; +#define DEFAULT_NUM_OUTPUT_SURFACES 0 + static GTypeClass *parent_class = nullptr; #define GST_NV_VP9_DEC(object) ((GstNvVp9Dec *) (object)) #define GST_NV_VP9_DEC_GET_CLASS(object) \ (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvVp9DecClass)) +static void gst_nv_vp9_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); static void gst_nv_vp9_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); @@ -81,11 +88,14 @@ static void gst_nv_vp9_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_vp9_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_vp9_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_vp9_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_vp9_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_vp9_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_vp9_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); +static gboolean gst_nv_vp9_dec_sink_event (GstVideoDecoder * decoder, + GstEvent * event); /* GstVp9Decoder */ static GstFlowReturn gst_nv_vp9_dec_new_sequence (GstVp9Decoder * decoder, @@ -110,6 +120,7 @@ gst_nv_vp9_dec_class_init (GstNvVp9DecClass * klass, GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); GstVp9DecoderClass *vp9decoder_class = GST_VP9_DECODER_CLASS (klass); + object_class->set_property = gst_nv_vp9_dec_set_property; object_class->get_property = gst_nv_vp9_dec_get_property; /** @@ -124,6 +135,23 @@ gst_nv_vp9_dec_class_init 
(GstNvVp9DecClass * klass, "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + /** + * GstNvVp9SLDec:num-output-surfaces: + * + * The number of output surfaces (0 = auto). This property will be used to + * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter + * in case of CUDA output mode + * + * Since: 1.24 + */ + g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, + g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", + "Maximum number of output surfaces simultaneously mapped in CUDA " + "output mode (0 = auto)", + 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, + (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | + G_PARAM_STATIC_STRINGS))); + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); @@ -141,10 +169,12 @@ gst_nv_vp9_dec_class_init (GstNvVp9DecClass * klass, decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_close); + decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_src_query); + decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_sink_event); vp9decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_vp9_dec_new_sequence); @@ -169,18 +199,39 @@ gst_nv_vp9_dec_class_init (GstNvVp9DecClass * klass, static void gst_nv_vp9_dec_init (GstNvVp9Dec * self) { + self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; +} + +static void +gst_nv_vp9_dec_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvVp9Dec *self = GST_NV_VP9_DEC (object); + + switch (prop_id) { + case PROP_NUM_OUTPUT_SURFACES: + self->num_output_surfaces = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } } static void gst_nv_vp9_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { + GstNvVp9Dec *self = GST_NV_VP9_DEC (object); GstNvVp9DecClass *klass = GST_NV_VP9_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; + case PROP_NUM_OUTPUT_SURFACES: + g_value_set_uint (value, self->num_output_surfaces); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -247,6 +298,20 @@ gst_nv_vp9_dec_close (GstVideoDecoder * decoder) return TRUE; } +static gboolean +gst_nv_vp9_dec_stop (GstVideoDecoder * decoder) +{ + GstNvVp9Dec *self = GST_NV_VP9_DEC (decoder); + gboolean ret; + + ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); + + if (self->decoder) + gst_nv_decoder_reset (self->decoder); + + return ret; +} + static gboolean gst_nv_vp9_dec_negotiate (GstVideoDecoder * decoder) { @@ -298,6 +363,29 @@ gst_nv_vp9_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } +static gboolean +gst_nv_vp9_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) +{ + GstNvVp9Dec *self = GST_NV_VP9_DEC (decoder); + + if (!self->decoder) + goto done; + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_FLUSH_START: + gst_nv_decoder_set_flushing (self->decoder, TRUE); + break; + case 
GST_EVENT_FLUSH_STOP: + gst_nv_decoder_set_flushing (self->decoder, FALSE); + break; + default: + break; + } + +done: + return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); +} + static GstFlowReturn gst_nv_vp9_dec_new_sequence (GstVp9Decoder * decoder, const GstVp9FrameHeader * frame_hdr, gint max_dpb_size) @@ -327,10 +415,12 @@ gst_nv_vp9_dec_new_sequence (GstVp9Decoder * decoder, return GST_FLOW_NOT_NEGOTIATED; } - gst_video_info_set_format (&info, out_format, self->width, self->height); + gst_video_info_set_format (&info, out_format, GST_ROUND_UP_2 (self->width), + GST_ROUND_UP_2 (self->height)); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_VP9, &info, self->width, self->height, - frame_hdr->bit_depth, max_dpb_size, FALSE)) { + frame_hdr->bit_depth, max_dpb_size, FALSE, + self->num_output_surfaces)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } @@ -352,35 +442,33 @@ gst_nv_vp9_dec_new_picture (GstVp9Decoder * decoder, GstVideoCodecFrame * frame, GstVp9Picture * picture) { GstNvVp9Dec *self = GST_NV_VP9_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; + GstFlowReturn ret; - nv_frame = gst_nv_decoder_new_frame (self->decoder); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "No available decoder frame"); - return GST_FLOW_ERROR; - } + ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); + if (ret != GST_FLOW_OK) + return ret; GST_LOG_OBJECT (self, - "New decoder frame %p (index %d)", nv_frame, nv_frame->index); + "New decoder frame %p (index %d)", surface, surface->index); gst_vp9_picture_set_user_data (picture, - nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } -static GstNvDecoderFrame * +static GstNvDecSurface * gst_nv_vp9_dec_get_decoder_frame_from_picture (GstNvVp9Dec * self, GstVp9Picture * picture) { - GstNvDecoderFrame *frame; + GstNvDecSurface *surface; - frame = (GstNvDecoderFrame *) gst_vp9_picture_get_user_data (picture); + surface = (GstNvDecSurface *) gst_vp9_picture_get_user_data (picture); + if (!surface) + GST_DEBUG_OBJECT (self, "current picture does not have decoder surface"); - if (!frame) - GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); - - return frame; + return surface; } static GstVp9Picture * @@ -388,13 +476,13 @@ gst_nv_vp9_dec_duplicate_picture (GstVp9Decoder * decoder, GstVideoCodecFrame * frame, GstVp9Picture * picture) { GstNvVp9Dec *self = GST_NV_VP9_DEC (decoder); - GstNvDecoderFrame *nv_frame; + GstNvDecSurface *surface; GstVp9Picture *new_picture; - nv_frame = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, picture); + surface = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, picture); - if (!nv_frame) { - GST_ERROR_OBJECT (self, "Parent picture does not have decoder frame"); + if (!surface) { + GST_ERROR_OBJECT (self, "Parent picture does not have decoder surface"); return nullptr; } @@ -402,8 +490,8 @@ gst_nv_vp9_dec_duplicate_picture (GstVp9Decoder * decoder, new_picture->frame_hdr = picture->frame_hdr; gst_vp9_picture_set_user_data (new_picture, - gst_nv_decoder_frame_ref (nv_frame), - (GDestroyNotify) gst_nv_decoder_frame_unref); + gst_nv_dec_surface_ref (surface), + (GDestroyNotify) gst_nv_dec_surface_unref); return new_picture; } @@ -419,8 +507,8 @@ gst_nv_vp9_dec_decode_picture (GstVp9Decoder * decoder, const GstVp9QuantizationParams *qp = &frame_hdr->quantization_params; CUVIDPICPARAMS *params = &self->params; 
CUVIDVP9PICPARAMS *vp9_params = &params->CodecSpecific.vp9; - GstNvDecoderFrame *frame; - GstNvDecoderFrame *other_frame; + GstNvDecSurface *surface; + GstNvDecSurface *other_surface; guint offset = 0; guint8 ref_frame_map[GST_VP9_REF_FRAMES]; gint i; @@ -454,8 +542,8 @@ gst_nv_vp9_dec_decode_picture (GstVp9Decoder * decoder, GST_LOG_OBJECT (self, "Decode picture, size %" G_GSIZE_FORMAT, picture->size); - frame = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, picture); - if (!frame) { + surface = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, picture); + if (!surface) { GST_ERROR_OBJECT (self, "Decoder frame is unavailable"); return GST_FLOW_ERROR; } @@ -467,21 +555,21 @@ gst_nv_vp9_dec_decode_picture (GstVp9Decoder * decoder, params->PicWidthInMbs = GST_ROUND_UP_16 (frame_hdr->width) >> 4; params->FrameHeightInMbs = GST_ROUND_UP_16 (frame_hdr->height) >> 4; - params->CurrPicIdx = frame->index; + params->CurrPicIdx = surface->index; vp9_params->width = frame_hdr->width; vp9_params->height = frame_hdr->height; for (i = 0; i < GST_VP9_REF_FRAMES; i++) { if (dpb->pic_list[i]) { - other_frame = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, + other_surface = gst_nv_vp9_dec_get_decoder_frame_from_picture (self, dpb->pic_list[i]); - if (!other_frame) { + if (!other_surface) { GST_ERROR_OBJECT (self, "Couldn't get decoder frame from picture"); return GST_FLOW_ERROR; } - ref_frame_map[i] = other_frame->index; + ref_frame_map[i] = other_surface->index; } else { ref_frame_map[i] = 0xff; } @@ -549,7 +637,7 @@ gst_nv_vp9_dec_decode_picture (GstVp9Decoder * decoder, memcpy (vp9_params->segmentFeatureData, sp->feature_data, sizeof (sp->feature_data)); - if (!gst_nv_decoder_decode_picture (self->decoder, &self->params)) + if (!gst_nv_decoder_decode (self->decoder, &self->params)) return GST_FLOW_ERROR; return GST_FLOW_OK; @@ -561,21 +649,21 @@ gst_nv_vp9_dec_output_picture (GstVp9Decoder * decoder, { GstNvVp9Dec *self = GST_NV_VP9_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); - GstNvDecoderFrame *decoder_frame; + GstNvDecSurface *surface; + GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p", picture); - decoder_frame = (GstNvDecoderFrame *) gst_vp9_picture_get_user_data (picture); - if (!decoder_frame) { + surface = (GstNvDecSurface *) gst_vp9_picture_get_user_data (picture); + if (!surface) { GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); goto error; } - if (!gst_nv_decoder_finish_frame (self->decoder, vdec, picture->discont_state, - decoder_frame, &frame->output_buffer)) { - GST_ERROR_OBJECT (self, "Failed to handle output picture"); + ret = gst_nv_decoder_finish_surface (self->decoder, + vdec, picture->discont_state, surface, &frame->output_buffer); + if (ret != GST_FLOW_OK) goto error; - } gst_vp9_picture_unref (picture); @@ -585,7 +673,7 @@ error: gst_video_decoder_drop_frame (vdec, frame); gst_vp9_picture_unref (picture); - return GST_FLOW_ERROR; + return ret; } static guint diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build index c8be20c367..335fdd407f 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build +++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build @@ -8,6 +8,7 @@ nvcodec_sources = [ 'gstnvav1dec.cpp', 'gstnvbaseenc.c', 'gstnvdec.c', + 'gstnvdecobject.cpp', 'gstnvdecoder.cpp', 'gstnvenc.c', 'gstnvencoder.cpp',
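From the application side, the new num-output-surfaces property only comes into play when CUDA output (memory:CUDAMemory caps) is negotiated: it bounds how many decoded surfaces may stay mapped as zero-copy GstCudaMemory at once, and 0 leaves the choice to the decoder. A minimal usage sketch, assuming the stateless H.264 element keeps its usual nvh264sldec factory name (element registration is outside this patch):

    #include <gst/gst.h>

    int
    main (int argc, char ** argv)
    {
      GstElement *dec;

      gst_init (&argc, &argv);

      /* assumption: the stateless H.264 decoder factory is "nvh264sldec" */
      dec = gst_element_factory_make ("nvh264sldec", NULL);
      if (dec) {
        /* 0 (the default) lets the decoder derive ulNumOutputSurfaces itself;
         * a non-zero value caps how many decoded surfaces can stay mapped as
         * zero-copy CUDA memory at the same time */
        g_object_set (dec, "num-output-surfaces", 4, NULL);
        gst_object_unref (dec);
      }

      return 0;
    }

On the command line the equivalent would be something like nvh264sldec num-output-surfaces=4 with video/x-raw(memory:CUDAMemory) caps downstream.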