From 49bccf0433b2ce5387c824371a24a521175d034b Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Thu, 28 Nov 2019 18:54:31 +0900 Subject: [PATCH] nvcodec: Refactor plugin initialization Create one CUDA context per device, instead of one per codec and encoder/decoder. Allocating a CUDA context is a heavy operation, so we should reuse it as much as possible. Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/1130 --- sys/nvcodec/gstcudaloader.c | 3 + sys/nvcodec/gstnvbaseenc.c | 1 + sys/nvcodec/gstnvdec.c | 55 ++++----------- sys/nvcodec/gstnvdec.h | 4 +- sys/nvcodec/gstnvenc.c | 133 +++++++++++++----------------------- sys/nvcodec/gstnvenc.h | 8 ++- sys/nvcodec/plugin.c | 74 ++++++++++++++++++-- 7 files changed, 143 insertions(+), 135 deletions(-) diff --git a/sys/nvcodec/gstcudaloader.c b/sys/nvcodec/gstcudaloader.c index 2cd7ab23d5..4eece17041 100644 --- a/sys/nvcodec/gstcudaloader.c +++ b/sys/nvcodec/gstcudaloader.c @@ -24,6 +24,9 @@ #include "gstcudaloader.h" #include +GST_DEBUG_CATEGORY_EXTERN (gst_nvcodec_debug); +#define GST_CAT_DEFAULT gst_nvcodec_debug + #ifndef G_OS_WIN32 #define CUDA_LIBNAME "libcuda.so.1" #else diff --git a/sys/nvcodec/gstnvbaseenc.c b/sys/nvcodec/gstnvbaseenc.c index 317ebaa3ac..c41145c71a 100644 --- a/sys/nvcodec/gstnvbaseenc.c +++ b/sys/nvcodec/gstnvbaseenc.c @@ -28,6 +28,7 @@ #include +GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug); #define GST_CAT_DEFAULT gst_nvenc_debug #if HAVE_NVCODEC_GST_GL diff --git a/sys/nvcodec/gstnvdec.c b/sys/nvcodec/gstnvdec.c index c9d34de240..f2cffff6bd 100644 --- a/sys/nvcodec/gstnvdec.c +++ b/sys/nvcodec/gstnvdec.c @@ -34,8 +34,8 @@ #include -GST_DEBUG_CATEGORY_STATIC (gst_nvdec_debug_category); -#define GST_CAT_DEFAULT gst_nvdec_debug_category +GST_DEBUG_CATEGORY_EXTERN (gst_nvdec_debug); +#define GST_CAT_DEFAULT gst_nvdec_debug #ifdef HAVE_NVCODEC_GST_GL #define SUPPORTED_GL_APIS (GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | GST_GL_API_GLES2) @@ -1544,13 +1544,9 @@ typedef struct static void
gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type, const gchar * codec, const gchar * sink_caps_string, guint rank, - gint device_count) + gint device_idx, CUcontext cuda_ctx) { - gint i; - - for (i = 0; i < device_count; i++) { - CUdevice cuda_device; - CUcontext cuda_ctx; + { CUresult cuda_ret; gint max_width = 0, min_width = G_MAXINT; gint max_height = 0, min_height = G_MAXINT; @@ -1575,18 +1571,12 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type, GValue format = G_VALUE_INIT; GValue profile_list = G_VALUE_INIT; - if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS) - continue; - - if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS) - continue; - g_value_init (&format_list, GST_TYPE_LIST); g_value_init (&format, G_TYPE_STRING); g_value_init (&profile_list, GST_TYPE_LIST); if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS) - goto cuda_free; + goto done; for (c_idx = 0; c_idx < G_N_ELEMENTS (chroma_list); c_idx++) { for (b_idx = 0; b_idx < G_N_ELEMENTS (bitdepth_minus8); b_idx++) { @@ -1682,7 +1672,7 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type, if (num_support == 0) { GST_INFO ("device can not support %s", codec); - goto cuda_free; + goto done; } src_templ = gst_caps_new_simple ("video/x-raw", @@ -1717,16 +1707,14 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type, CuCtxPopCurrent (NULL); - cuda_free: - CuCtxDestroy (cuda_ctx); - + done: g_value_unset (&format_list); g_value_unset (&format); g_value_unset (&profile_list); if (sink_templ && src_templ) { - gst_nvdec_subclass_register (plugin, type, codec_type, codec, i, rank, - sink_templ, src_templ); + gst_nvdec_subclass_register (plugin, type, codec_type, codec, device_idx, + rank, sink_templ, src_templ); } gst_clear_caps (&sink_templ); @@ -1773,14 +1761,10 @@ const GstNvCodecMap codec_map[] = { }; void -gst_nvdec_plugin_init (GstPlugin * plugin) +gst_nvdec_plugin_init (GstPlugin * 
plugin, guint device_index, + CUcontext cuda_ctx) { gint i; - CUresult cuda_ret; - gint dev_count = 0; - - GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug_category, "nvdec", 0, - "Debug category for the nvdec element"); if (!gst_cuvid_can_get_decoder_caps ()) { GstCaps *src_templ; @@ -1804,27 +1788,16 @@ gst_nvdec_plugin_init (GstPlugin * plugin) sink_templ = gst_caps_from_string (codec_map[i].sink_caps_string); gst_nvdec_subclass_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec, - codec_map[i].codec_name, 0, GST_RANK_PRIMARY, sink_templ, src_templ); + codec_map[i].codec_name, device_index, GST_RANK_PRIMARY, + sink_templ, src_templ); } return; } - cuda_ret = CuInit (0); - if (cuda_ret != CUDA_SUCCESS) { - GST_ERROR ("Failed to initialize CUDA API"); - return; - } - - cuda_ret = CuDeviceGetCount (&dev_count); - if (cuda_ret != CUDA_SUCCESS || dev_count == 0) { - GST_ERROR ("No CUDA devices detected"); - return; - } - for (i = 0; i < G_N_ELEMENTS (codec_map); i++) { gst_nvdec_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec, codec_map[i].codec_name, codec_map[i].sink_caps_string, - GST_RANK_PRIMARY, dev_count); + GST_RANK_PRIMARY, device_index, cuda_ctx); } } diff --git a/sys/nvcodec/gstnvdec.h b/sys/nvcodec/gstnvdec.h index 9a7e9252db..65bd011b4e 100644 --- a/sys/nvcodec/gstnvdec.h +++ b/sys/nvcodec/gstnvdec.h @@ -101,7 +101,9 @@ struct _GstNvDecClass GType gst_nvdec_get_type (void); -void gst_nvdec_plugin_init (GstPlugin * plugin); +void gst_nvdec_plugin_init (GstPlugin * plugin, + guint device_index, + CUcontext cuda_ctx); G_END_DECLS diff --git a/sys/nvcodec/gstnvenc.c b/sys/nvcodec/gstnvenc.c index 46450887e3..3523f7eed8 100644 --- a/sys/nvcodec/gstnvenc.c +++ b/sys/nvcodec/gstnvenc.c @@ -44,7 +44,7 @@ typedef NVENCSTATUS NVENCAPI tNvEncodeAPICreateInstance (NV_ENCODE_API_FUNCTION_LIST * functionList); tNvEncodeAPICreateInstance *nvEncodeAPICreateInstance; -GST_DEBUG_CATEGORY (gst_nvenc_debug); +GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug); #define 
GST_CAT_DEFAULT gst_nvenc_debug static NV_ENCODE_API_FUNCTION_LIST nvenc_api; @@ -290,27 +290,6 @@ gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt) return NV_ENC_BUFFER_FORMAT_UNDEFINED; } -static gboolean -load_nvenc_library (void) -{ - GModule *module; - - module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY); - if (module == NULL) { - GST_WARNING ("Could not open library %s, %s", - NVENC_LIBRARY_NAME, g_module_error ()); - return FALSE; - } - - if (!g_module_symbol (module, "NvEncodeAPICreateInstance", - (gpointer *) & nvEncodeAPICreateInstance)) { - GST_ERROR ("%s", g_module_error ()); - return FALSE; - } - - return TRUE; -} - typedef struct { GstVideoFormat gst_format; @@ -603,13 +582,9 @@ gst_nvenc_get_supported_codec_profiles (gpointer enc, GUID codec_id) static void gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, - guint rank, gint device_count) + guint rank, gint device_index, CUcontext cuda_ctx) { - gint i; - - for (i = 0; i < device_count; i++) { - CUdevice cuda_device; - CUcontext cuda_ctx, dummy; + { GValue *formats = NULL; GValue *profiles; GValue *interlace_modes; @@ -626,23 +601,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, gint j; GstNvEncDeviceCaps device_caps = { 0, }; - if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS) - continue; - - if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS) - continue; - - if (CuCtxPopCurrent (&dummy) != CUDA_SUCCESS) { - goto cuda_free; - } - params.version = gst_nvenc_get_open_encode_session_ex_params_version (); params.apiVersion = gst_nvenc_get_api_version (); params.device = cuda_ctx; params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS) + goto done; + if (NvEncOpenEncodeSessionEx (&params, &enc) != NV_ENC_SUCCESS) { - goto cuda_free; + CuCtxPopCurrent (NULL); + goto done; } if (NvEncGetEncodeGUIDs (enc, guids, G_N_ELEMENTS (guids), @@ -692,7 +661,7 @@ gst_nv_enc_register (GstPlugin *
plugin, GUID codec_id, const gchar * codec, device_caps.rc_modes = 0; } else { GST_DEBUG ("[device-%d %s] rate control modes: 0x%x", - i, codec, device_caps.rc_modes); + device_index, codec, device_caps.rc_modes); #define IS_SUPPORTED_RC(rc_modes,mode) \ ((((rc_modes) & (mode)) == mode) ? "supported" : "not supported") @@ -744,18 +713,20 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, device_caps.bframes = 0; } - DEBUG_DEVICE_CAPS (i, + DEBUG_DEVICE_CAPS (device_index, codec, "weighted prediction", device_caps.weighted_prediction); - DEBUG_DEVICE_CAPS (i, codec, "custom vbv-buffer-size", + DEBUG_DEVICE_CAPS (device_index, codec, "custom vbv-buffer-size", device_caps.custom_vbv_bufsize); - DEBUG_DEVICE_CAPS (i, codec, "rc-loockahead", device_caps.lookahead); + DEBUG_DEVICE_CAPS (device_index, codec, "rc-loockahead", + device_caps.lookahead); - DEBUG_DEVICE_CAPS (i, codec, "temporal adaptive quantization", + DEBUG_DEVICE_CAPS (device_index, codec, "temporal adaptive quantization", device_caps.temporal_aq); - GST_DEBUG ("[device-%d %s] max bframes: %d", i, codec, device_caps.bframes); + GST_DEBUG ("[device-%d %s] max bframes: %d", device_index, codec, + device_caps.bframes); interlace_modes = gst_nvenc_get_interlace_modes (enc, codec_id); @@ -806,18 +777,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, enc_free: NvEncDestroyEncoder (enc); + CuCtxPopCurrent (NULL); /* fall-through */ - cuda_free: - CuCtxDestroy (cuda_ctx); - + done: if (sink_templ && src_templ) { if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_H264_GUID)) { - gst_nv_h264_enc_register (plugin, i, rank, sink_templ, src_templ, - &device_caps); + gst_nv_h264_enc_register (plugin, device_index, rank, sink_templ, + src_templ, &device_caps); } else if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_HEVC_GUID)) { - gst_nv_h265_enc_register (plugin, i, rank, sink_templ, src_templ, - &device_caps); + gst_nv_h265_enc_register (plugin, device_index, 
rank, sink_templ, + src_templ, &device_caps); } else { g_assert_not_reached (); } @@ -837,19 +807,26 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, static guint32 gst_nvenc_api_version = NVENCAPI_VERSION; -void -gst_nvenc_plugin_init (GstPlugin * plugin) +gboolean +gst_nvenc_load_library (void) { - NVENCSTATUS ret = NV_ENC_SUCCESS; + GModule *module; + NVENCSTATUS ret; - GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "Nvidia NVENC encoder"); - - nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER; - if (!load_nvenc_library ()) { - GST_INFO ("Failed to load nvenc library"); - return; + module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY); + if (module == NULL) { + GST_WARNING ("Could not open library %s, %s", + NVENC_LIBRARY_NAME, g_module_error ()); + return FALSE; } + if (!g_module_symbol (module, "NvEncodeAPICreateInstance", + (gpointer *) & nvEncodeAPICreateInstance)) { + GST_ERROR ("%s", g_module_error ()); + return FALSE; + } + + nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER; ret = nvEncodeAPICreateInstance (&nvenc_api); /* WARNING: Any developers who want to bump SDK version must ensure that @@ -892,31 +869,17 @@ gst_nvenc_plugin_init (GstPlugin * plugin) ret = nvEncodeAPICreateInstance (&nvenc_api); } - if (ret == NV_ENC_SUCCESS) { - CUresult cuda_ret; - gint dev_count = 0; + return ret == NV_ENC_SUCCESS; +} - GST_INFO ("Created NVEncodeAPI instance, got function table"); - - cuda_ret = CuInit (0); - if (cuda_ret != CUDA_SUCCESS) { - GST_ERROR ("Failed to initialize CUDA API"); - return; - } - - cuda_ret = CuDeviceGetCount (&dev_count); - if (cuda_ret != CUDA_SUCCESS || dev_count == 0) { - GST_ERROR ("No CUDA devices detected"); - return; - } - - gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID, - "h264", GST_RANK_PRIMARY * 2, dev_count); - gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID, - "h265", GST_RANK_PRIMARY * 2, dev_count); - } else { - GST_ERROR ("too old driver, could not load api 
vtable"); - } +void +gst_nvenc_plugin_init (GstPlugin * plugin, guint device_index, + CUcontext cuda_ctx) +{ + gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID, + "h264", GST_RANK_PRIMARY * 2, device_index, cuda_ctx); + gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID, + "h265", GST_RANK_PRIMARY * 2, device_index, cuda_ctx); } guint32 diff --git a/sys/nvcodec/gstnvenc.h b/sys/nvcodec/gstnvenc.h index 492108b5a9..dff0712675 100644 --- a/sys/nvcodec/gstnvenc.h +++ b/sys/nvcodec/gstnvenc.h @@ -26,8 +26,6 @@ #include "gstcudaloader.h" #include "nvEncodeAPI.h" -GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug); - G_GNUC_INTERNAL gboolean gst_nvenc_cmp_guid (GUID g1, GUID g2); @@ -48,7 +46,9 @@ GValue * gst_nvenc_get_supported_codec_profiles (gpointer enc, GUID codec_id); G_GNUC_INTERNAL -void gst_nvenc_plugin_init (GstPlugin * plugin); +void gst_nvenc_plugin_init (GstPlugin * plugin, + guint device_index, + CUcontext cuda_ctx); G_GNUC_INTERNAL guint32 gst_nvenc_get_api_version (void); @@ -113,5 +113,7 @@ guint32 gst_nvenc_get_event_params_version (void); G_GNUC_INTERNAL guint32 gst_nvenc_get_open_encode_session_ex_params_version (void); +G_GNUC_INTERNAL +gboolean gst_nvenc_load_library (void); #endif /* __GST_NVENC_H_INCLUDED__ */ diff --git a/sys/nvcodec/plugin.c b/sys/nvcodec/plugin.c index edec694a78..f3d5d33f7e 100644 --- a/sys/nvcodec/plugin.c +++ b/sys/nvcodec/plugin.c @@ -32,17 +32,87 @@ #include "gstnvdec.h" #include "gstnvenc.h" +GST_DEBUG_CATEGORY (gst_nvcodec_debug); +GST_DEBUG_CATEGORY (gst_nvdec_debug); +GST_DEBUG_CATEGORY (gst_nvenc_debug); + +#define GST_CAT_DEFAULT gst_nvcodec_debug + +/* Registers the nvdec and nvenc elements for every usable CUDA device, + * creating one CUDA context per device and passing it to both the decoder and + * encoder registration. Every return path returns TRUE, including the ones + * taken when a library or device is unavailable. */ static gboolean plugin_init (GstPlugin * plugin) { - if (!gst_cuda_load_library ()) - return TRUE; + CUresult cuda_ret; + gint dev_count = 0; + gint i; + gboolean nvdec_available = TRUE; + gboolean nvenc_available = TRUE; - if (gst_cuvid_load_library ()) { - gst_nvdec_plugin_init (plugin); + GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec"); +
GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug, "nvdec", 0, "nvdec"); + GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "nvenc"); + + if (!gst_cuda_load_library ()) { + GST_WARNING ("Failed to load cuda library"); + return TRUE; } - gst_nvenc_plugin_init (plugin); + if (!gst_cuvid_load_library ()) { + GST_WARNING ("Failed to load nvdec library"); + nvdec_available = FALSE; + } + + if (!gst_nvenc_load_library ()) { + GST_WARNING ("Failed to load nvenc library"); + nvenc_available = FALSE; + } + + if (!nvdec_available && !nvenc_available) + return TRUE; + + cuda_ret = CuInit (0); + if (cuda_ret != CUDA_SUCCESS) { + GST_WARNING ("Failed to init cuda, ret: 0x%x", (gint) cuda_ret); + return TRUE; + } + + /* Keep the result code so the warning reports the actual failure */ + cuda_ret = CuDeviceGetCount (&dev_count); + if (cuda_ret != CUDA_SUCCESS || !dev_count) { + GST_WARNING ("No available device, ret: 0x%x", (gint) cuda_ret); + return TRUE; + } + + for (i = 0; i < dev_count; i++) { + CUdevice cuda_device; + CUcontext cuda_ctx; + + cuda_ret = CuDeviceGet (&cuda_device, i); + if (cuda_ret != CUDA_SUCCESS) { + GST_WARNING ("Failed to get device handle %d, ret: 0x%x", i, + (gint) cuda_ret); + continue; + } + + cuda_ret = CuCtxCreate (&cuda_ctx, 0, cuda_device); + if (cuda_ret != CUDA_SUCCESS) { + GST_WARNING ("Failed to create cuda context, ret: 0x%x", (gint) cuda_ret); + continue; + } + + CuCtxPopCurrent (NULL); + + if (nvdec_available) + gst_nvdec_plugin_init (plugin, i, cuda_ctx); + + if (nvenc_available) + gst_nvenc_plugin_init (plugin, i, cuda_ctx); + + CuCtxDestroy (cuda_ctx); + } return TRUE; }