nvcodec: Refactor plugin initialization

Create one CUDA context per device, instead of one per codec and per encoder/decoder.
Allocating a CUDA context is a heavy operation, so we should reuse it
as much as possible.

Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/1130
This commit is contained in:
Seungha Yang 2019-11-28 18:54:31 +09:00
parent 2a3205b294
commit 49bccf0433
7 changed files with 143 additions and 135 deletions

View File

@ -24,6 +24,9 @@
#include "gstcudaloader.h" #include "gstcudaloader.h"
#include <gmodule.h> #include <gmodule.h>
GST_DEBUG_CATEGORY_EXTERN (gst_nvcodec_debug);
#define GST_CAT_DEFAULT gst_nvcodec_debug
#ifndef G_OS_WIN32 #ifndef G_OS_WIN32
#define CUDA_LIBNAME "libcuda.so.1" #define CUDA_LIBNAME "libcuda.so.1"
#else #else

View File

@ -28,6 +28,7 @@
#include <string.h> #include <string.h>
GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvenc_debug #define GST_CAT_DEFAULT gst_nvenc_debug
#if HAVE_NVCODEC_GST_GL #if HAVE_NVCODEC_GST_GL

View File

@ -34,8 +34,8 @@
#include <string.h> #include <string.h>
GST_DEBUG_CATEGORY_STATIC (gst_nvdec_debug_category); GST_DEBUG_CATEGORY_EXTERN (gst_nvdec_debug);
#define GST_CAT_DEFAULT gst_nvdec_debug_category #define GST_CAT_DEFAULT gst_nvdec_debug
#ifdef HAVE_NVCODEC_GST_GL #ifdef HAVE_NVCODEC_GST_GL
#define SUPPORTED_GL_APIS (GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | GST_GL_API_GLES2) #define SUPPORTED_GL_APIS (GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | GST_GL_API_GLES2)
@ -1544,13 +1544,9 @@ typedef struct
static void static void
gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type, gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
const gchar * codec, const gchar * sink_caps_string, guint rank, const gchar * codec, const gchar * sink_caps_string, guint rank,
gint device_count) gint device_idx, CUcontext cuda_ctx)
{ {
gint i; {
for (i = 0; i < device_count; i++) {
CUdevice cuda_device;
CUcontext cuda_ctx;
CUresult cuda_ret; CUresult cuda_ret;
gint max_width = 0, min_width = G_MAXINT; gint max_width = 0, min_width = G_MAXINT;
gint max_height = 0, min_height = G_MAXINT; gint max_height = 0, min_height = G_MAXINT;
@ -1575,18 +1571,12 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
GValue format = G_VALUE_INIT; GValue format = G_VALUE_INIT;
GValue profile_list = G_VALUE_INIT; GValue profile_list = G_VALUE_INIT;
if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS)
continue;
if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS)
continue;
g_value_init (&format_list, GST_TYPE_LIST); g_value_init (&format_list, GST_TYPE_LIST);
g_value_init (&format, G_TYPE_STRING); g_value_init (&format, G_TYPE_STRING);
g_value_init (&profile_list, GST_TYPE_LIST); g_value_init (&profile_list, GST_TYPE_LIST);
if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS) if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS)
goto cuda_free; goto done;
for (c_idx = 0; c_idx < G_N_ELEMENTS (chroma_list); c_idx++) { for (c_idx = 0; c_idx < G_N_ELEMENTS (chroma_list); c_idx++) {
for (b_idx = 0; b_idx < G_N_ELEMENTS (bitdepth_minus8); b_idx++) { for (b_idx = 0; b_idx < G_N_ELEMENTS (bitdepth_minus8); b_idx++) {
@ -1682,7 +1672,7 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
if (num_support == 0) { if (num_support == 0) {
GST_INFO ("device can not support %s", codec); GST_INFO ("device can not support %s", codec);
goto cuda_free; goto done;
} }
src_templ = gst_caps_new_simple ("video/x-raw", src_templ = gst_caps_new_simple ("video/x-raw",
@ -1717,16 +1707,14 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
CuCtxPopCurrent (NULL); CuCtxPopCurrent (NULL);
cuda_free: done:
CuCtxDestroy (cuda_ctx);
g_value_unset (&format_list); g_value_unset (&format_list);
g_value_unset (&format); g_value_unset (&format);
g_value_unset (&profile_list); g_value_unset (&profile_list);
if (sink_templ && src_templ) { if (sink_templ && src_templ) {
gst_nvdec_subclass_register (plugin, type, codec_type, codec, i, rank, gst_nvdec_subclass_register (plugin, type, codec_type, codec, device_idx,
sink_templ, src_templ); rank, sink_templ, src_templ);
} }
gst_clear_caps (&sink_templ); gst_clear_caps (&sink_templ);
@ -1773,14 +1761,10 @@ const GstNvCodecMap codec_map[] = {
}; };
void void
gst_nvdec_plugin_init (GstPlugin * plugin) gst_nvdec_plugin_init (GstPlugin * plugin, guint device_index,
CUcontext cuda_ctx)
{ {
gint i; gint i;
CUresult cuda_ret;
gint dev_count = 0;
GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug_category, "nvdec", 0,
"Debug category for the nvdec element");
if (!gst_cuvid_can_get_decoder_caps ()) { if (!gst_cuvid_can_get_decoder_caps ()) {
GstCaps *src_templ; GstCaps *src_templ;
@ -1804,27 +1788,16 @@ gst_nvdec_plugin_init (GstPlugin * plugin)
sink_templ = gst_caps_from_string (codec_map[i].sink_caps_string); sink_templ = gst_caps_from_string (codec_map[i].sink_caps_string);
gst_nvdec_subclass_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec, gst_nvdec_subclass_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec,
codec_map[i].codec_name, 0, GST_RANK_PRIMARY, sink_templ, src_templ); codec_map[i].codec_name, device_index, GST_RANK_PRIMARY,
sink_templ, src_templ);
} }
return; return;
} }
cuda_ret = CuInit (0);
if (cuda_ret != CUDA_SUCCESS) {
GST_ERROR ("Failed to initialize CUDA API");
return;
}
cuda_ret = CuDeviceGetCount (&dev_count);
if (cuda_ret != CUDA_SUCCESS || dev_count == 0) {
GST_ERROR ("No CUDA devices detected");
return;
}
for (i = 0; i < G_N_ELEMENTS (codec_map); i++) { for (i = 0; i < G_N_ELEMENTS (codec_map); i++) {
gst_nvdec_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec, gst_nvdec_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec,
codec_map[i].codec_name, codec_map[i].sink_caps_string, codec_map[i].codec_name, codec_map[i].sink_caps_string,
GST_RANK_PRIMARY, dev_count); GST_RANK_PRIMARY, device_index, cuda_ctx);
} }
} }

View File

@ -101,7 +101,9 @@ struct _GstNvDecClass
GType gst_nvdec_get_type (void); GType gst_nvdec_get_type (void);
void gst_nvdec_plugin_init (GstPlugin * plugin); void gst_nvdec_plugin_init (GstPlugin * plugin,
guint device_index,
CUcontext cuda_ctx);
G_END_DECLS G_END_DECLS

View File

@ -44,7 +44,7 @@ typedef NVENCSTATUS NVENCAPI
tNvEncodeAPICreateInstance (NV_ENCODE_API_FUNCTION_LIST * functionList); tNvEncodeAPICreateInstance (NV_ENCODE_API_FUNCTION_LIST * functionList);
tNvEncodeAPICreateInstance *nvEncodeAPICreateInstance; tNvEncodeAPICreateInstance *nvEncodeAPICreateInstance;
GST_DEBUG_CATEGORY (gst_nvenc_debug); GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvenc_debug #define GST_CAT_DEFAULT gst_nvenc_debug
static NV_ENCODE_API_FUNCTION_LIST nvenc_api; static NV_ENCODE_API_FUNCTION_LIST nvenc_api;
@ -290,27 +290,6 @@ gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt)
return NV_ENC_BUFFER_FORMAT_UNDEFINED; return NV_ENC_BUFFER_FORMAT_UNDEFINED;
} }
static gboolean
load_nvenc_library (void)
{
GModule *module;
module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY);
if (module == NULL) {
GST_WARNING ("Could not open library %s, %s",
NVENC_LIBRARY_NAME, g_module_error ());
return FALSE;
}
if (!g_module_symbol (module, "NvEncodeAPICreateInstance",
(gpointer *) & nvEncodeAPICreateInstance)) {
GST_ERROR ("%s", g_module_error ());
return FALSE;
}
return TRUE;
}
typedef struct typedef struct
{ {
GstVideoFormat gst_format; GstVideoFormat gst_format;
@ -603,13 +582,9 @@ gst_nvenc_get_supported_codec_profiles (gpointer enc, GUID codec_id)
static void static void
gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec, gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
guint rank, gint device_count) guint rank, gint device_index, CUcontext cuda_ctx)
{ {
gint i; {
for (i = 0; i < device_count; i++) {
CUdevice cuda_device;
CUcontext cuda_ctx, dummy;
GValue *formats = NULL; GValue *formats = NULL;
GValue *profiles; GValue *profiles;
GValue *interlace_modes; GValue *interlace_modes;
@ -626,23 +601,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
gint j; gint j;
GstNvEncDeviceCaps device_caps = { 0, }; GstNvEncDeviceCaps device_caps = { 0, };
if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS)
continue;
if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS)
continue;
if (CuCtxPopCurrent (&dummy) != CUDA_SUCCESS) {
goto cuda_free;
}
params.version = gst_nvenc_get_open_encode_session_ex_params_version (); params.version = gst_nvenc_get_open_encode_session_ex_params_version ();
params.apiVersion = gst_nvenc_get_api_version (); params.apiVersion = gst_nvenc_get_api_version ();
params.device = cuda_ctx; params.device = cuda_ctx;
params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS)
goto done;
if (NvEncOpenEncodeSessionEx (&params, &enc) != NV_ENC_SUCCESS) { if (NvEncOpenEncodeSessionEx (&params, &enc) != NV_ENC_SUCCESS) {
goto cuda_free; CuCtxPopCurrent (NULL);
goto done;
} }
if (NvEncGetEncodeGUIDs (enc, guids, G_N_ELEMENTS (guids), if (NvEncGetEncodeGUIDs (enc, guids, G_N_ELEMENTS (guids),
@ -692,7 +661,7 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
device_caps.rc_modes = 0; device_caps.rc_modes = 0;
} else { } else {
GST_DEBUG ("[device-%d %s] rate control modes: 0x%x", GST_DEBUG ("[device-%d %s] rate control modes: 0x%x",
i, codec, device_caps.rc_modes); device_index, codec, device_caps.rc_modes);
#define IS_SUPPORTED_RC(rc_modes,mode) \ #define IS_SUPPORTED_RC(rc_modes,mode) \
((((rc_modes) & (mode)) == mode) ? "supported" : "not supported") ((((rc_modes) & (mode)) == mode) ? "supported" : "not supported")
@ -744,18 +713,20 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
device_caps.bframes = 0; device_caps.bframes = 0;
} }
DEBUG_DEVICE_CAPS (i, DEBUG_DEVICE_CAPS (device_index,
codec, "weighted prediction", device_caps.weighted_prediction); codec, "weighted prediction", device_caps.weighted_prediction);
DEBUG_DEVICE_CAPS (i, codec, "custom vbv-buffer-size", DEBUG_DEVICE_CAPS (device_index, codec, "custom vbv-buffer-size",
device_caps.custom_vbv_bufsize); device_caps.custom_vbv_bufsize);
DEBUG_DEVICE_CAPS (i, codec, "rc-loockahead", device_caps.lookahead); DEBUG_DEVICE_CAPS (device_index, codec, "rc-loockahead",
device_caps.lookahead);
DEBUG_DEVICE_CAPS (i, codec, "temporal adaptive quantization", DEBUG_DEVICE_CAPS (device_index, codec, "temporal adaptive quantization",
device_caps.temporal_aq); device_caps.temporal_aq);
GST_DEBUG ("[device-%d %s] max bframes: %d", i, codec, device_caps.bframes); GST_DEBUG ("[device-%d %s] max bframes: %d", device_index, codec,
device_caps.bframes);
interlace_modes = gst_nvenc_get_interlace_modes (enc, codec_id); interlace_modes = gst_nvenc_get_interlace_modes (enc, codec_id);
@ -806,18 +777,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
enc_free: enc_free:
NvEncDestroyEncoder (enc); NvEncDestroyEncoder (enc);
CuCtxPopCurrent (NULL);
/* fall-through */ /* fall-through */
cuda_free: done:
CuCtxDestroy (cuda_ctx);
if (sink_templ && src_templ) { if (sink_templ && src_templ) {
if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_H264_GUID)) { if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_H264_GUID)) {
gst_nv_h264_enc_register (plugin, i, rank, sink_templ, src_templ, gst_nv_h264_enc_register (plugin, device_index, rank, sink_templ,
&device_caps); src_templ, &device_caps);
} else if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_HEVC_GUID)) { } else if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_HEVC_GUID)) {
gst_nv_h265_enc_register (plugin, i, rank, sink_templ, src_templ, gst_nv_h265_enc_register (plugin, device_index, rank, sink_templ,
&device_caps); src_templ, &device_caps);
} else { } else {
g_assert_not_reached (); g_assert_not_reached ();
} }
@ -837,19 +807,26 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
static guint32 gst_nvenc_api_version = NVENCAPI_VERSION; static guint32 gst_nvenc_api_version = NVENCAPI_VERSION;
void gboolean
gst_nvenc_plugin_init (GstPlugin * plugin) gst_nvenc_load_library (void)
{ {
NVENCSTATUS ret = NV_ENC_SUCCESS; GModule *module;
NVENCSTATUS ret;
GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "Nvidia NVENC encoder"); module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY);
if (module == NULL) {
nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER; GST_WARNING ("Could not open library %s, %s",
if (!load_nvenc_library ()) { NVENC_LIBRARY_NAME, g_module_error ());
GST_INFO ("Failed to load nvenc library"); return FALSE;
return;
} }
if (!g_module_symbol (module, "NvEncodeAPICreateInstance",
(gpointer *) & nvEncodeAPICreateInstance)) {
GST_ERROR ("%s", g_module_error ());
return FALSE;
}
nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER;
ret = nvEncodeAPICreateInstance (&nvenc_api); ret = nvEncodeAPICreateInstance (&nvenc_api);
/* WARNING: Any developers who want to bump SDK version must ensure that /* WARNING: Any developers who want to bump SDK version must ensure that
@ -892,31 +869,17 @@ gst_nvenc_plugin_init (GstPlugin * plugin)
ret = nvEncodeAPICreateInstance (&nvenc_api); ret = nvEncodeAPICreateInstance (&nvenc_api);
} }
if (ret == NV_ENC_SUCCESS) { return ret == NV_ENC_SUCCESS;
CUresult cuda_ret; }
gint dev_count = 0;
GST_INFO ("Created NVEncodeAPI instance, got function table"); void
gst_nvenc_plugin_init (GstPlugin * plugin, guint device_index,
cuda_ret = CuInit (0); CUcontext cuda_ctx)
if (cuda_ret != CUDA_SUCCESS) { {
GST_ERROR ("Failed to initialize CUDA API"); gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID,
return; "h264", GST_RANK_PRIMARY * 2, device_index, cuda_ctx);
} gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID,
"h265", GST_RANK_PRIMARY * 2, device_index, cuda_ctx);
cuda_ret = CuDeviceGetCount (&dev_count);
if (cuda_ret != CUDA_SUCCESS || dev_count == 0) {
GST_ERROR ("No CUDA devices detected");
return;
}
gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID,
"h264", GST_RANK_PRIMARY * 2, dev_count);
gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID,
"h265", GST_RANK_PRIMARY * 2, dev_count);
} else {
GST_ERROR ("too old driver, could not load api vtable");
}
} }
guint32 guint32

View File

@ -26,8 +26,6 @@
#include "gstcudaloader.h" #include "gstcudaloader.h"
#include "nvEncodeAPI.h" #include "nvEncodeAPI.h"
GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
G_GNUC_INTERNAL G_GNUC_INTERNAL
gboolean gst_nvenc_cmp_guid (GUID g1, GUID g2); gboolean gst_nvenc_cmp_guid (GUID g1, GUID g2);
@ -48,7 +46,9 @@ GValue * gst_nvenc_get_supported_codec_profiles (gpointer enc,
GUID codec_id); GUID codec_id);
G_GNUC_INTERNAL G_GNUC_INTERNAL
void gst_nvenc_plugin_init (GstPlugin * plugin); void gst_nvenc_plugin_init (GstPlugin * plugin,
guint device_index,
CUcontext cuda_ctx);
G_GNUC_INTERNAL G_GNUC_INTERNAL
guint32 gst_nvenc_get_api_version (void); guint32 gst_nvenc_get_api_version (void);
@ -113,5 +113,7 @@ guint32 gst_nvenc_get_event_params_version (void);
G_GNUC_INTERNAL G_GNUC_INTERNAL
guint32 gst_nvenc_get_open_encode_session_ex_params_version (void); guint32 gst_nvenc_get_open_encode_session_ex_params_version (void);
G_GNUC_INTERNAL
gboolean gst_nvenc_load_library (void);
#endif /* __GST_NVENC_H_INCLUDED__ */ #endif /* __GST_NVENC_H_INCLUDED__ */

View File

@ -32,17 +32,81 @@
#include "gstnvdec.h" #include "gstnvdec.h"
#include "gstnvenc.h" #include "gstnvenc.h"
GST_DEBUG_CATEGORY (gst_nvcodec_debug);
GST_DEBUG_CATEGORY (gst_nvdec_debug);
GST_DEBUG_CATEGORY (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvcodec_debug
static gboolean static gboolean
plugin_init (GstPlugin * plugin) plugin_init (GstPlugin * plugin)
{ {
if (!gst_cuda_load_library ()) CUresult cuda_ret;
return TRUE; gint dev_count = 0;
gint i;
gboolean nvdec_available = TRUE;
gboolean nvenc_available = TRUE;
if (gst_cuvid_load_library ()) { GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec");
gst_nvdec_plugin_init (plugin); GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug, "nvdec", 0, "nvdec");
GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "nvenc");
if (!gst_cuda_load_library ()) {
GST_WARNING ("Failed to load cuda library");
return TRUE;
} }
gst_nvenc_plugin_init (plugin); if (!gst_cuvid_load_library ()) {
GST_WARNING ("Failed to load nvdec library");
nvdec_available = FALSE;
}
if (!gst_nvenc_load_library ()) {
GST_WARNING ("Failed to load nvenc library");
nvenc_available = FALSE;
}
if (!nvdec_available && !nvenc_available)
return TRUE;
cuda_ret = CuInit (0);
if (cuda_ret != CUDA_SUCCESS) {
GST_WARNING ("Failed to init cuda, ret: 0x%x", (gint) cuda_ret);
return TRUE;
}
if (CuDeviceGetCount (&dev_count) != CUDA_SUCCESS || !dev_count) {
GST_WARNING ("No available device, ret: 0x%x", (gint) cuda_ret);
return TRUE;
}
for (i = 0; i < dev_count; i++) {
CUdevice cuda_device;
CUcontext cuda_ctx;
cuda_ret = CuDeviceGet (&cuda_device, i);
if (cuda_ret != CUDA_SUCCESS) {
GST_WARNING ("Failed to get device handle %d, ret: 0x%x", i,
(gint) cuda_ret);
continue;
}
cuda_ret = CuCtxCreate (&cuda_ctx, 0, cuda_device);
if (cuda_ret != CUDA_SUCCESS) {
GST_WARNING ("Failed to create cuda context, ret: 0x%x", (gint) cuda_ret);
continue;
}
CuCtxPopCurrent (NULL);
if (nvdec_available)
gst_nvdec_plugin_init (plugin, i, cuda_ctx);
if (nvenc_available)
gst_nvenc_plugin_init (plugin, i, cuda_ctx);
CuCtxDestroy (cuda_ctx);
}
return TRUE; return TRUE;
} }