/* GStreamer
 * Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gsthip-config.h"

#include "gsthip.h"
#include "gsthiploader.h"
#include <gmodule.h>
#include <mutex>
#include <hip/nvidia_hip_runtime_api.h>
#include <string.h>

#ifdef HAVE_GST_GL
#include "gsthiploader-gl.h"
#include <cudaGL.h>
#endif
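
/* Dynamic loader for the HIP runtime: every entry point is resolved at
 * runtime with GModule, and calls are dispatched either to AMD's
 * libamdhip64 or, for NVIDIA hardware, to the CUDA driver (libcuda) plus
 * the CUDA runtime (libcudart), with CUDA return codes converted back to
 * hipError_t. */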

#ifndef GST_DISABLE_GST_DEBUG
#define GST_CAT_DEFAULT ensure_debug_category()
static GstDebugCategory *
ensure_debug_category (void)
{
  static GstDebugCategory *cat = nullptr;
  static std::once_flag once;

  std::call_once (once,[&] {
    cat = _gst_debug_category_new ("hiploader", 0, "hiploader");
  });

  return cat;
}
#endif
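
/* One function table per backend: the AMD table holds symbols from
 * libamdhip64, while the NVIDIA path is split across the CUDA driver API
 * (GstHipFuncTableCuda) and the CUDA runtime API (GstHipFuncTableCudaRt).
 * A table is only usable once its loaded flag has been set. */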

/* *INDENT-OFF* */
struct GstHipFuncTableAmd
{
  gboolean loaded = FALSE;

  hipError_t (*hipInit) (unsigned int flags);
  hipError_t (*hipDriverGetVersion) (int *driverVersion);
  hipError_t (*hipRuntimeGetVersion) (int *runtimeVersion);
  const char *(*hipGetErrorName) (hipError_t hip_error);
  const char *(*hipGetErrorString) (hipError_t hipError);
  hipError_t (*hipGetDeviceCount) (int *count);
  hipError_t (*hipGetDeviceProperties) (hipDeviceProp_t * prop, int deviceId);
  hipError_t (*hipDeviceGetAttribute) (int *pi, hipDeviceAttribute_t attr,
      int deviceId);
  hipError_t (*hipSetDevice) (int deviceId);
  hipError_t (*hipMalloc) (void **ptr, size_t size);
  hipError_t (*hipFree) (void *ptr);
  hipError_t (*hipHostMalloc) (void **ptr, size_t size, unsigned int flags);
  hipError_t (*hipHostFree) (void *ptr);
  hipError_t (*hipStreamCreate) (hipStream_t * stream);
  hipError_t (*hipStreamDestroy) (hipStream_t stream);
  hipError_t (*hipStreamSynchronize) (hipStream_t stream);
  hipError_t (*hipEventCreateWithFlags) (hipEvent_t * event, unsigned flags);
  hipError_t (*hipEventRecord) (hipEvent_t event, hipStream_t stream);
  hipError_t (*hipEventDestroy) (hipEvent_t event);
  hipError_t (*hipEventSynchronize) (hipEvent_t event);
  hipError_t (*hipEventQuery) (hipEvent_t event);
  hipError_t (*hipModuleLoadData) (hipModule_t * module, const void *image);
  hipError_t (*hipModuleUnload) (hipModule_t module);
  hipError_t (*hipModuleGetFunction) (hipFunction_t * function,
      hipModule_t module, const char *kname);
  hipError_t (*hipModuleLaunchKernel) (hipFunction_t f, unsigned int gridDimX,
      unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX,
      unsigned int blockDimY, unsigned int blockDimZ,
      unsigned int sharedMemBytes, hipStream_t stream,
      void **kernelParams, void **extra);
  hipError_t (*hipMemcpyParam2DAsync) (const hip_Memcpy2D * pCopy,
      hipStream_t stream);
  hipError_t (*hipMemsetD8Async) (hipDeviceptr_t dest, unsigned char value,
      size_t count, hipStream_t stream);
  hipError_t (*hipMemsetD16Async) (hipDeviceptr_t dest, unsigned short value,
      size_t count, hipStream_t stream);
  hipError_t (*hipMemsetD32Async) (hipDeviceptr_t dst, int value, size_t count,
      hipStream_t stream);
  hipError_t (*hipTexObjectCreate) (hipTextureObject_t * pTexObject,
      const HIP_RESOURCE_DESC * pResDesc, const HIP_TEXTURE_DESC * pTexDesc,
      const HIP_RESOURCE_VIEW_DESC * pResViewDesc);
  hipError_t (*hipTexObjectDestroy) (hipTextureObject_t texObject);
  hipError_t (*hipGraphicsMapResources) (int count,
      hipGraphicsResource_t * resources, hipStream_t stream);
  hipError_t (*hipGraphicsResourceGetMappedPointer) (void **devPtr,
      size_t * size, hipGraphicsResource_t resource);
  hipError_t (*hipGraphicsUnmapResources) (int count,
      hipGraphicsResource_t * resources, hipStream_t stream);
  hipError_t (*hipGraphicsUnregisterResource) (hipGraphicsResource_t resource);
#ifdef HAVE_GST_GL
  hipError_t (*hipGLGetDevices) (unsigned int *pHipDeviceCount,
      int *pHipDevices, unsigned int hipDeviceCount,
      hipGLDeviceList deviceList);
  hipError_t (*hipGraphicsGLRegisterBuffer) (hipGraphicsResource ** resource,
      unsigned int buffer, unsigned int flags);
#endif
};

struct GstHipFuncTableCuda
{
  gboolean loaded = FALSE;

  CUresult (CUDAAPI *cuInit) (unsigned int flags);
  CUresult (CUDAAPI *cuDriverGetVersion) (int *driverVersion);
  CUresult (CUDAAPI *cuDeviceGetAttribute) (int *pi,
      CUdevice_attribute attrib, CUdevice dev);
  CUresult (CUDAAPI *cuModuleLoadData) (CUmodule * module, const void *image);
  CUresult (CUDAAPI *cuModuleUnload) (CUmodule module);
  CUresult (CUDAAPI *cuModuleGetFunction) (CUfunction * function,
      CUmodule module, const char *kname);
  CUresult (CUDAAPI *cuLaunchKernel) (CUfunction f, unsigned int gridDimX,
      unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX,
      unsigned int blockDimY, unsigned int blockDimZ,
      unsigned int sharedMemBytes, CUstream stream,
      void **kernelParams, void **extra);
  CUresult (CUDAAPI *cuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy,
      CUstream stream);
  CUresult (CUDAAPI *cuMemsetD8Async) (CUdeviceptr dstDevice,
      unsigned char uc, size_t N, CUstream hStream);
  CUresult (CUDAAPI *cuMemsetD16Async) (CUdeviceptr dstDevice,
      unsigned short us, size_t N, CUstream hStream);
  CUresult (CUDAAPI *cuMemsetD32Async) (CUdeviceptr dstDevice, unsigned int ui,
      size_t N, CUstream hStream);
  CUresult (CUDAAPI *cuTexObjectCreate) (CUtexObject * pTexObject,
      const CUDA_RESOURCE_DESC * pResDesc, const CUDA_TEXTURE_DESC * pTexDesc,
      const CUDA_RESOURCE_VIEW_DESC * pResViewDesc);
  CUresult (CUDAAPI *cuTexObjectDestroy) (CUtexObject texObject);
};

struct GstHipFuncTableCudaRt
{
  gboolean loaded = FALSE;

  cudaError_t (CUDAAPI *cudaRuntimeGetVersion) (int *runtimeVersion);
  const char * (CUDAAPI *cudaGetErrorName) (cudaError_t error);
  const char * (CUDAAPI *cudaGetErrorString) (cudaError_t error);
  cudaError_t (CUDAAPI *cudaGetDeviceCount) (int *count);
  cudaError_t (CUDAAPI *cudaGetDeviceProperties) (struct cudaDeviceProp * prop,
      int device);
  cudaError_t (CUDAAPI *cudaDeviceGetAttribute) (int *value,
      enum cudaDeviceAttr attr, int device);
  cudaError_t (CUDAAPI *cudaSetDevice) (int device);
  cudaError_t (CUDAAPI *cudaMalloc) (void **ptr, size_t size);
  cudaError_t (CUDAAPI *cudaFree) (void *ptr);
  /* cudaHostAlloc, not cudaMallocHost: the exported cudaMallocHost symbol
   * takes no flags argument, while hipHostMalloc forwards flags */
  cudaError_t (CUDAAPI *cudaHostAlloc) (void **ptr, size_t size,
      unsigned int flags);
  cudaError_t (CUDAAPI *cudaFreeHost) (void *ptr);
  cudaError_t (CUDAAPI *cudaStreamCreate) (cudaStream_t * pStream);
  cudaError_t (CUDAAPI *cudaStreamDestroy) (cudaStream_t stream);
  cudaError_t (CUDAAPI *cudaStreamSynchronize) (cudaStream_t stream);
  cudaError_t (CUDAAPI *cudaEventCreateWithFlags) (cudaEvent_t * event,
      unsigned int flags);
  cudaError_t (CUDAAPI *cudaEventRecord) (cudaEvent_t event,
      cudaStream_t stream);
  cudaError_t (CUDAAPI *cudaEventDestroy) (cudaEvent_t event);
  cudaError_t (CUDAAPI *cudaEventSynchronize) (cudaEvent_t event);
  cudaError_t (CUDAAPI *cudaEventQuery) (cudaEvent_t event);
  cudaError_t (CUDAAPI *cudaGraphicsMapResources) (int count,
      cudaGraphicsResource_t * resources, cudaStream_t stream);
  cudaError_t (CUDAAPI *cudaGraphicsResourceGetMappedPointer) (void **devPtr,
      size_t * size, cudaGraphicsResource_t resource);
  cudaError_t (CUDAAPI *cudaGraphicsUnmapResources) (int count,
      cudaGraphicsResource_t * resources, cudaStream_t stream);
  cudaError_t (CUDAAPI *cudaGraphicsUnregisterResource)
      (cudaGraphicsResource_t resource);
#ifdef HAVE_GST_GL
  cudaError_t (CUDAAPI *cudaGLGetDevices) (unsigned int *pCudaDeviceCount,
      int *pCudaDevices, unsigned int cudaDeviceCount,
      enum cudaGLDeviceList deviceList);
  cudaError_t (CUDAAPI *cudaGraphicsGLRegisterBuffer)
      (struct cudaGraphicsResource ** resource, unsigned int buffer,
      unsigned int flags);
#endif
};
/* *INDENT-ON* */

static GstHipFuncTableAmd amd_ftable = { };
static GstHipFuncTableCuda cuda_ftable = { };
static GstHipFuncTableCudaRt cudart_ftable = { };

#define LOAD_SYMBOL(name) G_STMT_START { \
  if (!g_module_symbol (module, G_STRINGIFY (name), (gpointer *) &table->name)) { \
    GST_ERROR ("Failed to load '%s', %s", G_STRINGIFY (name), g_module_error()); \
    g_module_close (module); \
    return; \
  } \
} G_STMT_END;
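
/* LOAD_SYMBOL resolves one entry point into the currently selected table and
 * bails out of the calling loader on the first missing symbol, so a table is
 * either complete (loaded == TRUE) or not usable at all. */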

static void
load_amd_func_table (void)
{
  GModule *module = nullptr;
#ifndef G_OS_WIN32
  module = g_module_open ("libamdhip64.so", G_MODULE_BIND_LAZY);
  if (!module)
    module = g_module_open ("/opt/rocm/lib/libamdhip64.so", G_MODULE_BIND_LAZY);
#else
  /* Prefer hip dll in SDK */
  auto hip_root = g_getenv ("HIP_PATH");
  if (hip_root) {
    auto path = g_build_path (G_DIR_SEPARATOR_S, hip_root, "bin", nullptr);
    auto dir = g_dir_open (path, 0, nullptr);
    if (dir) {
      const gchar *name;
      while ((name = g_dir_read_name (dir))) {
        if (g_str_has_prefix (name, "amdhip64_") && g_str_has_suffix (name,
            ".dll")) {
          auto lib_path = g_build_filename (path, name, nullptr);
          module = g_module_open (lib_path, G_MODULE_BIND_LAZY);
          g_free (lib_path);
          break;
        }
      }

      g_dir_close (dir);
    }
    g_free (path);
  }

  /* Try dll in System32 */
  if (!module)
    module = g_module_open ("amdhip64_6.dll", G_MODULE_BIND_LAZY);
#endif

  if (!module) {
    GST_INFO ("Couldn't open HIP library");
    return;
  }

  auto table = &amd_ftable;
  LOAD_SYMBOL (hipInit);
  LOAD_SYMBOL (hipDriverGetVersion);
  LOAD_SYMBOL (hipRuntimeGetVersion);
  LOAD_SYMBOL (hipGetErrorName);
  LOAD_SYMBOL (hipGetErrorString);
  LOAD_SYMBOL (hipGetDeviceCount);
  LOAD_SYMBOL (hipGetDeviceProperties);
  LOAD_SYMBOL (hipDeviceGetAttribute);
  LOAD_SYMBOL (hipSetDevice);
  LOAD_SYMBOL (hipMalloc);
  LOAD_SYMBOL (hipFree);
  LOAD_SYMBOL (hipHostMalloc);
  LOAD_SYMBOL (hipHostFree);
  LOAD_SYMBOL (hipStreamCreate);
  LOAD_SYMBOL (hipStreamDestroy);
  LOAD_SYMBOL (hipStreamSynchronize);
  LOAD_SYMBOL (hipEventCreateWithFlags);
  LOAD_SYMBOL (hipEventRecord);
  LOAD_SYMBOL (hipEventDestroy);
  LOAD_SYMBOL (hipEventSynchronize);
  LOAD_SYMBOL (hipEventQuery);
  LOAD_SYMBOL (hipModuleLoadData);
  LOAD_SYMBOL (hipModuleUnload);
  LOAD_SYMBOL (hipModuleGetFunction);
  LOAD_SYMBOL (hipModuleLaunchKernel);
  LOAD_SYMBOL (hipMemcpyParam2DAsync);
  LOAD_SYMBOL (hipMemsetD8Async);
  LOAD_SYMBOL (hipMemsetD16Async);
  LOAD_SYMBOL (hipMemsetD32Async);
  LOAD_SYMBOL (hipTexObjectCreate);
  LOAD_SYMBOL (hipTexObjectDestroy);
  LOAD_SYMBOL (hipGraphicsMapResources);
  LOAD_SYMBOL (hipGraphicsResourceGetMappedPointer);
  LOAD_SYMBOL (hipGraphicsUnmapResources);
  LOAD_SYMBOL (hipGraphicsUnregisterResource);
#ifdef HAVE_GST_GL
  LOAD_SYMBOL (hipGLGetDevices);
  LOAD_SYMBOL (hipGraphicsGLRegisterBuffer);
#endif

  table->loaded = TRUE;
}

static void
load_cuda_func_table (void)
{
  GModule *module = nullptr;
#ifndef G_OS_WIN32
  module = g_module_open ("libcuda.so", G_MODULE_BIND_LAZY);
#else
  module = g_module_open ("nvcuda.dll", G_MODULE_BIND_LAZY);
#endif

  if (!module) {
    GST_INFO ("Couldn't open CUDA library");
    return;
  }

  auto table = &cuda_ftable;
  LOAD_SYMBOL (cuInit);
  LOAD_SYMBOL (cuDriverGetVersion);
  LOAD_SYMBOL (cuDeviceGetAttribute);
  LOAD_SYMBOL (cuModuleLoadData);
  LOAD_SYMBOL (cuModuleUnload);
  LOAD_SYMBOL (cuModuleGetFunction);
  LOAD_SYMBOL (cuLaunchKernel);
  LOAD_SYMBOL (cuMemcpy2DAsync);
  LOAD_SYMBOL (cuMemsetD8Async);
  LOAD_SYMBOL (cuMemsetD16Async);
  LOAD_SYMBOL (cuMemsetD32Async);
  LOAD_SYMBOL (cuTexObjectCreate);
  LOAD_SYMBOL (cuTexObjectDestroy);

  table->loaded = TRUE;
}
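
/* On Windows the CUDA runtime DLL name embeds the toolkit version, e.g.
 * cudart64_12.dll for CUDA 12.x but cudart64_110.dll for CUDA 11.0, so probe
 * the major-only name first, then major+minor, and finally scan the
 * CUDA_PATH\bin directory. GST_HIP_CUDART_LIBNAME overrides the lookup. */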

static void
load_cudart_func_table (guint major_ver, guint minor_ver)
{
  GModule *module = nullptr;
  auto module_name = g_getenv ("GST_HIP_CUDART_LIBNAME");
  if (module_name)
    module = g_module_open (module_name, G_MODULE_BIND_LAZY);

  if (!module) {
#ifndef G_OS_WIN32
    module = g_module_open ("libcudart.so", G_MODULE_BIND_LAZY);
#else
    auto lib_name = g_strdup_printf ("cudart64_%d.dll", major_ver);
    module = g_module_open (lib_name, G_MODULE_BIND_LAZY);
    g_free (lib_name);

    if (!module) {
      lib_name = g_strdup_printf ("cudart64_%d%d.dll", major_ver, minor_ver);
      module = g_module_open (lib_name, G_MODULE_BIND_LAZY);
      g_free (lib_name);
    }

    if (!module) {
      auto cuda_root = g_getenv ("CUDA_PATH");
      if (cuda_root) {
        auto path = g_build_path (G_DIR_SEPARATOR_S, cuda_root, "bin", nullptr);
        auto dir = g_dir_open (path, 0, nullptr);
        if (dir) {
          const gchar *name;
          while ((name = g_dir_read_name (dir))) {
            if (g_str_has_prefix (name, "cudart64_") &&
                g_str_has_suffix (name, ".dll")) {
              auto lib_path = g_build_filename (path, name, nullptr);
              module = g_module_open (lib_path, G_MODULE_BIND_LAZY);
              g_free (lib_path);
              break;
            }
          }

          g_dir_close (dir);
        }
        g_free (path);
      }
    }
#endif
  }

  if (!module) {
    GST_INFO ("Couldn't open CUDA runtime library");
    return;
  }

  auto table = &cudart_ftable;
  LOAD_SYMBOL (cudaRuntimeGetVersion);
  LOAD_SYMBOL (cudaGetErrorName);
  LOAD_SYMBOL (cudaGetErrorString);
  LOAD_SYMBOL (cudaGetDeviceCount);
  LOAD_SYMBOL (cudaGetDeviceProperties);
  LOAD_SYMBOL (cudaDeviceGetAttribute);
  LOAD_SYMBOL (cudaSetDevice);
  LOAD_SYMBOL (cudaMalloc);
  LOAD_SYMBOL (cudaFree);
  LOAD_SYMBOL (cudaHostAlloc);
  LOAD_SYMBOL (cudaFreeHost);
  LOAD_SYMBOL (cudaStreamCreate);
  LOAD_SYMBOL (cudaStreamDestroy);
  LOAD_SYMBOL (cudaStreamSynchronize);
  LOAD_SYMBOL (cudaEventCreateWithFlags);
  LOAD_SYMBOL (cudaEventRecord);
  LOAD_SYMBOL (cudaEventDestroy);
  LOAD_SYMBOL (cudaEventSynchronize);
  LOAD_SYMBOL (cudaEventQuery);
  LOAD_SYMBOL (cudaGraphicsMapResources);
  LOAD_SYMBOL (cudaGraphicsResourceGetMappedPointer);
  LOAD_SYMBOL (cudaGraphicsUnmapResources);
  LOAD_SYMBOL (cudaGraphicsUnregisterResource);
#ifdef HAVE_GST_GL
  LOAD_SYMBOL (cudaGLGetDevices);
  LOAD_SYMBOL (cudaGraphicsGLRegisterBuffer);
#endif

  table->loaded = TRUE;
}

/* *INDENT-OFF* */
static gboolean
gst_hip_load_library_amd (void)
{
  static std::once_flag once;
  std::call_once (once,[]() {
    load_amd_func_table ();
    if (amd_ftable.loaded) {
      auto ret = amd_ftable.hipInit (0);
      if (ret != hipSuccess)
        amd_ftable.loaded = FALSE;
    }
  });

  return amd_ftable.loaded;
}

static gboolean
gst_hip_load_library_nvidia (void)
{
  static std::once_flag once;
  std::call_once (once,[]() {
    load_cuda_func_table ();
    if (cuda_ftable.loaded) {
      auto ret = cuda_ftable.cuInit (0);
      if (ret != CUDA_SUCCESS) {
        cuda_ftable.loaded = FALSE;
        return;
      }

      int cuda_ver = 0;
      ret = cuda_ftable.cuDriverGetVersion (&cuda_ver);
      if (ret != CUDA_SUCCESS)
        return;

      int major_ver = cuda_ver / 1000;
      int minor_ver = (cuda_ver % 1000) / 10;
      load_cudart_func_table (major_ver, minor_ver);
    }
  });

  if (!cuda_ftable.loaded || !cudart_ftable.loaded)
    return FALSE;

  return TRUE;
}
/* *INDENT-ON* */
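
/* Loads and initializes the backend for the given vendor. For
 * GST_HIP_VENDOR_UNKNOWN the AMD backend is probed first, then NVIDIA; the
 * NVIDIA path only succeeds when both the driver and runtime tables load.
 * Each backend is attempted at most once per process. */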
gboolean
gst_hip_load_library (GstHipVendor vendor)
{
  switch (vendor) {
    case GST_HIP_VENDOR_AMD:
      return gst_hip_load_library_amd ();
    case GST_HIP_VENDOR_NVIDIA:
      return gst_hip_load_library_nvidia ();
    case GST_HIP_VENDOR_UNKNOWN:
      if (gst_hip_load_library_amd () || gst_hip_load_library_nvidia ())
        return TRUE;
      break;
  }

  return FALSE;
}
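
/* A minimal usage sketch (illustrative only, not part of this library):
 *
 *   int count = 0;
 *   GstHipVendor vendor = GST_HIP_VENDOR_AMD;
 *   if (!gst_hip_load_library (vendor))
 *     vendor = GST_HIP_VENDOR_NVIDIA;
 *   if (gst_hip_load_library (vendor) &&
 *       HipGetDeviceCount (vendor, &count) == hipSuccess)
 *     GST_INFO ("%d HIP device(s) available", count);
 */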

#define CHECK_VENDOR(v) \
    g_return_val_if_fail ((v) != GST_HIP_VENDOR_UNKNOWN, \
        hipErrorNotInitialized); \
    g_return_val_if_fail (gst_hip_load_library (v), hipErrorNotInitialized);
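
/* Every wrapper below begins with CHECK_VENDOR: the caller must have resolved
 * a concrete vendor and the matching library must load, otherwise the wrapper
 * bails out with hipErrorNotInitialized (the error-string helpers return
 * nullptr instead, since they don't return hipError_t). */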

hipError_t
HipInit (GstHipVendor vendor, unsigned int flags)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipInit (flags);

  auto cuda_ret = cuda_ftable.cuInit (flags);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipDriverGetVersion (GstHipVendor vendor, int *driverVersion)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipDriverGetVersion (driverVersion);

  auto cuda_ret = cuda_ftable.cuDriverGetVersion (driverVersion);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipRuntimeGetVersion (GstHipVendor vendor, int *runtimeVersion)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipRuntimeGetVersion (runtimeVersion);

  auto cuda_ret = cudart_ftable.cudaRuntimeGetVersion (runtimeVersion);
  return hipCUDAErrorTohipError (cuda_ret);
}

const char *
HipGetErrorName (GstHipVendor vendor, hipError_t hip_error)
{
  g_return_val_if_fail (vendor != GST_HIP_VENDOR_UNKNOWN, nullptr);
  g_return_val_if_fail (gst_hip_load_library (vendor), nullptr);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGetErrorName (hip_error);

  auto cuda_ret = hipErrorToCudaError (hip_error);
  return cudart_ftable.cudaGetErrorName (cuda_ret);
}

const char *
HipGetErrorString (GstHipVendor vendor, hipError_t hipError)
{
  g_return_val_if_fail (vendor != GST_HIP_VENDOR_UNKNOWN, nullptr);
  g_return_val_if_fail (gst_hip_load_library (vendor), nullptr);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGetErrorString (hipError);

  auto cuda_ret = hipErrorToCudaError (hipError);
  return cudart_ftable.cudaGetErrorString (cuda_ret);
}

hipError_t
HipGetDeviceCount (GstHipVendor vendor, int *count)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGetDeviceCount (count);

  auto cuda_ret = cudart_ftable.cudaGetDeviceCount (count);
  return hipCUDAErrorTohipError (cuda_ret);
}
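
/* For NVIDIA, hipDeviceProp_t is filled field by field from cudaDeviceProp
 * since the two structs are not layout-compatible; only the fields with a
 * direct CUDA counterpart are translated. */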

hipError_t
HipGetDeviceProperties (GstHipVendor vendor, hipDeviceProp_t * prop,
    int deviceId)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGetDeviceProperties (prop, deviceId);

  if (!prop)
    return hipErrorInvalidValue;

  struct cudaDeviceProp cdprop;
  auto cuda_ret = cudart_ftable.cudaGetDeviceProperties (&cdprop, deviceId);
  if (cuda_ret != cudaSuccess)
    return hipCUDAErrorTohipError (cuda_ret);

  strncpy (prop->name, cdprop.name, 256);
  /* uuid and luid are raw bytes that may contain NULs, so memcpy them */
  memcpy (prop->uuid.bytes, cdprop.uuid.bytes, 16);
  memcpy (prop->luid, cdprop.luid, 8);
  prop->luidDeviceNodeMask = cdprop.luidDeviceNodeMask;
  prop->totalGlobalMem = cdprop.totalGlobalMem;
  prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
  prop->regsPerBlock = cdprop.regsPerBlock;
  prop->memPitch = cdprop.memPitch;
  prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
  prop->maxThreadsDim[0] = cdprop.maxThreadsDim[0];
  prop->maxThreadsDim[1] = cdprop.maxThreadsDim[1];
  prop->maxThreadsDim[2] = cdprop.maxThreadsDim[2];
  prop->maxGridSize[0] = cdprop.maxGridSize[0];
  prop->maxGridSize[1] = cdprop.maxGridSize[1];
  prop->maxGridSize[2] = cdprop.maxGridSize[2];
  prop->clockRate = cdprop.clockRate;
  prop->totalConstMem = cdprop.totalConstMem;
  prop->major = cdprop.major;
  prop->minor = cdprop.minor;
  prop->textureAlignment = cdprop.textureAlignment;
  prop->texturePitchAlignment = cdprop.texturePitchAlignment;
  prop->deviceOverlap = cdprop.deviceOverlap;
  prop->multiProcessorCount = cdprop.multiProcessorCount;
  prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
  prop->integrated = cdprop.integrated;
  prop->canMapHostMemory = cdprop.canMapHostMemory;
  prop->computeMode = cdprop.computeMode;
  prop->maxTexture1D = cdprop.maxTexture1D;
  prop->maxTexture1DMipmap = cdprop.maxTexture1DMipmap;
  prop->maxTexture1DLinear = cdprop.maxTexture1DLinear;
  prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
  prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
  prop->maxTexture2DMipmap[0] = cdprop.maxTexture2DMipmap[0];
  prop->maxTexture2DMipmap[1] = cdprop.maxTexture2DMipmap[1];
  prop->maxTexture2DLinear[0] = cdprop.maxTexture2DLinear[0];
  prop->maxTexture2DLinear[1] = cdprop.maxTexture2DLinear[1];
  prop->maxTexture2DLinear[2] = cdprop.maxTexture2DLinear[2];
  prop->maxTexture2DGather[0] = cdprop.maxTexture2DGather[0];
  prop->maxTexture2DGather[1] = cdprop.maxTexture2DGather[1];
  prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
  prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
  prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
  prop->maxTexture3DAlt[0] = cdprop.maxTexture3DAlt[0];
  prop->maxTexture3DAlt[1] = cdprop.maxTexture3DAlt[1];
  prop->maxTexture3DAlt[2] = cdprop.maxTexture3DAlt[2];
  prop->maxTextureCubemap = cdprop.maxTextureCubemap;
  prop->maxTexture1DLayered[0] = cdprop.maxTexture1DLayered[0];
  prop->maxTexture1DLayered[1] = cdprop.maxTexture1DLayered[1];
  prop->maxTexture2DLayered[0] = cdprop.maxTexture2DLayered[0];
  prop->maxTexture2DLayered[1] = cdprop.maxTexture2DLayered[1];
  prop->maxTexture2DLayered[2] = cdprop.maxTexture2DLayered[2];
  prop->maxTextureCubemapLayered[0] = cdprop.maxTextureCubemapLayered[0];
  prop->maxTextureCubemapLayered[1] = cdprop.maxTextureCubemapLayered[1];
  prop->maxSurface1D = cdprop.maxSurface1D;
  prop->maxSurface2D[0] = cdprop.maxSurface2D[0];
  prop->maxSurface2D[1] = cdprop.maxSurface2D[1];
  prop->maxSurface3D[0] = cdprop.maxSurface3D[0];
  prop->maxSurface3D[1] = cdprop.maxSurface3D[1];
  prop->maxSurface3D[2] = cdprop.maxSurface3D[2];
  prop->maxSurface1DLayered[0] = cdprop.maxSurface1DLayered[0];
  prop->maxSurface1DLayered[1] = cdprop.maxSurface1DLayered[1];
  prop->maxSurface2DLayered[0] = cdprop.maxSurface2DLayered[0];
  prop->maxSurface2DLayered[1] = cdprop.maxSurface2DLayered[1];
  prop->maxSurface2DLayered[2] = cdprop.maxSurface2DLayered[2];
  prop->maxSurfaceCubemap = cdprop.maxSurfaceCubemap;
  prop->maxSurfaceCubemapLayered[0] = cdprop.maxSurfaceCubemapLayered[0];
  prop->maxSurfaceCubemapLayered[1] = cdprop.maxSurfaceCubemapLayered[1];
  prop->surfaceAlignment = cdprop.surfaceAlignment;
  prop->concurrentKernels = cdprop.concurrentKernels;
  prop->ECCEnabled = cdprop.ECCEnabled;
  prop->pciBusID = cdprop.pciBusID;
  prop->pciDeviceID = cdprop.pciDeviceID;
  prop->pciDomainID = cdprop.pciDomainID;
  prop->tccDriver = cdprop.tccDriver;
  prop->asyncEngineCount = cdprop.asyncEngineCount;
  prop->unifiedAddressing = cdprop.unifiedAddressing;
  prop->memoryClockRate = cdprop.memoryClockRate;
  prop->memoryBusWidth = cdprop.memoryBusWidth;
  prop->l2CacheSize = cdprop.l2CacheSize;
  prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
  prop->streamPrioritiesSupported = cdprop.streamPrioritiesSupported;
  prop->globalL1CacheSupported = cdprop.globalL1CacheSupported;
  prop->localL1CacheSupported = cdprop.localL1CacheSupported;
  prop->sharedMemPerMultiprocessor = cdprop.sharedMemPerMultiprocessor;
  prop->regsPerMultiprocessor = cdprop.regsPerMultiprocessor;
  prop->managedMemory = cdprop.managedMemory;
  prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
  prop->multiGpuBoardGroupID = cdprop.multiGpuBoardGroupID;
  prop->hostNativeAtomicSupported = cdprop.hostNativeAtomicSupported;
  prop->singleToDoublePrecisionPerfRatio =
      cdprop.singleToDoublePrecisionPerfRatio;
  prop->pageableMemoryAccess = cdprop.pageableMemoryAccess;
  prop->concurrentManagedAccess = cdprop.concurrentManagedAccess;
  prop->computePreemptionSupported = cdprop.computePreemptionSupported;
  prop->canUseHostPointerForRegisteredMem =
      cdprop.canUseHostPointerForRegisteredMem;
  prop->cooperativeLaunch = cdprop.cooperativeLaunch;
  prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
  prop->sharedMemPerBlockOptin = cdprop.sharedMemPerBlockOptin;
  prop->pageableMemoryAccessUsesHostPageTables =
      cdprop.pageableMemoryAccessUsesHostPageTables;
  prop->directManagedMemAccessFromHost = cdprop.directManagedMemAccessFromHost;
  prop->accessPolicyMaxWindowSize = cdprop.accessPolicyMaxWindowSize;
  prop->maxBlocksPerMultiProcessor = cdprop.maxBlocksPerMultiProcessor;
  prop->persistingL2CacheMaxSize = cdprop.persistingL2CacheMaxSize;
  prop->reservedSharedMemPerBlock = cdprop.reservedSharedMemPerBlock;
  prop->warpSize = cdprop.warpSize;
  prop->clusterLaunch = cdprop.clusterLaunch;
  prop->deferredMappingHipArraySupported =
      cdprop.deferredMappingCudaArraySupported;
  prop->gpuDirectRDMAFlushWritesOptions =
      cdprop.gpuDirectRDMAFlushWritesOptions;
  prop->gpuDirectRDMASupported = cdprop.gpuDirectRDMASupported;
  prop->gpuDirectRDMAWritesOrdering = cdprop.gpuDirectRDMAWritesOrdering;
  prop->hostRegisterReadOnlySupported = cdprop.hostRegisterReadOnlySupported;
  prop->hostRegisterSupported = cdprop.hostRegisterSupported;
  prop->ipcEventSupported = cdprop.ipcEventSupported;
  prop->memoryPoolSupportedHandleTypes = cdprop.memoryPoolSupportedHandleTypes;
  prop->memoryPoolsSupported = cdprop.memoryPoolsSupported;
  prop->sparseHipArraySupported = cdprop.sparseCudaArraySupported;
  prop->timelineSemaphoreInteropSupported =
      cdprop.timelineSemaphoreInteropSupported;
  prop->unifiedFunctionPointers = cdprop.unifiedFunctionPointers;

  return hipSuccess;
}
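
/* Attribute queries translate the HIP enum to its cudaDeviceAttr equivalent
 * and go through the CUDA runtime, except for
 * hipDeviceAttributeVirtualMemoryManagementSupported, which only exists in
 * the driver API; unmapped attributes report hipErrorInvalidValue. */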

hipError_t
HipDeviceGetAttribute (GstHipVendor vendor, int *pi, hipDeviceAttribute_t attr,
    int deviceId)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipDeviceGetAttribute (pi, attr, deviceId);

  enum cudaDeviceAttr cdattr;
  switch (attr) {
    case hipDeviceAttributeMaxThreadsPerBlock:
      cdattr = cudaDevAttrMaxThreadsPerBlock;
      break;
    case hipDeviceAttributeMaxBlockDimX:
      cdattr = cudaDevAttrMaxBlockDimX;
      break;
    case hipDeviceAttributeMaxBlockDimY:
      cdattr = cudaDevAttrMaxBlockDimY;
      break;
    case hipDeviceAttributeMaxBlockDimZ:
      cdattr = cudaDevAttrMaxBlockDimZ;
      break;
    case hipDeviceAttributeMaxGridDimX:
      cdattr = cudaDevAttrMaxGridDimX;
      break;
    case hipDeviceAttributeMaxGridDimY:
      cdattr = cudaDevAttrMaxGridDimY;
      break;
    case hipDeviceAttributeMaxGridDimZ:
      cdattr = cudaDevAttrMaxGridDimZ;
      break;
    case hipDeviceAttributeMaxSharedMemoryPerBlock:
      cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
      break;
    case hipDeviceAttributeTotalConstantMemory:
      cdattr = cudaDevAttrTotalConstantMemory;
      break;
    case hipDeviceAttributeWarpSize:
      cdattr = cudaDevAttrWarpSize;
      break;
    case hipDeviceAttributeMaxRegistersPerBlock:
      cdattr = cudaDevAttrMaxRegistersPerBlock;
      break;
    case hipDeviceAttributeClockRate:
      cdattr = cudaDevAttrClockRate;
      break;
    case hipDeviceAttributeMemoryClockRate:
      cdattr = cudaDevAttrMemoryClockRate;
      break;
    case hipDeviceAttributeMemoryBusWidth:
      cdattr = cudaDevAttrGlobalMemoryBusWidth;
      break;
    case hipDeviceAttributeMultiprocessorCount:
      cdattr = cudaDevAttrMultiProcessorCount;
      break;
    case hipDeviceAttributeComputeMode:
      cdattr = cudaDevAttrComputeMode;
      break;
    case hipDeviceAttributeL2CacheSize:
      cdattr = cudaDevAttrL2CacheSize;
      break;
    case hipDeviceAttributeMaxThreadsPerMultiProcessor:
      cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
      break;
    case hipDeviceAttributeComputeCapabilityMajor:
      cdattr = cudaDevAttrComputeCapabilityMajor;
      break;
    case hipDeviceAttributeComputeCapabilityMinor:
      cdattr = cudaDevAttrComputeCapabilityMinor;
      break;
    case hipDeviceAttributeConcurrentKernels:
      cdattr = cudaDevAttrConcurrentKernels;
      break;
    case hipDeviceAttributePciBusId:
      cdattr = cudaDevAttrPciBusId;
      break;
    case hipDeviceAttributePciDeviceId:
      cdattr = cudaDevAttrPciDeviceId;
      break;
    case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
      cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
      break;
    case hipDeviceAttributeIsMultiGpuBoard:
      cdattr = cudaDevAttrIsMultiGpuBoard;
      break;
    case hipDeviceAttributeIntegrated:
      cdattr = cudaDevAttrIntegrated;
      break;
    case hipDeviceAttributeMaxTexture1DWidth:
      cdattr = cudaDevAttrMaxTexture1DWidth;
      break;
    case hipDeviceAttributeMaxTexture2DWidth:
      cdattr = cudaDevAttrMaxTexture2DWidth;
      break;
    case hipDeviceAttributeMaxTexture2DHeight:
      cdattr = cudaDevAttrMaxTexture2DHeight;
      break;
    case hipDeviceAttributeMaxTexture3DWidth:
      cdattr = cudaDevAttrMaxTexture3DWidth;
      break;
    case hipDeviceAttributeMaxTexture3DHeight:
      cdattr = cudaDevAttrMaxTexture3DHeight;
      break;
    case hipDeviceAttributeMaxTexture3DDepth:
      cdattr = cudaDevAttrMaxTexture3DDepth;
      break;
    case hipDeviceAttributeMaxPitch:
      cdattr = cudaDevAttrMaxPitch;
      break;
    case hipDeviceAttributeTextureAlignment:
      cdattr = cudaDevAttrTextureAlignment;
      break;
    case hipDeviceAttributeTexturePitchAlignment:
      cdattr = cudaDevAttrTexturePitchAlignment;
      break;
    case hipDeviceAttributeKernelExecTimeout:
      cdattr = cudaDevAttrKernelExecTimeout;
      break;
    case hipDeviceAttributeCanMapHostMemory:
      cdattr = cudaDevAttrCanMapHostMemory;
      break;
    case hipDeviceAttributeEccEnabled:
      cdattr = cudaDevAttrEccEnabled;
      break;
    case hipDeviceAttributeCooperativeLaunch:
      cdattr = cudaDevAttrCooperativeLaunch;
      break;
    case hipDeviceAttributeCooperativeMultiDeviceLaunch:
      cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
      break;
    case hipDeviceAttributeHostRegisterSupported:
      cdattr = cudaDevAttrHostRegisterSupported;
      break;
    case hipDeviceAttributeConcurrentManagedAccess:
      cdattr = cudaDevAttrConcurrentManagedAccess;
      break;
    case hipDeviceAttributeManagedMemory:
      cdattr = cudaDevAttrManagedMemory;
      break;
    case hipDeviceAttributePageableMemoryAccessUsesHostPageTables:
      cdattr = cudaDevAttrPageableMemoryAccessUsesHostPageTables;
      break;
    case hipDeviceAttributePageableMemoryAccess:
      cdattr = cudaDevAttrPageableMemoryAccess;
      break;
    case hipDeviceAttributeDirectManagedMemAccessFromHost:
      cdattr = cudaDevAttrDirectManagedMemAccessFromHost;
      break;
    case hipDeviceAttributeGlobalL1CacheSupported:
      cdattr = cudaDevAttrGlobalL1CacheSupported;
      break;
    case hipDeviceAttributeMaxBlocksPerMultiProcessor:
      cdattr = cudaDevAttrMaxBlocksPerMultiprocessor;
      break;
    case hipDeviceAttributeMultiGpuBoardGroupID:
      cdattr = cudaDevAttrMultiGpuBoardGroupID;
      break;
    case hipDeviceAttributeReservedSharedMemPerBlock:
      cdattr = cudaDevAttrReservedSharedMemoryPerBlock;
      break;
    case hipDeviceAttributeSingleToDoublePrecisionPerfRatio:
      cdattr = cudaDevAttrSingleToDoublePrecisionPerfRatio;
      break;
    case hipDeviceAttributeStreamPrioritiesSupported:
      cdattr = cudaDevAttrStreamPrioritiesSupported;
      break;
    case hipDeviceAttributeSurfaceAlignment:
      cdattr = cudaDevAttrSurfaceAlignment;
      break;
    case hipDeviceAttributeTccDriver:
      cdattr = cudaDevAttrTccDriver;
      break;
    case hipDeviceAttributeUnifiedAddressing:
      cdattr = cudaDevAttrUnifiedAddressing;
      break;
    case hipDeviceAttributeMemoryPoolsSupported:
      cdattr = cudaDevAttrMemoryPoolsSupported;
      break;
    case hipDeviceAttributeVirtualMemoryManagementSupported:
    {
      auto cuda_ret = cuda_ftable.cuDeviceGetAttribute (pi,
          CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED,
          deviceId);
      return hipCUResultTohipError (cuda_ret);
    }
    case hipDeviceAttributeAccessPolicyMaxWindowSize:
      cdattr = cudaDevAttrMaxAccessPolicyWindowSize;
      break;
    case hipDeviceAttributeAsyncEngineCount:
      cdattr = cudaDevAttrAsyncEngineCount;
      break;
    case hipDeviceAttributeCanUseHostPointerForRegisteredMem:
      cdattr = cudaDevAttrCanUseHostPointerForRegisteredMem;
      break;
    case hipDeviceAttributeComputePreemptionSupported:
      cdattr = cudaDevAttrComputePreemptionSupported;
      break;
    case hipDeviceAttributeHostNativeAtomicSupported:
      cdattr = cudaDevAttrHostNativeAtomicSupported;
      break;
    default:
      return hipCUDAErrorTohipError (cudaErrorInvalidValue);
  }

  auto cuda_ret = cudart_ftable.cudaDeviceGetAttribute (pi, cdattr, deviceId);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipSetDevice (GstHipVendor vendor, int deviceId)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipSetDevice (deviceId);

  auto cuda_ret = cudart_ftable.cudaSetDevice (deviceId);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipMalloc (GstHipVendor vendor, void **ptr, size_t size)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipMalloc (ptr, size);

  auto cuda_ret = cudart_ftable.cudaMalloc (ptr, size);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipFree (GstHipVendor vendor, void *ptr)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipFree (ptr);

  auto cuda_ret = cudart_ftable.cudaFree (ptr);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipHostMalloc (GstHipVendor vendor, void **ptr, size_t size, unsigned int flags)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipHostMalloc (ptr, size, flags);

  auto cuda_ret = cudart_ftable.cudaHostAlloc (ptr, size, flags);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipHostFree (GstHipVendor vendor, void *ptr)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipHostFree (ptr);

  auto cuda_ret = cudart_ftable.cudaFreeHost (ptr);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipStreamCreate (GstHipVendor vendor, hipStream_t * stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipStreamCreate (stream);

  auto cuda_ret = cudart_ftable.cudaStreamCreate ((cudaStream_t *) stream);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipStreamDestroy (GstHipVendor vendor, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipStreamDestroy (stream);

  auto cuda_ret = cudart_ftable.cudaStreamDestroy (stream);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipStreamSynchronize (GstHipVendor vendor, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipStreamSynchronize (stream);

  auto cuda_ret = cudart_ftable.cudaStreamSynchronize (stream);
  return hipCUDAErrorTohipError (cuda_ret);
}
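
/* On the NVIDIA path hipStream_t and hipEvent_t come from
 * hip/nvidia_hip_runtime_api.h, where they are typedefs of (or
 * layout-compatible with) the CUDA handle types, so the stream and event
 * wrappers can pass HIP handles to the CUDA runtime directly. */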

hipError_t
HipEventCreateWithFlags (GstHipVendor vendor, hipEvent_t * event,
    unsigned flags)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipEventCreateWithFlags (event, flags);

  auto cuda_ret = cudart_ftable.cudaEventCreateWithFlags ((cudaEvent_t *) event,
      flags);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipEventRecord (GstHipVendor vendor, hipEvent_t event, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipEventRecord (event, stream);

  auto cuda_ret = cudart_ftable.cudaEventRecord ((cudaEvent_t) event, stream);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipEventDestroy (GstHipVendor vendor, hipEvent_t event)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipEventDestroy (event);

  auto cuda_ret = cudart_ftable.cudaEventDestroy ((cudaEvent_t) event);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipEventSynchronize (GstHipVendor vendor, hipEvent_t event)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipEventSynchronize (event);

  auto cuda_ret = cudart_ftable.cudaEventSynchronize ((cudaEvent_t) event);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipEventQuery (GstHipVendor vendor, hipEvent_t event)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipEventQuery (event);

  auto cuda_ret = cudart_ftable.cudaEventQuery ((cudaEvent_t) event);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipModuleLoadData (GstHipVendor vendor, hipModule_t * module, const void *image)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipModuleLoadData (module, image);

  auto cuda_ret = cuda_ftable.cuModuleLoadData ((CUmodule *) module, image);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipModuleUnload (GstHipVendor vendor, hipModule_t module)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipModuleUnload (module);

  auto cuda_ret = cuda_ftable.cuModuleUnload ((CUmodule) module);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipModuleGetFunction (GstHipVendor vendor, hipFunction_t * function,
    hipModule_t module, const char *kname)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipModuleGetFunction (function, module, kname);

  auto cuda_ret = cuda_ftable.cuModuleGetFunction ((CUfunction *) function,
      (CUmodule) module, kname);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipModuleLaunchKernel (GstHipVendor vendor, hipFunction_t f,
    unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
    unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
    unsigned int sharedMemBytes, hipStream_t stream, void **kernelParams,
    void **extra)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipModuleLaunchKernel (f, gridDimX, gridDimY, gridDimZ,
        blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream,
        kernelParams, extra);

  auto cuda_ret = cuda_ftable.cuLaunchKernel ((CUfunction) f, gridDimX,
      gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
      (CUstream) stream, kernelParams, extra);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipMemcpyParam2DAsync (GstHipVendor vendor, const hip_Memcpy2D * pCopy,
    hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipMemcpyParam2DAsync (pCopy, stream);

  CUresult cuda_ret;
  if (!pCopy) {
    cuda_ret = cuda_ftable.cuMemcpy2DAsync (nullptr, (CUstream) stream);
  } else {
    CUDA_MEMCPY2D cudaCopy = { };
    hipMemcpy2DTocudaMemcpy2D (cudaCopy, pCopy);
    cuda_ret = cuda_ftable.cuMemcpy2DAsync (&cudaCopy, (CUstream) stream);
  }

  return hipCUResultTohipError (cuda_ret);
}
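
/* hipMemcpy2DTocudaMemcpy2D above, like the hipCUResultTohipError and
 * hipCUDAErrorTohipError conversions used throughout, is an inline helper
 * provided by HIP's NVIDIA compatibility header. */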

hipError_t
HipMemsetD8Async (GstHipVendor vendor, hipDeviceptr_t dest, unsigned char value,
    size_t count, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipMemsetD8Async (dest, value, count, stream);

  auto cuda_ret = cuda_ftable.cuMemsetD8Async ((CUdeviceptr) dest, value,
      count, (CUstream) stream);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipMemsetD16Async (GstHipVendor vendor, hipDeviceptr_t dest,
    unsigned short value, size_t count, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipMemsetD16Async (dest, value, count, stream);

  auto cuda_ret = cuda_ftable.cuMemsetD16Async ((CUdeviceptr) dest, value,
      count, (CUstream) stream);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipMemsetD32Async (GstHipVendor vendor, hipDeviceptr_t dst, int value,
    size_t count, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipMemsetD32Async (dst, value, count, stream);

  auto cuda_ret = cuda_ftable.cuMemsetD32Async ((CUdeviceptr) dst, value,
      count, (CUstream) stream);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipTexObjectCreate (GstHipVendor vendor, hipTextureObject_t * pTexObject,
    const HIP_RESOURCE_DESC * pResDesc,
    const HIP_TEXTURE_DESC * pTexDesc,
    const HIP_RESOURCE_VIEW_DESC * pResViewDesc)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipTexObjectCreate (pTexObject, pResDesc, pTexDesc,
        pResViewDesc);

  auto cuda_ret = cuda_ftable.cuTexObjectCreate ((CUtexObject *) pTexObject,
      (const CUDA_RESOURCE_DESC *) pResDesc,
      (const CUDA_TEXTURE_DESC *) pTexDesc,
      (const CUDA_RESOURCE_VIEW_DESC *) pResViewDesc);

  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipTexObjectDestroy (GstHipVendor vendor, hipTextureObject_t texObject)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipTexObjectDestroy (texObject);

  auto cuda_ret = cuda_ftable.cuTexObjectDestroy ((CUtexObject) texObject);
  return hipCUResultTohipError (cuda_ret);
}

hipError_t
HipGraphicsMapResources (GstHipVendor vendor, int count,
    hipGraphicsResource_t * resources, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGraphicsMapResources (count, resources, stream);

  auto cuda_ret = cudart_ftable.cudaGraphicsMapResources (count,
      (cudaGraphicsResource_t *) resources, stream);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipGraphicsResourceGetMappedPointer (GstHipVendor vendor, void **devPtr,
    size_t *size, hipGraphicsResource_t resource)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD) {
    return amd_ftable.hipGraphicsResourceGetMappedPointer (devPtr,
        size, resource);
  }

  auto cuda_ret = cudart_ftable.cudaGraphicsResourceGetMappedPointer (devPtr,
      size, (cudaGraphicsResource_t) resource);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipGraphicsUnmapResources (GstHipVendor vendor, int count,
    hipGraphicsResource_t * resources, hipStream_t stream)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGraphicsUnmapResources (count, resources, stream);

  auto cuda_ret = cudart_ftable.cudaGraphicsUnmapResources (count,
      (cudaGraphicsResource_t *) resources, stream);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipGraphicsUnregisterResource (GstHipVendor vendor,
    hipGraphicsResource_t resource)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGraphicsUnregisterResource (resource);

  auto cuda_ret =
      cudart_ftable.cudaGraphicsUnregisterResource ((cudaGraphicsResource_t)
      resource);
  return hipCUDAErrorTohipError (cuda_ret);
}

#ifdef HAVE_GST_GL
hipError_t
HipGLGetDevices (GstHipVendor vendor, unsigned int *pHipDeviceCount,
    int *pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD) {
    return amd_ftable.hipGLGetDevices (pHipDeviceCount, pHipDevices,
        hipDeviceCount, deviceList);
  }

  auto cuda_ret = cudart_ftable.cudaGLGetDevices (pHipDeviceCount, pHipDevices,
      hipDeviceCount, (enum cudaGLDeviceList) deviceList);
  return hipCUDAErrorTohipError (cuda_ret);
}

hipError_t
HipGraphicsGLRegisterBuffer (GstHipVendor vendor,
    hipGraphicsResource ** resource, unsigned int buffer, unsigned int flags)
{
  CHECK_VENDOR (vendor);

  if (vendor == GST_HIP_VENDOR_AMD)
    return amd_ftable.hipGraphicsGLRegisterBuffer (resource, buffer, flags);

  auto cuda_ret =
      cudart_ftable.cudaGraphicsGLRegisterBuffer ((struct cudaGraphicsResource
          **) resource, buffer, flags);
  return hipCUDAErrorTohipError (cuda_ret);
}
#endif