hip: Remove build-time SDK dependency

Use dlopen at runtime

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8923>
This commit is contained in:
Seungha Yang 2025-05-06 20:16:29 +09:00
parent 0f9ce9de90
commit 24b93f2c41
16 changed files with 1571 additions and 104 deletions

View File

@ -27,4 +27,5 @@
#include "gsthipmemory.h"
#include "gsthipbufferpool.h"
#include "gsthiputils.h"
#include "gsthiploader.h"

View File

@ -813,34 +813,34 @@ gst_hip_converter_dispose (GObject * object)
if (self->device && gst_hip_device_set_current (self->device)) {
if (priv->unpack_module) {
hipModuleUnload (priv->unpack_module);
HipModuleUnload (priv->unpack_module);
priv->unpack_module = nullptr;
}
if (priv->main_module) {
hipModuleUnload (priv->main_module);
HipModuleUnload (priv->main_module);
priv->main_module = nullptr;
}
for (guint i = 0; i < G_N_ELEMENTS (priv->fallback_buffer); i++) {
if (priv->fallback_buffer[i].ptr) {
if (priv->fallback_buffer[i].texture) {
hipTexObjectDestroy (priv->fallback_buffer[i].texture);
HipTexObjectDestroy (priv->fallback_buffer[i].texture);
priv->fallback_buffer[i].texture = nullptr;
}
hipFree (priv->fallback_buffer[i].ptr);
HipFree (priv->fallback_buffer[i].ptr);
priv->fallback_buffer[i].ptr = 0;
}
}
if (priv->unpack_buffer.ptr) {
if (priv->unpack_buffer.texture) {
hipTexObjectDestroy (priv->unpack_buffer.texture);
HipTexObjectDestroy (priv->unpack_buffer.texture);
priv->unpack_buffer.texture = 0;
}
hipFree (priv->unpack_buffer.ptr);
HipFree (priv->unpack_buffer.ptr);
priv->unpack_buffer.ptr = 0;
}
}
@ -1357,7 +1357,7 @@ gst_hip_converter_setup (GstHipConverter * self)
if (program && !priv->main_module) {
GST_DEBUG_OBJECT (self, "Loading PTX module");
ret = hipModuleLoadData (&priv->main_module, program);
ret = HipModuleLoadData (&priv->main_module, program);
if (ret != hipSuccess) {
GST_ERROR_OBJECT (self, "Could not load module from PTX");
program = nullptr;
@ -1371,7 +1371,7 @@ gst_hip_converter_setup (GstHipConverter * self)
return FALSE;
}
ret = hipModuleGetFunction (&priv->main_func,
ret = HipModuleGetFunction (&priv->main_func,
priv->main_module, "GstHipConverterMain");
if (!gst_hip_result (ret)) {
GST_ERROR_OBJECT (self, "Could not get main function");
@ -1388,7 +1388,7 @@ gst_hip_converter_setup (GstHipConverter * self)
stride = do_align (stride, priv->tex_align);
priv->unpack_buffer.stride = stride;
ret = hipMalloc (&priv->unpack_buffer.ptr, stride *
ret = HipMalloc (&priv->unpack_buffer.ptr, stride *
GST_VIDEO_INFO_HEIGHT (texture_info));
if (!gst_hip_result (ret)) {
@ -1410,7 +1410,7 @@ gst_hip_converter_setup (GstHipConverter * self)
texture_desc.addressMode[1] = (HIPaddress_mode) 1;
texture_desc.addressMode[2] = (HIPaddress_mode) 1;
ret = hipTexObjectCreate (&texture, &resource_desc, &texture_desc, nullptr);
ret = HipTexObjectCreate (&texture, &resource_desc, &texture_desc, nullptr);
if (!gst_hip_result (ret)) {
GST_ERROR_OBJECT (self, "Couldn't create unpack texture");
return FALSE;
@ -1439,7 +1439,7 @@ gst_hip_converter_setup (GstHipConverter * self)
if (program && !priv->unpack_module) {
GST_DEBUG_OBJECT (self, "PTX CUBIN module");
ret = hipModuleLoadData (&priv->unpack_module, program);
ret = HipModuleLoadData (&priv->unpack_module, program);
if (!gst_hip_result (ret)) {
GST_ERROR_OBJECT (self, "Could not load module from PTX");
program = nullptr;
@ -1453,7 +1453,7 @@ gst_hip_converter_setup (GstHipConverter * self)
return FALSE;
}
ret = hipModuleGetFunction (&priv->unpack_func,
ret = HipModuleGetFunction (&priv->unpack_func,
priv->unpack_module, unpack_name.c_str ());
if (!gst_hip_result (ret)) {
GST_ERROR_OBJECT (self, "Could not get unpack function");
@ -1556,7 +1556,7 @@ gst_hip_converter_create_texture_unchecked (GstHipConverter * self,
texture_desc.addressMode[1] = (HIPaddress_mode) 1;
texture_desc.addressMode[2] = (HIPaddress_mode) 1;
auto hip_ret = hipTexObjectCreate (&texture,
auto hip_ret = HipTexObjectCreate (&texture,
&resource_desc, &texture_desc, nullptr);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (self, "Could not create texture");
@ -1577,7 +1577,7 @@ ensure_fallback_buffer (GstHipConverter * self, gint width_in_bytes,
size_t pitch = do_align (width_in_bytes, priv->tex_align);
priv->fallback_buffer[plane].stride = pitch;
auto hip_ret = hipMalloc (&priv->fallback_buffer[plane].ptr,
auto hip_ret = HipMalloc (&priv->fallback_buffer[plane].ptr,
pitch * height);
if (!gst_hip_result (hip_ret)) {
@ -1610,7 +1610,7 @@ gst_hip_converter_create_texture (GstHipConverter * self,
* GST_VIDEO_INFO_COMP_PSTRIDE (&priv->in_info, plane),
params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&priv->in_info, plane);
auto hip_ret = hipMemcpyParam2DAsync (&params, nullptr);
auto hip_ret = HipMemcpyParam2DAsync (&params, nullptr);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (self, "Couldn't copy to fallback buffer");
return nullptr;
@ -1647,7 +1647,7 @@ gst_hip_converter_unpack_rgb (GstHipConverter * self, GstVideoFrame * src_frame)
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
dst_stride = (gint) priv->unpack_buffer.stride;
auto hip_ret = hipModuleLaunchKernel (priv->unpack_func,
auto hip_ret = HipModuleLaunchKernel (priv->unpack_func,
DIV_UP (width, HIP_BLOCK_X), DIV_UP (height, HIP_BLOCK_Y), 1,
HIP_BLOCK_X, HIP_BLOCK_Y, 1, 0, nullptr, args, nullptr);
if (!gst_hip_result (hip_ret)) {
@ -1766,7 +1766,7 @@ gst_hip_converter_convert_frame (GstHipConverter * converter,
if (GST_VIDEO_FRAME_N_PLANES (&out_frame) > 1)
stride[1] = GST_VIDEO_FRAME_PLANE_STRIDE (&out_frame, 1);
auto hip_ret = hipModuleLaunchKernel (priv->main_func,
auto hip_ret = HipModuleLaunchKernel (priv->main_func,
DIV_UP (width, HIP_BLOCK_X), DIV_UP (height, HIP_BLOCK_Y), 1,
HIP_BLOCK_X, HIP_BLOCK_Y, 1,
0, nullptr, args, nullptr);
@ -1779,7 +1779,7 @@ gst_hip_converter_convert_frame (GstHipConverter * converter,
return FALSE;
}
hipStreamSynchronize (nullptr);
HipStreamSynchronize (nullptr);
return TRUE;
}

View File

@ -22,6 +22,7 @@
#endif
#include "gsthip.h"
#include "gsthiploader.h"
#include <mutex>
#ifndef GST_DISABLE_GST_DEBUG
@ -123,7 +124,7 @@ gst_hip_init_once (void)
static std::once_flag once;
std::call_once (once,[&] {
ret = hipInit (0);
ret = HipInit (0);
});
return ret;
@ -132,6 +133,11 @@ gst_hip_init_once (void)
GstHipDevice *
gst_hip_device_new (guint device_id)
{
if (!gst_hip_load_library ()) {
GST_INFO ("Couldn't load HIP library");
return nullptr;
}
auto hip_ret = gst_hip_init_once ();
if (hip_ret != hipSuccess) {
GST_DEBUG ("Couldn't initialize HIP, error: %d", hip_ret);
@ -139,7 +145,7 @@ gst_hip_device_new (guint device_id)
}
int num_dev = 0;
hip_ret = hipGetDeviceCount (&num_dev);
hip_ret = HipGetDeviceCount (&num_dev);
if (hip_ret != hipSuccess || num_dev <= 0) {
GST_DEBUG ("No supported HIP device, error: %d", hip_ret);
return nullptr;
@ -152,13 +158,13 @@ gst_hip_device_new (guint device_id)
gboolean texture_support = FALSE;
int val = 0;
hip_ret = hipDeviceGetAttribute (&val,
hip_ret = HipDeviceGetAttribute (&val,
hipDeviceAttributeMaxTexture2DWidth, device_id);
if (hip_ret == hipSuccess && val > 0) {
hip_ret = hipDeviceGetAttribute (&val,
hip_ret = HipDeviceGetAttribute (&val,
hipDeviceAttributeMaxTexture2DHeight, device_id);
if (hip_ret == hipSuccess && val > 0) {
hip_ret = hipDeviceGetAttribute (&val,
hip_ret = HipDeviceGetAttribute (&val,
hipDeviceAttributeTextureAlignment, device_id);
if (hip_ret == hipSuccess && val > 0) {
texture_support = TRUE;
@ -179,7 +185,7 @@ gst_hip_device_set_current (GstHipDevice * device)
{
g_return_val_if_fail (GST_IS_HIP_DEVICE (device), FALSE);
auto hip_ret = hipSetDevice (device->priv->device_id);
auto hip_ret = HipSetDevice (device->priv->device_id);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (device, "hipSetDevice result %d", hip_ret);
return FALSE;
@ -194,7 +200,7 @@ gst_hip_device_get_attribute (GstHipDevice * device, hipDeviceAttribute_t attr,
{
g_return_val_if_fail (GST_IS_HIP_DEVICE (device), hipErrorInvalidDevice);
return hipDeviceGetAttribute (value, attr, device->priv->device_id);
return HipDeviceGetAttribute (value, attr, device->priv->device_id);
}
gboolean

View File

@ -0,0 +1,327 @@
/* GStreamer
* Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gsthip.h"
#include "gsthiploader.h"
#include <gmodule.h>
#include <mutex>
#ifndef GST_DISABLE_GST_DEBUG
#define GST_CAT_DEFAULT ensure_debug_category()
/* Returns the "hiploader" debug category, creating it on the first call */
static GstDebugCategory *
ensure_debug_category (void)
{
  /* Initialization of a function-local static runs exactly once and is
   * thread-safe since C++11, so no separate once-flag is required */
  static GstDebugCategory *category =
      _gst_debug_category_new ("hiploader", 0, "hiploader");

  return category;
}
#endif
/* Resolves the exported symbol `name` from `module` and stores it in the
 * member of the same name in `table`. Both `module` (GModule *) and `table`
 * must be in scope at the point of use. On failure the error is logged, the
 * module is closed and the enclosing (void) function returns.
 *
 * No semicolon follows G_STMT_END: the invocation supplies its own, so that
 * "LOAD_SYMBOL (x);" is exactly one statement */
#define LOAD_SYMBOL(name) G_STMT_START { \
  if (!g_module_symbol (module, G_STRINGIFY (name), (gpointer *) &table->name)) { \
    GST_ERROR ("Failed to load '%s', %s", G_STRINGIFY (name), g_module_error()); \
    g_module_close (module); \
    return; \
  } \
} G_STMT_END
/* *INDENT-OFF* */
struct GstHipFuncTableAmd
{
gboolean loaded = FALSE;
const char *(*hipGetErrorName) (hipError_t hip_error);
const char *(*hipGetErrorString) (hipError_t hipError);
hipError_t (*hipInit) (unsigned int flags);
hipError_t (*hipGetDeviceCount) (int *count);
hipError_t (*hipGetDeviceProperties) (hipDeviceProp_t * prop, int deviceId);
hipError_t (*hipDeviceGetAttribute) (int *pi, hipDeviceAttribute_t attr,
int deviceId);
hipError_t (*hipSetDevice) (int deviceId);
hipError_t (*hipMalloc) (void **ptr, size_t size);
hipError_t (*hipFree) (void *ptr);
hipError_t (*hipHostMalloc) (void **ptr, size_t size, unsigned int flags);
hipError_t (*hipHostFree) (void *ptr);
hipError_t (*hipStreamSynchronize) (hipStream_t stream);
hipError_t (*hipModuleLoadData) (hipModule_t * module, const void *image);
hipError_t (*hipModuleUnload) (hipModule_t module);
hipError_t (*hipModuleGetFunction) (hipFunction_t * function,
hipModule_t module, const char *kname);
hipError_t (*hipModuleLaunchKernel) (hipFunction_t f, unsigned int gridDimX,
unsigned int gridDimY,
unsigned int gridDimZ, unsigned int blockDimX,
unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, hipStream_t stream,
void **kernelParams, void **extra);
hipError_t (*hipMemcpyParam2DAsync) (const hip_Memcpy2D * pCopy,
hipStream_t stream);
hipError_t (*hipTexObjectCreate) (hipTextureObject_t * pTexObject,
const HIP_RESOURCE_DESC * pResDesc, const HIP_TEXTURE_DESC * pTexDesc,
const HIP_RESOURCE_VIEW_DESC * pResViewDesc);
hipError_t (*hipTexObjectDestroy) (hipTextureObject_t texObject);
};
/* *INDENT-ON* */
static GstHipFuncTableAmd amd_ftable = { };
/* Opens the AMD HIP runtime library and resolves every entry point of
 * amd_ftable. amd_ftable.loaded becomes TRUE only when all symbols were
 * found; if the library cannot be opened or any symbol is missing, the table
 * is left marked as not loaded (a partially opened module is closed by
 * LOAD_SYMBOL) */
static void
load_amd_func_table (void)
{
  GModule *module = nullptr;

#ifndef G_OS_WIN32
  /* NOTE(review): only the unversioned library name is tried; confirm that
   * runtime-only ROCm installations provide it */
  module = g_module_open ("libamdhip64.so", G_MODULE_BIND_LAZY);
  if (!module)
    module = g_module_open ("/opt/rocm/lib/libamdhip64.so", G_MODULE_BIND_LAZY);
#else
  /* Prefer hip dll in SDK */
  auto hip_root = g_getenv ("HIP_PATH");
  if (hip_root) {
    auto path = g_build_path (G_DIR_SEPARATOR_S, hip_root, "bin", nullptr);
    auto dir = g_dir_open (path, 0, nullptr);
    if (dir) {
      const gchar *name;
      while ((name = g_dir_read_name (dir))) {
        if (!g_str_has_prefix (name, "amdhip64_") ||
            !g_str_has_suffix (name, ".dll")) {
          continue;
        }

        auto lib_path = g_build_filename (path, name, nullptr);
        module = g_module_open (lib_path, G_MODULE_BIND_LAZY);
        /* g_build_filename() returns a newly allocated string that the
         * caller owns */
        g_free (lib_path);

        /* Stop at the first candidate that opens; otherwise keep scanning
         * the remaining directory entries */
        if (module)
          break;
      }
      g_dir_close (dir);
    }
    g_free (path);
  }

  /* Try dll in System32 */
  if (!module)
    module = g_module_open ("amdhip64.dll", G_MODULE_BIND_LAZY);
#endif

  if (!module) {
    GST_INFO ("Couldn't open HIP library");
    return;
  }

  /* LOAD_SYMBOL() expects "module" and "table" to be in scope, and returns
   * from this function on the first missing symbol */
  auto table = &amd_ftable;
  LOAD_SYMBOL (hipGetErrorName);
  LOAD_SYMBOL (hipGetErrorString);
  LOAD_SYMBOL (hipInit);
  LOAD_SYMBOL (hipGetDeviceCount);
  LOAD_SYMBOL (hipGetDeviceProperties);
  LOAD_SYMBOL (hipDeviceGetAttribute);
  LOAD_SYMBOL (hipSetDevice);
  LOAD_SYMBOL (hipMalloc);
  LOAD_SYMBOL (hipFree);
  LOAD_SYMBOL (hipHostMalloc);
  LOAD_SYMBOL (hipHostFree);
  LOAD_SYMBOL (hipStreamSynchronize);
  LOAD_SYMBOL (hipModuleLoadData);
  LOAD_SYMBOL (hipModuleUnload);
  LOAD_SYMBOL (hipModuleGetFunction);
  LOAD_SYMBOL (hipModuleLaunchKernel);
  LOAD_SYMBOL (hipMemcpyParam2DAsync);
  LOAD_SYMBOL (hipTexObjectCreate);
  LOAD_SYMBOL (hipTexObjectDestroy);

  /* The module is intentionally kept open: the table holds pointers into it */
  table->loaded = TRUE;
}
/* Attempts to load the HIP runtime library on the first call; every later
 * call only reports the outcome of that first attempt.
 *
 * Returns: TRUE if the library was opened and all symbols were resolved */
gboolean
gst_hip_load_library (void)
{
  static std::once_flag load_once;

  std::call_once (load_once, load_amd_func_table);

  return amd_ftable.loaded;
}
/* Forwards to the runtime-loaded hipGetErrorName().
 * Returns nullptr, without calling into HIP, if the HIP library could not be
 * loaded */
const char *
HipGetErrorName (hipError_t hip_error)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return nullptr;
  }

  return amd_ftable.hipGetErrorName (hip_error);
}
/* Forwards to the runtime-loaded hipGetErrorString().
 * Returns nullptr, without calling into HIP, if the HIP library could not be
 * loaded */
const char *
HipGetErrorString (hipError_t hipError)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return nullptr;
  }

  return amd_ftable.hipGetErrorString (hipError);
}
/* Forwards to the runtime-loaded hipInit().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipInit (unsigned int flags)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipInit (flags);
}
/* Forwards to the runtime-loaded hipGetDeviceCount().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipGetDeviceCount (int *count)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipGetDeviceCount (count);
}
/* Forwards to the runtime-loaded hipGetDeviceProperties().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipGetDeviceProperties (hipDeviceProp_t * prop, int deviceId)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipGetDeviceProperties (prop, deviceId);
}
/* Forwards to the runtime-loaded hipDeviceGetAttribute().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipDeviceGetAttribute (int *pi, hipDeviceAttribute_t attr, int deviceId)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipDeviceGetAttribute (pi, attr, deviceId);
}
/* Forwards to the runtime-loaded hipSetDevice().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipSetDevice (int deviceId)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipSetDevice (deviceId);
}
/* Forwards to the runtime-loaded hipMalloc().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipMalloc (void **ptr, size_t size)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipMalloc (ptr, size);
}
/* Forwards to the runtime-loaded hipFree().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipFree (void *ptr)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipFree (ptr);
}
/* Forwards to the runtime-loaded hipHostMalloc().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipHostMalloc (void **ptr, size_t size, unsigned int flags)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipHostMalloc (ptr, size, flags);
}
/* Forwards to the runtime-loaded hipHostFree().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipHostFree (void *ptr)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipHostFree (ptr);
}
/* Forwards to the runtime-loaded hipStreamSynchronize().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipStreamSynchronize (hipStream_t stream)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipStreamSynchronize (stream);
}
/* Forwards to the runtime-loaded hipModuleLoadData().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipModuleLoadData (hipModule_t * module, const void *image)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipModuleLoadData (module, image);
}
/* Forwards to the runtime-loaded hipModuleUnload().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipModuleUnload (hipModule_t module)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipModuleUnload (module);
}
/* Forwards to the runtime-loaded hipModuleGetFunction().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipModuleGetFunction (hipFunction_t * function, hipModule_t module,
    const char *kname)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipModuleGetFunction (function, module, kname);
}
/* Forwards to the runtime-loaded hipModuleLaunchKernel() with all arguments
 * passed through unchanged.
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipModuleLaunchKernel (hipFunction_t f, unsigned int gridDimX,
    unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX,
    unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes,
    hipStream_t stream, void **kernelParams, void **extra)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipModuleLaunchKernel (f, gridDimX, gridDimY, gridDimZ,
      blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream,
      kernelParams, extra);
}
/* Forwards to the runtime-loaded hipMemcpyParam2DAsync().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipMemcpyParam2DAsync (const hip_Memcpy2D * pCopy, hipStream_t stream)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipMemcpyParam2DAsync (pCopy, stream);
}
/* Forwards to the runtime-loaded hipTexObjectCreate().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipTexObjectCreate (hipTextureObject_t * pTexObject,
    const HIP_RESOURCE_DESC * pResDesc,
    const HIP_TEXTURE_DESC * pTexDesc,
    const HIP_RESOURCE_VIEW_DESC * pResViewDesc)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipTexObjectCreate (pTexObject, pResDesc, pTexDesc,
      pResViewDesc);
}
/* Forwards to the runtime-loaded hipTexObjectDestroy().
 * Returns hipErrorNotInitialized, without calling into HIP, if the HIP
 * library could not be loaded */
hipError_t
HipTexObjectDestroy (hipTextureObject_t texObject)
{
  /* Explicit check instead of g_return_val_if_fail(): that macro is compiled
   * out under G_DISABLE_CHECKS, which would leave the call through a null
   * function pointer unguarded */
  if (!gst_hip_load_library ()) {
    GST_ERROR ("HIP library is not loaded");
    return hipErrorNotInitialized;
  }

  return amd_ftable.hipTexObjectDestroy (texObject);
}

View File

@ -0,0 +1,78 @@
/* GStreamer
* Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <hip/hip_runtime.h>
G_BEGIN_DECLS
/* Loads the HIP runtime library and resolves the symbols wrapped below.
 * The load is attempted once; the result of that attempt is returned on
 * every call. Returns TRUE when the library and all symbols are available. */
gboolean gst_hip_load_library (void);
/* Wrappers around the HIP entry points of the same name (with a lowercase
 * "hip" prefix), resolved at runtime instead of being linked at build time.
 * Each wrapper checks gst_hip_load_library() before forwarding its arguments
 * unchanged. When that check fails, the two error-description wrappers
 * return NULL and all other wrappers return hipErrorNotInitialized. */
const char* HipGetErrorName(hipError_t hip_error);
const char* HipGetErrorString(hipError_t hipError);
hipError_t HipInit(unsigned int flags);
hipError_t HipGetDeviceCount(int* count);
hipError_t HipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
hipError_t HipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId);
hipError_t HipSetDevice(int deviceId);
hipError_t HipMalloc(void** ptr, size_t size);
hipError_t HipFree(void* ptr);
hipError_t HipHostMalloc(void** ptr, size_t size, unsigned int flags);
hipError_t HipHostFree(void* ptr);
hipError_t HipStreamSynchronize(hipStream_t stream);
hipError_t HipModuleLoadData(hipModule_t* module, const void* image);
hipError_t HipModuleUnload(hipModule_t module);
hipError_t HipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname);
hipError_t HipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY,
unsigned int gridDimZ, unsigned int blockDimX,
unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, hipStream_t stream,
void** kernelParams, void** extra);
hipError_t HipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream);
hipError_t HipTexObjectCreate(
hipTextureObject_t* pTexObject,
const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
hipError_t HipTexObjectDestroy(
hipTextureObject_t texObject);
G_END_DECLS

View File

@ -236,7 +236,7 @@ gst_hip_allocator_alloc_internal (GstHipAllocator * self,
auto pitch = do_align (width_in_bytes, texture_align);
void *data;
hip_ret = hipMalloc (&data, pitch * alloc_height);
hip_ret = HipMalloc (&data, pitch * alloc_height);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (self, "Failed to allocate memory");
return nullptr;
@ -245,7 +245,7 @@ gst_hip_allocator_alloc_internal (GstHipAllocator * self,
GstVideoInfo alloc_info;
if (!gst_hip_allocator_update_info (info, pitch, alloc_height, &alloc_info)) {
GST_ERROR_OBJECT (self, "Couldn't calculate aligned info");
hipFree (data);
HipFree (data);
return nullptr;
}
@ -282,16 +282,16 @@ gst_hip_allocator_free (GstAllocator * allocator, GstMemory * mem)
for (guint j = 0; j < N_TEX_ADDR_MODES; j++) {
for (guint k = 0; k < N_TEX_FILTER_MODES; k++) {
if (priv->texture[i][j][k]) {
hipTexObjectDestroy (priv->texture[i][j][k]);
HipTexObjectDestroy (priv->texture[i][j][k]);
}
}
}
}
hipFree (priv->data);
HipFree (priv->data);
if (priv->staging)
hipHostFree (priv->staging);
HipHostFree (priv->staging);
gst_object_unref (hmem->device);
@ -327,9 +327,9 @@ gst_hip_memory_upload (GstHipAllocator * self, GstHipMemory * mem)
param.Height = priv->height;
/* TODO use stream */
auto hip_ret = hipMemcpyParam2DAsync (&param, nullptr);
auto hip_ret = HipMemcpyParam2DAsync (&param, nullptr);
if (gst_hip_result (hip_ret))
hip_ret = hipStreamSynchronize (nullptr);
hip_ret = HipStreamSynchronize (nullptr);
GST_MEMORY_FLAG_UNSET (mem, GST_HIP_MEMORY_TRANSFER_NEED_UPLOAD);
@ -351,7 +351,7 @@ gst_hip_memory_download (GstHipAllocator * self, GstHipMemory * mem)
}
if (!priv->staging) {
auto hip_ret = hipHostMalloc (&priv->staging, GST_MEMORY_CAST (mem)->size,
auto hip_ret = HipHostMalloc (&priv->staging, GST_MEMORY_CAST (mem)->size,
0);
if (!gst_hip_result (hip_ret)) {
@ -371,9 +371,9 @@ gst_hip_memory_download (GstHipAllocator * self, GstHipMemory * mem)
param.Height = priv->height;
/* TODO use stream */
auto hip_ret = hipMemcpyParam2DAsync (&param, nullptr);
auto hip_ret = HipMemcpyParam2DAsync (&param, nullptr);
if (gst_hip_result (hip_ret))
hip_ret = hipStreamSynchronize (nullptr);
hip_ret = HipStreamSynchronize (nullptr);
GST_MEMORY_FLAG_UNSET (mem, GST_HIP_MEMORY_TRANSFER_NEED_DOWNLOAD);
@ -481,9 +481,9 @@ hip_mem_copy (GstMemory * mem, gssize offset, gssize size)
param.Height = src_mem->priv->height;
/* TODO: use stream */
auto ret = hipMemcpyParam2DAsync (&param, nullptr);
auto ret = HipMemcpyParam2DAsync (&param, nullptr);
if (gst_hip_result (ret))
ret = hipStreamSynchronize (nullptr);
ret = HipStreamSynchronize (nullptr);
gst_memory_unmap (mem, &src_info);
gst_memory_unmap (copy, &dst_info);
@ -642,7 +642,7 @@ gst_hip_memory_get_texture (GstHipMemory * mem, guint plane,
tex_desc.addressMode[2] = (HIPaddress_mode) address_mode;
hipTextureObject_t tex_obj;
auto hip_ret = hipTexObjectCreate (&tex_obj, &res_desc, &tex_desc, nullptr);
auto hip_ret = HipTexObjectCreate (&tex_obj, &res_desc, &tex_desc, nullptr);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (mem->device, "Couldn't create texture object");
return FALSE;

View File

@ -27,6 +27,8 @@
#include <mutex>
#include <vector>
#include <string>
#include <gmodule.h>
#include <string.h>
#ifndef GST_DISABLE_GST_DEBUG
#define GST_CAT_DEFAULT ensure_debug_category()
@ -44,12 +46,104 @@ ensure_debug_category (void)
}
#endif
/* Resolves the exported symbol `name` from `module` and stores it in the
 * member of the same name in `table`. Both `module` (GModule *) and `table`
 * must be in scope at the point of use. On failure the error is logged, the
 * module is closed and the enclosing (void) function returns.
 *
 * No semicolon follows G_STMT_END: the invocation supplies its own, so that
 * "LOAD_SYMBOL (x);" is exactly one statement */
#define LOAD_SYMBOL(name) G_STMT_START { \
  if (!g_module_symbol (module, G_STRINGIFY (name), (gpointer *) &table->name)) { \
    GST_ERROR ("Failed to load '%s', %s", G_STRINGIFY (name), g_module_error()); \
    g_module_close (module); \
    return; \
  } \
} G_STMT_END
/* *INDENT-OFF* */
struct GstHipRtcFuncTableAmd
{
gboolean loaded = FALSE;
hiprtcResult (*hiprtcCreateProgram) (hiprtcProgram * prog,
const char *src,
const char *name,
int numHeaders, const char **headers, const char **includeNames);
hiprtcResult (*hiprtcCompileProgram) (hiprtcProgram prog,
int numOptions, const char **options);
hiprtcResult (*hiprtcGetProgramLog) (hiprtcProgram prog, char *log);
hiprtcResult (*hiprtcGetProgramLogSize) (hiprtcProgram prog,
size_t *logSizeRet);
hiprtcResult (*hiprtcGetCodeSize) (hiprtcProgram prog, size_t *codeSizeRet);
hiprtcResult (*hiprtcGetCode) (hiprtcProgram prog, char *code);
hiprtcResult (*hiprtcDestroyProgram) (hiprtcProgram * prog);
};
/* *INDENT-ON* */
static GstHipRtcFuncTableAmd amd_ftable = { };
/* Opens the HIP RTC library and resolves every entry point of amd_ftable.
 * amd_ftable.loaded becomes TRUE only when all symbols were found; if the
 * library cannot be opened or any symbol is missing, the table is left
 * marked as not loaded (a partially opened module is closed by LOAD_SYMBOL) */
static void
load_rtc_amd_func_table (void)
{
  GModule *module = nullptr;

#ifndef G_OS_WIN32
  /* NOTE(review): only the unversioned library name is tried; confirm that
   * runtime-only ROCm installations provide it */
  module = g_module_open ("libhiprtc.so", G_MODULE_BIND_LAZY);
  if (!module)
    module = g_module_open ("/opt/rocm/lib/libhiprtc.so", G_MODULE_BIND_LAZY);
#else
  /* Prefer hip dll in SDK */
  auto hip_root = g_getenv ("HIP_PATH");
  if (hip_root) {
    auto path = g_build_path (G_DIR_SEPARATOR_S, hip_root, "bin", nullptr);
    auto dir = g_dir_open (path, 0, nullptr);
    if (dir) {
      const gchar *name;
      while ((name = g_dir_read_name (dir))) {
        /* Accept hiprtc*.dll, but skip names containing "builtins" */
        if (!g_str_has_prefix (name, "hiprtc") ||
            !g_str_has_suffix (name, ".dll") || strstr (name, "builtins")) {
          continue;
        }

        auto lib_path = g_build_filename (path, name, nullptr);
        module = g_module_open (lib_path, G_MODULE_BIND_LAZY);
        /* g_build_filename() returns a newly allocated string that the
         * caller owns */
        g_free (lib_path);

        /* Stop at the first candidate that opens; otherwise keep scanning
         * the remaining directory entries */
        if (module)
          break;
      }
      g_dir_close (dir);
    }
    g_free (path);
  }
#endif

  if (!module) {
    GST_INFO ("Couldn't open HIP RTC library");
    return;
  }

  /* LOAD_SYMBOL() expects "module" and "table" to be in scope, and returns
   * from this function on the first missing symbol */
  auto table = &amd_ftable;
  LOAD_SYMBOL (hiprtcCreateProgram);
  LOAD_SYMBOL (hiprtcCompileProgram);
  LOAD_SYMBOL (hiprtcGetProgramLog);
  LOAD_SYMBOL (hiprtcGetProgramLogSize);
  LOAD_SYMBOL (hiprtcGetCodeSize);
  LOAD_SYMBOL (hiprtcGetCode);
  LOAD_SYMBOL (hiprtcDestroyProgram);

  /* The module is intentionally kept open: the table holds pointers into it */
  table->loaded = TRUE;
}
/* Attempts to load the HIP RTC library on the first call; every later call
 * only reports the outcome of that first attempt.
 *
 * Returns: TRUE if the library was opened and all symbols were resolved */
gboolean
gst_hip_rtc_load_library (void)
{
  static std::once_flag load_once;

  std::call_once (load_once, load_rtc_amd_func_table);

  return amd_ftable.loaded;
}
gchar *
gst_hip_rtc_compile (GstHipDevice * device,
const gchar * source, const gchar ** options, guint num_options)
{
if (!gst_hip_rtc_load_library ())
return nullptr;
hiprtcProgram prog;
auto rtc_ret = hiprtcCreateProgram (&prog, source, "program.cpp",
auto rtc_ret = amd_ftable.hiprtcCreateProgram (&prog, source, "program.cpp",
0, nullptr, nullptr);
if (rtc_ret != HIPRTC_SUCCESS) {
@ -60,22 +154,15 @@ gst_hip_rtc_compile (GstHipDevice * device,
guint device_id;
g_object_get (device, "device-id", &device_id, nullptr);
hipDeviceProp_t props = { };
auto hip_ret = hipGetDeviceProperties (&props, device_id);
if (!gst_hip_result (hip_ret)) {
GST_ERROR_OBJECT (device, "Couldn't query device property");
return nullptr;
}
rtc_ret = hiprtcCompileProgram (prog, num_options, options);
rtc_ret = amd_ftable.hiprtcCompileProgram (prog, num_options, options);
if (rtc_ret != HIPRTC_SUCCESS) {
size_t log_size = 0;
gchar *err_str = nullptr;
rtc_ret = hiprtcGetProgramLogSize (prog, &log_size);
rtc_ret = amd_ftable.hiprtcGetProgramLogSize (prog, &log_size);
if (rtc_ret == HIPRTC_SUCCESS) {
err_str = (gchar *) g_malloc0 (log_size);
err_str[log_size - 1] = '\0';
hiprtcGetProgramLog (prog, err_str);
amd_ftable.hiprtcGetProgramLog (prog, err_str);
}
GST_ERROR_OBJECT (device, "Couldn't compile program, ret: %d (%s)",
@ -85,14 +172,14 @@ gst_hip_rtc_compile (GstHipDevice * device,
}
size_t code_size;
rtc_ret = hiprtcGetCodeSize (prog, &code_size);
rtc_ret = amd_ftable.hiprtcGetCodeSize (prog, &code_size);
if (rtc_ret != HIPRTC_SUCCESS) {
GST_ERROR_OBJECT (device, "Couldn't get code size, ret: %d", rtc_ret);
return nullptr;
}
auto code = (gchar *) g_malloc0 (code_size);
rtc_ret = hiprtcGetCode (prog, code);
rtc_ret = amd_ftable.hiprtcGetCode (prog, code);
if (rtc_ret != HIPRTC_SUCCESS) {
GST_ERROR_OBJECT (device, "Couldn't get code, ret: %d", rtc_ret);
@ -100,7 +187,7 @@ gst_hip_rtc_compile (GstHipDevice * device,
return nullptr;
}
hiprtcDestroyProgram (&prog);
amd_ftable.hiprtcDestroyProgram (&prog);
return code;
}

View File

@ -24,6 +24,8 @@
G_BEGIN_DECLS
gboolean gst_hip_rtc_load_library (void);
gchar * gst_hip_rtc_compile (GstHipDevice * device,
const gchar * source,
const gchar ** options,

View File

@ -46,8 +46,8 @@ _gst_hip_result (hipError_t result, GstDebugCategory * cat, const gchar * file,
{
if (result != hipSuccess) {
#ifndef GST_DISABLE_GST_DEBUG
auto error_name = hipGetErrorName (result);
auto error_str = hipGetErrorString (result);
auto error_name = HipGetErrorName (result);
auto error_str = HipGetErrorString (result);
gst_debug_log (cat, GST_LEVEL_ERROR, file, function, line,
NULL, "HIP call failed: %s, %s", error_name, error_str);
#endif

View File

@ -4,6 +4,7 @@ hip_sources = [
'gsthipconverter.cpp',
'gsthipconvertscale.cpp',
'gsthipdevice.cpp',
'gsthiploader.cpp',
'gsthipmemory.cpp',
'gsthipmemorycopy.cpp',
'gsthiprtc.cpp',
@ -22,8 +23,6 @@ plugin_sources += {
extra_args = [
'-DGST_USE_UNSTABLE_API',
'-D__HIP_PLATFORM_AMD__',
'-D__HIP_DISABLE_CPP_FUNCTIONS__',
]
hip_option = get_option('hip')
@ -35,54 +34,13 @@ if host_system not in ['linux', 'windows']
subdir_done()
endif
hip_dep = dependency('', required: false)
hip_root = run_command(python3, '-c', 'import os; print(os.environ.get("HIP_PATH"))', check: false).stdout().strip()
if host_system == 'windows'
hip_root = run_command(python3, '-c', 'import os; print(os.environ.get("HIP_PATH"))', check: false).stdout().strip()
else
# HIP does not offer pkg-config it seems
hip_root = '/opt/rocm'
endif
if hip_root != '' and hip_root != 'None'
hip_lib_dir = join_paths(hip_root, 'lib')
hip_lib = cc.find_library('amdhip64', dirs: hip_lib_dir,
required : hip_option)
hiprtc_lib = cc.find_library('hiprtc', dirs: hip_lib_dir,
required : hip_option)
hip_inc_dir = include_directories(join_paths(hip_root, 'include'))
has_hip_header = cc.has_header('hip/hip_runtime.h',
include_directories: hip_inc_dir,
required: hip_option)
has_hiprtc_header = cc.has_header('hip/hiprtc.h',
include_directories: hip_inc_dir,
required: hip_option)
if hip_lib.found() and hiprtc_lib.found() and has_hip_header and has_hiprtc_header
hip_dep = declare_dependency(include_directories: hip_inc_dir,
dependencies: [hip_lib, hiprtc_lib])
endif
endif
if not hip_dep.found()
if hip_option.enabled()
error('The hip was enabled explicitly, but required dependencies were not found')
endif
subdir_done()
endif
extra_args += cc.get_supported_arguments([
'-Wno-undef'
])
hip_incdir = include_directories('./stub')
gsthip = library('gsthip', hip_sources,
c_args : gst_plugins_bad_args + extra_args,
cpp_args: gst_plugins_bad_args + extra_args,
include_directories : [configinc],
dependencies : [gstbase_dep, gstvideo_dep, hip_dep],
include_directories : [configinc, hip_incdir],
dependencies : [gstbase_dep, gstvideo_dep, gmodule_dep],
install : true,
install_dir : plugins_install_dir,
)

View File

@ -25,6 +25,7 @@
#include "gsthipdevice.h"
#include "gsthipmemorycopy.h"
#include "gsthipconvertscale.h"
#include "gsthiprtc.h"
static gboolean
plugin_init (GstPlugin * plugin)
@ -40,16 +41,24 @@ plugin_init (GstPlugin * plugin)
gboolean texture_support = FALSE;
g_object_get (device, "texture2d-support", &texture_support, nullptr);
if (texture_support) {
if (!texture_support) {
gst_plugin_add_status_info (plugin,
"Texture2D not supported by HIP device");
}
auto have_rtc = gst_hip_rtc_load_library ();
if (!have_rtc) {
gst_plugin_add_status_info (plugin,
"Couldn't find runtime kernel compiler library");
}
if (texture_support && have_rtc) {
gst_element_register (plugin,
"hipconvertscale", GST_RANK_NONE, GST_TYPE_HIP_CONVERT_SCALE);
gst_element_register (plugin,
"hipconvert", GST_RANK_NONE, GST_TYPE_HIP_CONVERT);
gst_element_register (plugin,
"hipscale", GST_RANK_NONE, GST_TYPE_HIP_SCALE);
} else {
gst_plugin_add_status_info (plugin,
"Texture2D not supported by HIP device");
}
gst_clear_object (&device);

View File

@ -0,0 +1,429 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/hip_runtime_api.h>
#ifndef __cplusplus
#include <stdbool.h>
#endif
/** Device pointer handle; declared as a plain untyped pointer. */
typedef void *hipDeviceptr_t;
/**
 * Kind of a channel format. Every enumerator carries an explicit value so the
 * numbering does not depend on declaration order.
 */
typedef enum hipChannelFormatKind {
    hipChannelFormatKindSigned   = 0,
    hipChannelFormatKindUnsigned = 1,
    hipChannelFormatKindFloat    = 2,
    hipChannelFormatKindNone     = 3
} hipChannelFormatKind;
/**
 * Channel format descriptor: four integer components plus the format kind.
 * NOTE(review): x/y/z/w are presumably per-component bit widths -- confirm
 * against the HIP documentation.
 */
typedef struct hipChannelFormatDesc {
    int x;
    int y;
    int z;
    int w;
    enum hipChannelFormatKind f;  /**< Kind of the channel data */
} hipChannelFormatDesc;
/* HIP_TRSA_* / HIP_TRSF_* constants.
 * NOTE(review): presumably texture-reference flag bits, going by the name
 * prefixes -- confirm against the HIP documentation. */
#define HIP_TRSA_OVERRIDE_FORMAT        0x01
#define HIP_TRSF_READ_AS_INTEGER        0x01
#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
#define HIP_TRSF_SRGB                   0x10
/** Handle to a HIP array; struct hipArray is not defined here. */
typedef struct hipArray *hipArray_t;
/** Handle to a HIP array through which the array must not be modified. */
typedef const struct hipArray *hipArray_const_t;
/**
 * Element formats of a HIP array. All values are assigned explicitly and are
 * not contiguous.
 */
typedef enum hipArray_Format {
    HIP_AD_FORMAT_UNSIGNED_INT8  = 0x01,
    HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
    HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
    HIP_AD_FORMAT_SIGNED_INT8    = 0x08,
    HIP_AD_FORMAT_SIGNED_INT16   = 0x09,
    HIP_AD_FORMAT_SIGNED_INT32   = 0x0a,
    HIP_AD_FORMAT_HALF           = 0x10,
    HIP_AD_FORMAT_FLOAT          = 0x20
} hipArray_Format;
/** Descriptor of a 2D HIP array: extent, element format and channel count. */
typedef struct HIP_ARRAY_DESCRIPTOR {
    size_t Width;
    size_t Height;
    enum hipArray_Format Format;
    unsigned int NumChannels;
} HIP_ARRAY_DESCRIPTOR;
/**
 * Descriptor of a 3D HIP array: extent in three dimensions, element format,
 * channel count and a flags word.
 */
typedef struct HIP_ARRAY3D_DESCRIPTOR {
    size_t Width;
    size_t Height;
    size_t Depth;
    enum hipArray_Format Format;
    unsigned int NumChannels;
    unsigned int Flags;
} HIP_ARRAY3D_DESCRIPTOR;
/**
 * Parameter block for a 2D memory copy. The source and the destination are
 * each described by an offset (X in bytes, Y), a memory type, one handle per
 * possible location (host pointer, device pointer, array) and a pitch; the
 * copied region is WidthInBytes by Height.
 * NOTE(review): which of the host/device/array members is consulted is
 * presumably selected by the matching *MemoryType field -- confirm against
 * the HIP documentation.
 */
typedef struct hip_Memcpy2D {
    /* Source */
    size_t srcXInBytes;
    size_t srcY;
    hipMemoryType srcMemoryType;
    const void *srcHost;
    hipDeviceptr_t srcDevice;
    hipArray_t srcArray;
    size_t srcPitch;

    /* Destination */
    size_t dstXInBytes;
    size_t dstY;
    hipMemoryType dstMemoryType;
    void *dstHost;
    hipDeviceptr_t dstDevice;
    hipArray_t dstArray;
    size_t dstPitch;

    /* Extent of the copy */
    size_t WidthInBytes;
    size_t Height;
} hip_Memcpy2D;
/**
 * Mipmapped array record: a data pointer, its channel format descriptor,
 * dimensions, the mipmap level range, flags, element format and channel
 * count.
 */
typedef struct hipMipmappedArray {
    void *data;
    struct hipChannelFormatDesc desc;
    unsigned int type;
    unsigned int width;
    unsigned int height;
    unsigned int depth;
    unsigned int min_mipmap_level;
    unsigned int max_mipmap_level;
    unsigned int flags;
    enum hipArray_Format format;
    unsigned int num_channels;
} hipMipmappedArray;
/** Handle to a mipmapped array. */
typedef struct hipMipmappedArray *hipMipmappedArray_t;
/** Alternate spelling of hipMipmappedArray_t. */
typedef hipMipmappedArray_t hipmipmappedArray;
/** Handle to a mipmapped array through which it must not be modified. */
typedef const struct hipMipmappedArray *hipMipmappedArray_const_t;
/**
 * HIP resource types.
 *
 * The values are identical to those of the HIP_RESOURCE_TYPE_* enumeration.
 */
typedef enum hipResourceType {
    hipResourceTypeArray          = 0x00,
    hipResourceTypeMipmappedArray = 0x01,
    hipResourceTypeLinear         = 0x02,
    hipResourceTypePitch2D        = 0x03
} hipResourceType;
/**
 * Resource types, HIP_RESOURCE_TYPE_* spelling. The type is exported under
 * two names: HIPresourcetype and hipResourcetype.
 */
typedef enum HIPresourcetype_enum {
    HIP_RESOURCE_TYPE_ARRAY           = 0x00,  /**< Array resource */
    HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,  /**< Mipmapped array resource */
    HIP_RESOURCE_TYPE_LINEAR          = 0x02,  /**< Linear resource */
    HIP_RESOURCE_TYPE_PITCH2D         = 0x03   /**< Pitch 2D resource */
} HIPresourcetype, hipResourcetype;
/**
 * HIP texture address modes.
 */
typedef enum HIPaddress_mode_enum {
    HIP_TR_ADDRESS_MODE_WRAP   = 0,
    HIP_TR_ADDRESS_MODE_CLAMP  = 1,
    HIP_TR_ADDRESS_MODE_MIRROR = 2,
    HIP_TR_ADDRESS_MODE_BORDER = 3
} HIPaddress_mode;
/**
 * HIP texture filter modes.
 */
typedef enum HIPfilter_mode_enum {
    HIP_TR_FILTER_MODE_POINT  = 0,
    HIP_TR_FILTER_MODE_LINEAR = 1
} HIPfilter_mode;
/**
 * Texture descriptor.
 *
 * Holds the sampling state of a texture: address modes (one per array slot,
 * three in total), filter modes, mipmap parameters and the border color.
 * Ends with a block of reserved integers.
 */
typedef struct HIP_TEXTURE_DESC_st {
    HIPaddress_mode addressMode[3];   /**< Address modes */
    HIPfilter_mode filterMode;        /**< Filter mode */
    unsigned int flags;               /**< Flags */
    unsigned int maxAnisotropy;       /**< Maximum anisotropy ratio */
    HIPfilter_mode mipmapFilterMode;  /**< Mipmap filter mode */
    float mipmapLevelBias;            /**< Mipmap level bias */
    float minMipmapLevelClamp;        /**< Mipmap minimum level clamp */
    float maxMipmapLevelClamp;        /**< Mipmap maximum level clamp */
    float borderColor[4];             /**< Border color */
    int reserved[12];
} HIP_TEXTURE_DESC;
/**
 * HIP texture resource view formats.
 *
 * Values run contiguously from 0x00 to 0x22 and are all assigned explicitly.
 */
typedef enum hipResourceViewFormat {
    hipResViewFormatNone                      = 0x00,
    hipResViewFormatUnsignedChar1             = 0x01,
    hipResViewFormatUnsignedChar2             = 0x02,
    hipResViewFormatUnsignedChar4             = 0x03,
    hipResViewFormatSignedChar1               = 0x04,
    hipResViewFormatSignedChar2               = 0x05,
    hipResViewFormatSignedChar4               = 0x06,
    hipResViewFormatUnsignedShort1            = 0x07,
    hipResViewFormatUnsignedShort2            = 0x08,
    hipResViewFormatUnsignedShort4            = 0x09,
    hipResViewFormatSignedShort1              = 0x0a,
    hipResViewFormatSignedShort2              = 0x0b,
    hipResViewFormatSignedShort4              = 0x0c,
    hipResViewFormatUnsignedInt1              = 0x0d,
    hipResViewFormatUnsignedInt2              = 0x0e,
    hipResViewFormatUnsignedInt4              = 0x0f,
    hipResViewFormatSignedInt1                = 0x10,
    hipResViewFormatSignedInt2                = 0x11,
    hipResViewFormatSignedInt4                = 0x12,
    hipResViewFormatHalf1                     = 0x13,
    hipResViewFormatHalf2                     = 0x14,
    hipResViewFormatHalf4                     = 0x15,
    hipResViewFormatFloat1                    = 0x16,
    hipResViewFormatFloat2                    = 0x17,
    hipResViewFormatFloat4                    = 0x18,
    hipResViewFormatUnsignedBlockCompressed1  = 0x19,
    hipResViewFormatUnsignedBlockCompressed2  = 0x1a,
    hipResViewFormatUnsignedBlockCompressed3  = 0x1b,
    hipResViewFormatUnsignedBlockCompressed4  = 0x1c,
    hipResViewFormatSignedBlockCompressed4    = 0x1d,
    hipResViewFormatUnsignedBlockCompressed5  = 0x1e,
    hipResViewFormatSignedBlockCompressed5    = 0x1f,
    hipResViewFormatUnsignedBlockCompressed6H = 0x20,
    hipResViewFormatSignedBlockCompressed6H   = 0x21,
    hipResViewFormatUnsignedBlockCompressed7  = 0x22
} hipResourceViewFormat;
/**
 * Resource view formats, HIP_RES_VIEW_FORMAT_* spelling.
 *
 * Values run contiguously from 0x00 to 0x22 and are all assigned explicitly.
 */
typedef enum HIPresourceViewFormat_enum {
    HIP_RES_VIEW_FORMAT_NONE          = 0x00,  /**< No resource view format (use underlying resource format) */
    HIP_RES_VIEW_FORMAT_UINT_1X8      = 0x01,  /**< 1 channel unsigned 8-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_2X8      = 0x02,  /**< 2 channel unsigned 8-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_4X8      = 0x03,  /**< 4 channel unsigned 8-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_1X8      = 0x04,  /**< 1 channel signed 8-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_2X8      = 0x05,  /**< 2 channel signed 8-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_4X8      = 0x06,  /**< 4 channel signed 8-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_1X16     = 0x07,  /**< 1 channel unsigned 16-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_2X16     = 0x08,  /**< 2 channel unsigned 16-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_4X16     = 0x09,  /**< 4 channel unsigned 16-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_1X16     = 0x0a,  /**< 1 channel signed 16-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_2X16     = 0x0b,  /**< 2 channel signed 16-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_4X16     = 0x0c,  /**< 4 channel signed 16-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_1X32     = 0x0d,  /**< 1 channel unsigned 32-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_2X32     = 0x0e,  /**< 2 channel unsigned 32-bit integers */
    HIP_RES_VIEW_FORMAT_UINT_4X32     = 0x0f,  /**< 4 channel unsigned 32-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_1X32     = 0x10,  /**< 1 channel signed 32-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_2X32     = 0x11,  /**< 2 channel signed 32-bit integers */
    HIP_RES_VIEW_FORMAT_SINT_4X32     = 0x12,  /**< 4 channel signed 32-bit integers */
    HIP_RES_VIEW_FORMAT_FLOAT_1X16    = 0x13,  /**< 1 channel 16-bit floating point */
    HIP_RES_VIEW_FORMAT_FLOAT_2X16    = 0x14,  /**< 2 channel 16-bit floating point */
    HIP_RES_VIEW_FORMAT_FLOAT_4X16    = 0x15,  /**< 4 channel 16-bit floating point */
    HIP_RES_VIEW_FORMAT_FLOAT_1X32    = 0x16,  /**< 1 channel 32-bit floating point */
    HIP_RES_VIEW_FORMAT_FLOAT_2X32    = 0x17,  /**< 2 channel 32-bit floating point */
    HIP_RES_VIEW_FORMAT_FLOAT_4X32    = 0x18,  /**< 4 channel 32-bit floating point */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC1  = 0x19,  /**< Block compressed 1 */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC2  = 0x1a,  /**< Block compressed 2 */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC3  = 0x1b,  /**< Block compressed 3 */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC4  = 0x1c,  /**< Block compressed 4 unsigned */
    HIP_RES_VIEW_FORMAT_SIGNED_BC4    = 0x1d,  /**< Block compressed 4 signed */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC5  = 0x1e,  /**< Block compressed 5 unsigned */
    HIP_RES_VIEW_FORMAT_SIGNED_BC5    = 0x1f,  /**< Block compressed 5 signed */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,  /**< Block compressed 6 unsigned half-float */
    HIP_RES_VIEW_FORMAT_SIGNED_BC6H   = 0x21,  /**< Block compressed 6 signed half-float */
    HIP_RES_VIEW_FORMAT_UNSIGNED_BC7  = 0x22   /**< Block compressed 7 */
} HIPresourceViewFormat;
/**
 * HIP resource descriptor.
 *
 * resType is followed by a union with one member struct per resource type
 * (array, mipmap, linear, pitch2D).
 * NOTE(review): resType presumably selects the active union member --
 * confirm against the HIP documentation.
 */
typedef struct hipResourceDesc {
    enum hipResourceType resType;
    union {
        struct {
            hipArray_t array;
        } array;
        struct {
            hipMipmappedArray_t mipmap;
        } mipmap;
        struct {
            void *devPtr;
            struct hipChannelFormatDesc desc;
            size_t sizeInBytes;
        } linear;
        struct {
            void *devPtr;
            struct hipChannelFormatDesc desc;
            size_t width;
            size_t height;
            size_t pitchInBytes;
        } pitch2D;
    } res;
} hipResourceDesc;
/**
 * Resource descriptor, HIP_RESOURCE_DESC spelling.
 *
 * resType is followed by a union with one member struct per resource type
 * plus a reserved member of 32 ints, and a trailing flags word.
 */
typedef struct HIP_RESOURCE_DESC_st {
    HIPresourcetype resType;                      /**< Resource type */
    union {
        struct {
            hipArray_t hArray;                    /**< HIP array */
        } array;
        struct {
            hipMipmappedArray_t hMipmappedArray;  /**< HIP mipmapped array */
        } mipmap;
        struct {
            hipDeviceptr_t devPtr;                /**< Device pointer */
            hipArray_Format format;               /**< Array format */
            unsigned int numChannels;             /**< Channels per array element */
            size_t sizeInBytes;                   /**< Size in bytes */
        } linear;
        struct {
            hipDeviceptr_t devPtr;                /**< Device pointer */
            hipArray_Format format;               /**< Array format */
            unsigned int numChannels;             /**< Channels per array element */
            size_t width;                         /**< Width of the array in elements */
            size_t height;                        /**< Height of the array in elements */
            size_t pitchInBytes;                  /**< Pitch between two rows in bytes */
        } pitch2D;
        struct {
            int reserved[32];
        } reserved;
    } res;
    unsigned int flags;                           /**< Flags (must be zero) */
} HIP_RESOURCE_DESC;
/**
 * HIP resource view descriptor.
 *
 * Declared as a plain struct tag only; no typedef name is introduced here.
 */
struct hipResourceViewDesc {
    enum hipResourceViewFormat format;
    size_t width;
    size_t height;
    size_t depth;
    unsigned int firstMipmapLevel;
    unsigned int lastMipmapLevel;
    unsigned int firstLayer;
    unsigned int lastLayer;
};
/**
 * Resource view descriptor, HIP_RESOURCE_VIEW_DESC spelling.
 *
 * Carries the same named fields as struct hipResourceViewDesc, followed by
 * 16 reserved words.
 */
typedef struct HIP_RESOURCE_VIEW_DESC_st {
    HIPresourceViewFormat format;   /**< Resource view format */
    size_t width;                   /**< Width of the resource view */
    size_t height;                  /**< Height of the resource view */
    size_t depth;                   /**< Depth of the resource view */
    unsigned int firstMipmapLevel;  /**< First defined mipmap level */
    unsigned int lastMipmapLevel;   /**< Last defined mipmap level */
    unsigned int firstLayer;        /**< First layer index */
    unsigned int lastLayer;         /**< Last layer index */
    unsigned int reserved[16];
} HIP_RESOURCE_VIEW_DESC;
/**
 * Memory copy types.
 *
 * Values 0-4 are contiguous; hipMemcpyDeviceToDeviceNoCU is set apart at 1024.
 */
typedef enum hipMemcpyKind {
    hipMemcpyHostToHost         = 0,    /**< Host-to-Host copy */
    hipMemcpyHostToDevice       = 1,    /**< Host-to-Device copy */
    hipMemcpyDeviceToHost       = 2,    /**< Device-to-Host copy */
    hipMemcpyDeviceToDevice     = 3,    /**< Device-to-Device copy */
    hipMemcpyDefault            = 4,    /**< Runtime will automatically determine the
                                             copy kind based on virtual addresses */
    hipMemcpyDeviceToDeviceNoCU = 1024  /**< Device-to-Device copy without using compute units */
} hipMemcpyKind;
/** Pointer bundled with a pitch and an x/y size. */
typedef struct hipPitchedPtr {
    void *ptr;
    size_t pitch;
    size_t xsize;
    size_t ysize;
} hipPitchedPtr;
/** Three-dimensional extent. */
typedef struct hipExtent {
    size_t width;   /**< Width in elements when referring to array memory,
                         in bytes when referring to linear memory */
    size_t height;
    size_t depth;
} hipExtent;
/** Three-component position (x, y, z). */
typedef struct hipPos {
    size_t x;
    size_t y;
    size_t z;
} hipPos;
/**
 * Parameter block for a 3D memory copy: source and destination are each
 * given as an array handle, a position and a pitched pointer, followed by
 * the extent of the copy and the copy kind.
 */
typedef struct hipMemcpy3DParms {
    /* Source */
    hipArray_t srcArray;
    struct hipPos srcPos;
    struct hipPitchedPtr srcPtr;

    /* Destination */
    hipArray_t dstArray;
    struct hipPos dstPos;
    struct hipPitchedPtr dstPtr;

    struct hipExtent extent;
    enum hipMemcpyKind kind;
} hipMemcpy3DParms;
/**
 * Parameter block for a 3D memory copy, HIP_MEMCPY3D spelling. The source
 * and the destination are each described by an offset (X in bytes, Y, Z),
 * a LOD value, a memory type, one handle per possible location (host
 * pointer, device pointer, array), a pitch and a height; the copied region
 * is WidthInBytes by Height by Depth.
 */
typedef struct HIP_MEMCPY3D {
    /* Source */
    size_t srcXInBytes;
    size_t srcY;
    size_t srcZ;
    size_t srcLOD;
    hipMemoryType srcMemoryType;
    const void *srcHost;
    hipDeviceptr_t srcDevice;
    hipArray_t srcArray;
    size_t srcPitch;
    size_t srcHeight;

    /* Destination */
    size_t dstXInBytes;
    size_t dstY;
    size_t dstZ;
    size_t dstLOD;
    hipMemoryType dstMemoryType;
    void *dstHost;
    hipDeviceptr_t dstDevice;
    hipArray_t dstArray;
    size_t dstPitch;
    size_t dstHeight;

    /* Extent of the copy */
    size_t WidthInBytes;
    size_t Height;
    size_t Depth;
} HIP_MEMCPY3D;
/**
 * Function attribute identifiers.
 *
 * The values are the ones the compiler assigns by default (0, 1, 2, ...);
 * they are spelled out so the numbering no longer depends on the order of
 * the lines. HIP_FUNC_ATTRIBUTE_MAX is the last enumerator and equals the
 * number of attributes before it.
 */
typedef enum hipFunction_attribute {
    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK            = 0,
    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES                = 1,
    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES                 = 2,
    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES                 = 3,
    HIP_FUNC_ATTRIBUTE_NUM_REGS                         = 4,
    HIP_FUNC_ATTRIBUTE_PTX_VERSION                      = 5,
    HIP_FUNC_ATTRIBUTE_BINARY_VERSION                   = 6,
    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA                    = 7,
    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES    = 8,
    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
    HIP_FUNC_ATTRIBUTE_MAX                              = 10
} hipFunction_attribute;
/**
 * Pointer attribute identifiers.
 *
 * Numbering starts at 1 and is contiguous; the values are spelled out so
 * they no longer depend on the order of the lines.
 */
typedef enum hipPointer_attribute {
    /** The context on which a pointer was allocated.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
    /** Memory type describing location of a pointer */
    HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
    /** Address at which the pointer is allocated on device */
    HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
    /** Address at which the pointer is allocated on host */
    HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
    /** A pair of tokens for use with linux kernel interface.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
    /** Synchronize every synchronous memory operation initiated on this region */
    HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
    /** Unique ID for an allocated memory region */
    HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
    /** Indicates if the pointer points to managed memory */
    HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
    /** Device ordinal of a device on which a pointer was allocated or registered */
    HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
    /** If this pointer maps to an allocation that is suitable for hipIpcGetMemHandle.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE = 10,
    /** Starting address for this requested pointer */
    HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11,
    /** Size of the address range for this requested pointer */
    HIP_POINTER_ATTRIBUTE_RANGE_SIZE = 12,
    /** Tells if this pointer is in a valid address range that is mapped to a
     *  backing allocation */
    HIP_POINTER_ATTRIBUTE_MAPPED = 13,
    /** Bitmask of allowed hipmemAllocationHandleType for this allocation.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14,
    /** Returns if the memory referenced by this pointer can be used with the
     *  GPUDirect RDMA API.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
    /** Returns the access flags the device associated with for the
     *  corresponding memory referenced by the ptr */
    HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16,
    /** Returns the mempool handle for the allocation if it was allocated from
     *  a mempool.
     *  @warning not supported in HIP */
    HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
} hipPointer_attribute;

View File

@ -0,0 +1,27 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/texture_types.h>
#include <hip/hip_runtime_api.h>
#include <hip/driver_types.h>

View File

@ -0,0 +1,472 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip/texture_types.h>
/**
 * Architecture feature flags: one single-bit field per capability, grouped
 * by topic. The declaration order of the bit-fields is kept unchanged.
 */
typedef struct {
    /* 32-bit atomics */
    unsigned hasGlobalInt32Atomics : 1;     /**< 32-bit integer atomics for global memory. */
    unsigned hasGlobalFloatAtomicExch : 1;  /**< 32-bit float atomic exch for global memory. */
    unsigned hasSharedInt32Atomics : 1;     /**< 32-bit integer atomics for shared memory. */
    unsigned hasSharedFloatAtomicExch : 1;  /**< 32-bit float atomic exch for shared memory. */
    unsigned hasFloatAtomicAdd : 1;         /**< 32-bit float atomic add in global and shared memory. */

    /* 64-bit atomics */
    unsigned hasGlobalInt64Atomics : 1;     /**< 64-bit integer atomics for global memory. */
    unsigned hasSharedInt64Atomics : 1;     /**< 64-bit integer atomics for shared memory. */

    /* Doubles */
    unsigned hasDoubles : 1;                /**< Double-precision floating point. */

    /* Warp cross-lane operations */
    unsigned hasWarpVote : 1;               /**< Warp vote instructions (__any, __all). */
    unsigned hasWarpBallot : 1;             /**< Warp ballot instructions (__ballot). */
    unsigned hasWarpShuffle : 1;            /**< Warp shuffle operations. (__shfl_*). */
    unsigned hasFunnelShift : 1;            /**< Funnel two words into one with shift&mask caps. */

    /* Sync */
    unsigned hasThreadFenceSystem : 1;      /**< __threadfence_system. */
    unsigned hasSyncThreadsExt : 1;         /**< __syncthreads_count, syncthreads_and, syncthreads_or. */

    /* Misc */
    unsigned hasSurfaceFuncs : 1;           /**< Surface functions. */
    unsigned has3dGrid : 1;                 /**< Grid and group dims are 3D (rather than 2D). */
    unsigned hasDynamicParallelism : 1;     /**< Dynamic parallelism. */
} hipDeviceArch_t;
/** 16-byte UUID stored as a raw byte array. */
typedef struct hipUUID_t {
    char bytes[16];
} hipUUID;
/**
 * Device attribute identifiers.
 *
 * The enumeration is split into three numeric ranges, each delimited by
 * Begin/End markers: CUDA-compatible attributes (0 .. 9999), AMD-specific
 * attributes (10000 .. 19999) and vendor-specific attributes (from 20000).
 * Inside a range the values are assigned implicitly, so the order of the
 * enumerators determines their numeric value: do not reorder, insert or
 * remove entries.
 */
typedef enum hipDeviceAttribute_t {
    hipDeviceAttributeCudaCompatibleBegin = 0,

    hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin,  ///< Whether ECC support is enabled.
    hipDeviceAttributeAccessPolicyMaxWindowSize,         ///< Cuda only. The maximum size of the window policy in bytes.
    hipDeviceAttributeAsyncEngineCount,                  ///< Asynchronous engines number.
    hipDeviceAttributeCanMapHostMemory,                  ///< Whether host memory can be mapped into device address space
    hipDeviceAttributeCanUseHostPointerForRegisteredMem, ///< Device can access host registered memory
                                                         ///< at the same virtual address as the CPU
    hipDeviceAttributeClockRate,                         ///< Peak clock frequency in kilohertz.
    hipDeviceAttributeComputeMode,                       ///< Compute mode that device is currently in.
    hipDeviceAttributeComputePreemptionSupported,        ///< Device supports Compute Preemption.
    hipDeviceAttributeConcurrentKernels,                 ///< Device can possibly execute multiple kernels concurrently.
    hipDeviceAttributeConcurrentManagedAccess,           ///< Device can coherently access managed memory concurrently with the CPU
    hipDeviceAttributeCooperativeLaunch,                 ///< Support cooperative launch
    hipDeviceAttributeCooperativeMultiDeviceLaunch,      ///< Support cooperative launch on multiple devices
    hipDeviceAttributeDeviceOverlap,                     ///< Device can concurrently copy memory and execute a kernel.
                                                         ///< Deprecated. Use instead asyncEngineCount.
    hipDeviceAttributeDirectManagedMemAccessFromHost,    ///< Host can directly access managed memory on
                                                         ///< the device without migration
    hipDeviceAttributeGlobalL1CacheSupported,            ///< Device supports caching globals in L1
    hipDeviceAttributeHostNativeAtomicSupported,         ///< Link between the device and the host supports native atomic operations
    hipDeviceAttributeIntegrated,                        ///< Device is integrated GPU
    hipDeviceAttributeIsMultiGpuBoard,                   ///< Multiple GPU devices.
    hipDeviceAttributeKernelExecTimeout,                 ///< Run time limit for kernels executed on the device
    hipDeviceAttributeL2CacheSize,                       ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
    hipDeviceAttributeLocalL1CacheSupported,             ///< caching locals in L1 is supported
    hipDeviceAttributeLuid,                              ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
    hipDeviceAttributeLuidDeviceNodeMask,                ///< Luid device node mask. Undefined on TCC and non-Windows platforms
    hipDeviceAttributeComputeCapabilityMajor,            ///< Major compute capability version number.
    hipDeviceAttributeManagedMemory,                     ///< Device supports allocating managed memory on this system
    hipDeviceAttributeMaxBlocksPerMultiProcessor,        ///< Max block size per multiprocessor
    hipDeviceAttributeMaxBlockDimX,                      ///< Max block size in width.
    hipDeviceAttributeMaxBlockDimY,                      ///< Max block size in height.
    hipDeviceAttributeMaxBlockDimZ,                      ///< Max block size in depth.
    hipDeviceAttributeMaxGridDimX,                       ///< Max grid size in width.
    hipDeviceAttributeMaxGridDimY,                       ///< Max grid size in height.
    hipDeviceAttributeMaxGridDimZ,                       ///< Max grid size in depth.
    hipDeviceAttributeMaxSurface1D,                      ///< Maximum size of 1D surface.
    hipDeviceAttributeMaxSurface1DLayered,               ///< Cuda only. Maximum dimensions of 1D layered surface.
    hipDeviceAttributeMaxSurface2D,                      ///< Maximum dimension (width, height) of 2D surface.
    hipDeviceAttributeMaxSurface2DLayered,               ///< Cuda only. Maximum dimensions of 2D layered surface.
    hipDeviceAttributeMaxSurface3D,                      ///< Maximum dimension (width, height, depth) of 3D surface.
    hipDeviceAttributeMaxSurfaceCubemap,                 ///< Cuda only. Maximum dimensions of Cubemap surface.
    hipDeviceAttributeMaxSurfaceCubemapLayered,          ///< Cuda only. Maximum dimension of Cubemap layered surface.
    hipDeviceAttributeMaxTexture1DWidth,                 ///< Maximum size of 1D texture.
    hipDeviceAttributeMaxTexture1DLayered,               ///< Maximum dimensions of 1D layered texture.
    hipDeviceAttributeMaxTexture1DLinear,                ///< Maximum number of elements allocatable in a 1D linear texture.
                                                         ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
    hipDeviceAttributeMaxTexture1DMipmap,                ///< Maximum size of 1D mipmapped texture.
    hipDeviceAttributeMaxTexture2DWidth,                 ///< Maximum dimension width of 2D texture.
    hipDeviceAttributeMaxTexture2DHeight,                ///< Maximum dimension height of 2D texture.
    hipDeviceAttributeMaxTexture2DGather,                ///< Maximum dimensions of 2D texture if gather operations performed.
    hipDeviceAttributeMaxTexture2DLayered,               ///< Maximum dimensions of 2D layered texture.
    hipDeviceAttributeMaxTexture2DLinear,                ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
    hipDeviceAttributeMaxTexture2DMipmap,                ///< Maximum dimensions of 2D mipmapped texture.
    hipDeviceAttributeMaxTexture3DWidth,                 ///< Maximum dimension width of 3D texture.
    hipDeviceAttributeMaxTexture3DHeight,                ///< Maximum dimension height of 3D texture.
    hipDeviceAttributeMaxTexture3DDepth,                 ///< Maximum dimension depth of 3D texture.
    hipDeviceAttributeMaxTexture3DAlt,                   ///< Maximum dimensions of alternate 3D texture.
    hipDeviceAttributeMaxTextureCubemap,                 ///< Maximum dimensions of Cubemap texture
    hipDeviceAttributeMaxTextureCubemapLayered,          ///< Maximum dimensions of Cubemap layered texture.
    hipDeviceAttributeMaxThreadsDim,                     ///< Maximum dimension of a block
    hipDeviceAttributeMaxThreadsPerBlock,                ///< Maximum number of threads per block.
    hipDeviceAttributeMaxThreadsPerMultiProcessor,       ///< Maximum resident threads per multiprocessor.
    hipDeviceAttributeMaxPitch,                          ///< Maximum pitch in bytes allowed by memory copies
    hipDeviceAttributeMemoryBusWidth,                    ///< Global memory bus width in bits.
    hipDeviceAttributeMemoryClockRate,                   ///< Peak memory clock frequency in kilohertz.
    hipDeviceAttributeComputeCapabilityMinor,            ///< Minor compute capability version number.
    hipDeviceAttributeMultiGpuBoardGroupID,              ///< Unique ID of device group on the same multi-GPU board
    hipDeviceAttributeMultiprocessorCount,               ///< Number of multiprocessors on the device.
    hipDeviceAttributeUnused1,                           ///< Previously hipDeviceAttributeName
    hipDeviceAttributePageableMemoryAccess,              ///< Device supports coherently accessing pageable memory
                                                         ///< without calling hipHostRegister on it
    hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
    hipDeviceAttributePciBusId,                          ///< PCI Bus ID.
    hipDeviceAttributePciDeviceId,                       ///< PCI Device ID.
    hipDeviceAttributePciDomainID,                       ///< PCI Domain ID.
    hipDeviceAttributePersistingL2CacheMaxSize,          ///< Maximum l2 persisting lines capacity in bytes
    hipDeviceAttributeMaxRegistersPerBlock,              ///< 32-bit registers available to a thread block. This number is shared
                                                         ///< by all thread blocks simultaneously resident on a multiprocessor.
    hipDeviceAttributeMaxRegistersPerMultiprocessor,     ///< 32-bit registers available per block.
                                                         ///< NOTE(review): this description and the one above look
                                                         ///< swapped relative to the enumerator names -- confirm.
    hipDeviceAttributeReservedSharedMemPerBlock,         ///< Shared memory reserved by CUDA driver per block.
    hipDeviceAttributeMaxSharedMemoryPerBlock,           ///< Maximum shared memory available per block in bytes.
    hipDeviceAttributeSharedMemPerBlockOptin,            ///< Maximum shared memory per block usable by special opt in.
    hipDeviceAttributeSharedMemPerMultiprocessor,        ///< Shared memory available per multiprocessor.
    hipDeviceAttributeSingleToDoublePrecisionPerfRatio,  ///< Cuda only. Performance ratio of single precision to double precision.
    hipDeviceAttributeStreamPrioritiesSupported,         ///< Whether to support stream priorities.
    hipDeviceAttributeSurfaceAlignment,                  ///< Alignment requirement for surfaces
    hipDeviceAttributeTccDriver,                         ///< Cuda only. Whether device is a Tesla device using TCC driver
    hipDeviceAttributeTextureAlignment,                  ///< Alignment requirement for textures
    hipDeviceAttributeTexturePitchAlignment,             ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
    hipDeviceAttributeTotalConstantMemory,               ///< Constant memory size in bytes.
    hipDeviceAttributeTotalGlobalMem,                    ///< Global memory available on device.
    hipDeviceAttributeUnifiedAddressing,                 ///< Cuda only. An unified address space shared with the host.
    hipDeviceAttributeUnused2,                           ///< Previously hipDeviceAttributeUuid
    hipDeviceAttributeWarpSize,                          ///< Warp size in threads.
    hipDeviceAttributeMemoryPoolsSupported,              ///< Device supports HIP Stream Ordered Memory Allocator
    hipDeviceAttributeVirtualMemoryManagementSupported,  ///< Device supports HIP virtual memory management
    hipDeviceAttributeHostRegisterSupported,             ///< Can device support host memory registration via hipHostRegister
    hipDeviceAttributeMemoryPoolSupportedHandleTypes,    ///< Supported handle mask for HIP Stream Ordered Memory Allocator

    hipDeviceAttributeCudaCompatibleEnd = 9999,
    hipDeviceAttributeAmdSpecificBegin = 10000,

    hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
    hipDeviceAttributeUnused3,                           ///< Previously hipDeviceAttributeArch
    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,  ///< Maximum Shared Memory PerMultiprocessor.
    hipDeviceAttributeUnused4,                           ///< Previously hipDeviceAttributeGcnArch
    hipDeviceAttributeUnused5,                           ///< Previously hipDeviceAttributeGcnArchName
    hipDeviceAttributeHdpMemFlushCntl,                   ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
    hipDeviceAttributeHdpRegFlushCntl,                   ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
    hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
                                                                ///< devices with unmatched functions
    hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
                                                                ///< devices with unmatched grid dimensions
    hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
                                                                ///< devices with unmatched block dimensions
    hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
                                                                ///< devices with unmatched shared memories
    hipDeviceAttributeIsLargeBar,                        ///< Whether it is LargeBar
    hipDeviceAttributeAsicRevision,                      ///< Revision of the GPU in this device
    hipDeviceAttributeCanUseStreamWaitValue,             ///< '1' if Device supports hipStreamWaitValue32() and
                                                         ///< hipStreamWaitValue64(), '0' otherwise.
    hipDeviceAttributeImageSupport,                      ///< '1' if Device supports image, '0' otherwise.
    hipDeviceAttributePhysicalMultiProcessorCount,       ///< All available physical compute
                                                         ///< units for the device
    hipDeviceAttributeFineGrainSupport,                  ///< '1' if Device supports fine grain, '0' otherwise
    hipDeviceAttributeWallClockRate,                     ///< Constant frequency of wall clock in kilohertz.

    hipDeviceAttributeAmdSpecificEnd = 19999,
    hipDeviceAttributeVendorSpecificBegin = 20000,
    // Extended attributes for vendors
} hipDeviceAttribute_t;
/* Rename the unversioned identifiers to their R0600-suffixed counterparts:
 * after this point every use of hipGetDeviceProperties, hipDeviceProp_t and
 * hipChooseDevice in the translation unit is textually replaced, including
 * the struct tag and typedef name of the hipDeviceProp_t definition that
 * follows.
 * NOTE(review): presumably this matches the versioned symbol names exported
 * by the HIP 6.x runtime -- confirm against the upstream hip_runtime_api.h. */
#define hipGetDeviceProperties hipGetDevicePropertiesR0600
#define hipDeviceProp_t hipDeviceProp_tR0600
#define hipChooseDevice hipChooseDeviceR0600
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
hipUUID uuid; ///< UUID of a device
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
unsigned int luidDeviceNodeMask; ///< LUID node mask
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
///< pitched memory
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
size_t totalConstMem; ///< Size of shared constant memory region on the device
///< (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
size_t textureAlignment; ///< Alignment requirement for textures
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead
int multiProcessorCount; ///< Number of multi-processors (compute units).
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
int integrated; ///< APU vs dGPU
int canMapHostMemory; ///< Check whether HIP can map host memory
int computeMode; ///< Compute mode.
int maxTexture1D; ///< Maximum number of elements in 1D images
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
///< elements
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims
int maxTextureCubemap; ///< Maximum cubemap texture dims
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims
int maxSurface1D; ///< Maximum 1D surface size
int maxSurface2D[2]; ///< Maximum 2D surface size
int maxSurface3D[3]; ///< Maximum 3D surface size
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size
int maxSurface2DLayered[3]; ///< Maximum 2D layared surface size
int maxSurfaceCubemap; ///< Maximum cubemap surface size
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size
size_t surfaceAlignment; ///< Alignment requirement for surface
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int ECCEnabled; ///< Device has ECC support enabled
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
int pciDomainID; ///< PCI Domain ID
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int asyncEngineCount; ///< Number of async engines
int unifiedAddressing; ///< Does device and host share unified address space
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
int l2CacheSize; ///< L2 cache size.
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int streamPrioritiesSupported; ///< Device supports stream priority
int globalL1CacheSupported; ///< Indicates globals are cached in L1
int localL1CacheSupported; ///< Locals are cahced in L1
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor.
int regsPerMultiprocessor; ///< registers available per multiprocessor
int managedMemory; ///< Device supports allocating managed memory on this system
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
///< the CPU
int computePreemptionSupported; ///< Is compute preemption supported on the device
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same
///< address as the host
int cooperativeLaunch; ///< HIP device supports cooperative launch
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
///< devices
size_t
sharedMemPerBlockOptin; ///< Per device m ax shared mem per block usable by special opt in
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
///< page tables
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
///< without migration
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU
int accessPolicyMaxWindowSize; ///< Max value of access policy window
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block
int hostRegisterSupported; ///< Device supports hipHostRegister
int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays
int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag
///< with hipHostRegistger
int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support
int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs
unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to
///< hipFlushGPUDirectRDMAWritesOptions
int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering
unsigned int
memoryPoolSupportedHandleTypes; ///< Bitmask of handle types support with mempool based IPC
int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP
///< mipmapped arrays
int ipcEventSupported; ///< Device supports IPC events
int clusterLaunch; ///< Device supports cluster launch
int unifiedFunctionPointers; ///< Indicates device supports unified function pointers
int reserved[63]; ///< CUDA Reserved.
int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA.
/* HIP Only struct members */
char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
///< instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched shared memories
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
int asicRevision; ///< Revision of the GPU in this device
} hipDeviceProp_t;
/**
 * Classification of a memory allocation.
 *
 * The numeric values are spelled out explicitly; the range between
 * hipMemoryTypeManaged (3) and hipMemoryTypeArray (10) is left unassigned
 * as a place holder for new values.
 */
typedef enum hipMemoryType {
  /** Unregistered memory. */
  hipMemoryTypeUnregistered = 0,

  /** Memory is physically located on host. */
  hipMemoryTypeHost = 1,

  /** Memory is physically located on device (see deviceId for the
   *  specific device). */
  hipMemoryTypeDevice = 2,

  /** Managed memory, automatically managed by the unified memory system. */
  hipMemoryTypeManaged = 3,

  /* Place holder for new values. */

  /** Array memory, physically located on device (see deviceId for the
   *  specific device). */
  hipMemoryTypeArray = 10,

  /** Unified address space. */
  hipMemoryTypeUnified = 11
} hipMemoryType;
/**
 * Error codes used by the HIP API.
 *
 * Every enumerator except the trailing hipErrorTbd carries an explicit
 * value. Several deprecated names are kept as aliases that share the value
 * of their replacement.
 */
typedef enum hipError_t {
  /** Successful completion. */
  hipSuccess = 0,

  /** One or more of the parameters passed to the API call is NULL or not
   *  in an acceptable range. */
  hipErrorInvalidValue = 1,

  /** Out of memory range. */
  hipErrorOutOfMemory = 2,
  /** Deprecated alias of hipErrorOutOfMemory: memory allocation error. */
  hipErrorMemoryAllocation = 2,

  /** Not initialized. */
  hipErrorNotInitialized = 3,
  /** Deprecated alias of hipErrorNotInitialized. */
  hipErrorInitializationError = 3,

  /** Deinitialized. */
  hipErrorDeinitialized = 4,

  hipErrorProfilerDisabled = 5,
  hipErrorProfilerNotInitialized = 6,
  hipErrorProfilerAlreadyStarted = 7,
  hipErrorProfilerAlreadyStopped = 8,

  /** Invalid configuration. */
  hipErrorInvalidConfiguration = 9,
  /** Invalid pitch value. */
  hipErrorInvalidPitchValue = 12,
  /** Invalid symbol. */
  hipErrorInvalidSymbol = 13,
  /** Invalid device pointer. */
  hipErrorInvalidDevicePointer = 17,
  /** Invalid memory copy direction. */
  hipErrorInvalidMemcpyDirection = 21,

  hipErrorInsufficientDriver = 35,
  hipErrorMissingConfiguration = 52,
  hipErrorPriorLaunchFailure = 53,

  /** Invalid device function. */
  hipErrorInvalidDeviceFunction = 98,
  /** Call to hipGetDeviceCount returned 0 devices. */
  hipErrorNoDevice = 100,
  /** DeviceID must be in range from 0 to compute-devices. */
  hipErrorInvalidDevice = 101,

  /** Invalid image. */
  hipErrorInvalidImage = 200,
  /** Produced when input context is invalid. */
  hipErrorInvalidContext = 201,
  hipErrorContextAlreadyCurrent = 202,

  hipErrorMapFailed = 205,
  /** Deprecated alias of hipErrorMapFailed: produced when the IPC memory
   *  attach failed from ROCr. */
  hipErrorMapBufferObjectFailed = 205,

  hipErrorUnmapFailed = 206,
  hipErrorArrayIsMapped = 207,
  hipErrorAlreadyMapped = 208,
  hipErrorNoBinaryForGpu = 209,
  hipErrorAlreadyAcquired = 210,
  hipErrorNotMapped = 211,
  hipErrorNotMappedAsArray = 212,
  hipErrorNotMappedAsPointer = 213,
  hipErrorECCNotCorrectable = 214,

  /** Unsupported limit. */
  hipErrorUnsupportedLimit = 215,
  /** The context is already in use. */
  hipErrorContextAlreadyInUse = 216,
  hipErrorPeerAccessUnsupported = 217,
  /** In CUDA DRV, it is CUDA_ERROR_INVALID_PTX. */
  hipErrorInvalidKernelFile = 218,
  hipErrorInvalidGraphicsContext = 219,

  /** Invalid source. */
  hipErrorInvalidSource = 300,
  /** The file is not found. */
  hipErrorFileNotFound = 301,
  hipErrorSharedObjectSymbolNotFound = 302,
  /** Failed to initialize shared object. */
  hipErrorSharedObjectInitFailed = 303,
  /** Not the correct operating system. */
  hipErrorOperatingSystem = 304,

  /** Invalid handle. */
  hipErrorInvalidHandle = 400,
  /** Deprecated alias of hipErrorInvalidHandle: resource handle
   *  (hipEvent_t or hipStream_t) invalid. */
  hipErrorInvalidResourceHandle = 400,

  /** Resource required is not in a valid state to perform operation. */
  hipErrorIllegalState = 401,
  /** Not found. */
  hipErrorNotFound = 500,

  /** Indicates that asynchronous operations enqueued earlier are not
   *  ready. This is not actually an error, but is used to distinguish
   *  from hipSuccess (which indicates completion). APIs that return this
   *  error include hipEventQuery and hipStreamQuery. */
  hipErrorNotReady = 600,

  hipErrorIllegalAddress = 700,
  /** Out of resources error. */
  hipErrorLaunchOutOfResources = 701,
  /** Timeout for the launch. */
  hipErrorLaunchTimeOut = 702,
  /** Peer access was already enabled from the current device. */
  hipErrorPeerAccessAlreadyEnabled = 704,
  /** Peer access was never enabled from the current device. */
  hipErrorPeerAccessNotEnabled = 705,
  /** The process is active. */
  hipErrorSetOnActiveProcess = 708,
  /** The context is already destroyed. */
  hipErrorContextIsDestroyed = 709,
  /** Produced when the kernel calls assert. */
  hipErrorAssert = 710,
  /** Produced when trying to lock a page-locked memory. */
  hipErrorHostMemoryAlreadyRegistered = 712,
  /** Produced when trying to unlock a non-page-locked memory. */
  hipErrorHostMemoryNotRegistered = 713,
  /** An exception occurred on the device while executing a kernel. */
  hipErrorLaunchFailure = 719,

  /** Indicates that the number of blocks launched per grid for a kernel
   *  that was launched via cooperative launch APIs exceeds the maximum
   *  number of allowed blocks for the current device. */
  hipErrorCooperativeLaunchTooLarge = 720,

  /** Produced when the hip API is not supported/implemented. */
  hipErrorNotSupported = 801,

  /** The operation is not permitted when the stream is capturing. */
  hipErrorStreamCaptureUnsupported = 900,
  /** The current capture sequence on the stream has been invalidated due
   *  to a previous error. */
  hipErrorStreamCaptureInvalidated = 901,
  /** The operation would have resulted in a merge of two independent
   *  capture sequences. */
  hipErrorStreamCaptureMerge = 902,
  /** The capture was not initiated in this stream. */
  hipErrorStreamCaptureUnmatched = 903,
  /** The capture sequence contains a fork that was not joined to the
   *  primary stream. */
  hipErrorStreamCaptureUnjoined = 904,
  /** A dependency would have been created which crosses the capture
   *  sequence boundary. Only implicit in-stream ordering dependencies are
   *  allowed to cross the boundary. */
  hipErrorStreamCaptureIsolation = 905,
  /** The operation would have resulted in a disallowed implicit
   *  dependency on a current capture sequence from hipStreamLegacy. */
  hipErrorStreamCaptureImplicit = 906,
  /** The operation is not permitted on an event which was last recorded
   *  in a capturing stream. */
  hipErrorCapturedEvent = 907,
  /** A stream capture sequence not initiated with the
   *  hipStreamCaptureModeRelaxed argument to hipStreamBeginCapture was
   *  passed to hipStreamEndCapture in a different thread. */
  hipErrorStreamCaptureWrongThread = 908,
  /** Indicates that the graph update was not performed because it
   *  included changes which violated constraints specific to instantiated
   *  graph update. */
  hipErrorGraphExecUpdateFailure = 910,

  /** Unknown error. */
  hipErrorUnknown = 999,

  /* HSA Runtime Error Codes start here. */

  /** HSA runtime memory call returned error. Typically not seen in
   *  production systems. */
  hipErrorRuntimeMemory = 1052,
  /** HSA runtime call other than memory returned error. Typically not
   *  seen in production systems. */
  hipErrorRuntimeOther = 1053,

  /** Marker that more error codes are needed. It has no explicit value,
   *  so it evaluates to the previous enumerator plus one (1054). */
  hipErrorTbd
} hipError_t;
/*
 * Handle types. Each one is a plain pointer to a struct tag; callers only
 * ever pass these pointers around.
 */
typedef struct ihipStream_t *hipStream_t;         /* stream handle */
typedef struct ihipModule_t *hipModule_t;         /* module handle */
typedef struct ihipModuleSymbol_t *hipFunction_t; /* function handle */

View File

@ -0,0 +1,45 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
G_BEGIN_DECLS
/**
 * Result codes of the hiprtc API.
 *
 * Values 0-11 are contiguous; HIPRTC_ERROR_LINKING is set apart at 100.
 */
typedef enum hiprtcResult {
  /** Success */
  HIPRTC_SUCCESS = 0,
  /** Out of memory */
  HIPRTC_ERROR_OUT_OF_MEMORY = 1,
  /** Failed to create program */
  HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
  /** Invalid input */
  HIPRTC_ERROR_INVALID_INPUT = 3,
  /** Invalid program */
  HIPRTC_ERROR_INVALID_PROGRAM = 4,
  /** Invalid option */
  HIPRTC_ERROR_INVALID_OPTION = 5,
  /** Compilation error */
  HIPRTC_ERROR_COMPILATION = 6,
  /** Failed in builtin operation */
  HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
  /** No name expression after compilation */
  HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
  /** No lowered names before compilation */
  HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
  /** Invalid name expression */
  HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
  /** Internal error */
  HIPRTC_ERROR_INTERNAL_ERROR = 11,
  /** Error in linking */
  HIPRTC_ERROR_LINKING = 100
} hiprtcResult;
/* Program handle: a pointer to struct _hiprtcProgram. */
typedef struct _hiprtcProgram *hiprtcProgram;
G_END_DECLS

View File

@ -0,0 +1,26 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/* Forward declaration only: no members are given for the struct here. */
struct __hip_texture;

/* Texture object handle: a pointer to the forward-declared struct. */
typedef struct __hip_texture *hipTextureObject_t;