hip: Add support for AMD kerenl precompile

Adding "hip-amd-precompile" build option. If enabled, AMD kernels
will be precompiled at build time. Also "hip-hipcc-arch" build option
(corresponding to --offload-arch hipcc option) is added
so that user can specify target GPU arch instead of auto-detection by hipcc

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8923>
This commit is contained in:
Seungha Yang 2025-05-14 13:17:00 +09:00
parent d26755f4d4
commit eb925e4212
7 changed files with 309 additions and 35 deletions

View File

@ -282,6 +282,10 @@ option('vulkan-windowing', type : 'array',
option('gpl', type: 'feature', value: 'disabled', yield: true,
description: 'Allow build plugins that have (A)GPL-licensed dependencies')
# HIP plugin options
option('hip-amd-precompile', type : 'feature', value : 'disabled', description : 'Enable HIP kernel precompile for AMD')
option('hip-hipcc-arch', type : 'string', value : '', description : 'GPU architectur for hipcc --offload-arch option')
# Common feature options
option('examples', type : 'feature', value : 'auto', yield : true)
option('tools', type : 'feature', value : 'auto', yield : true)

View File

@ -21,6 +21,8 @@
#include "config.h"
#endif
#include "gsthip-config.h"
#include "gsthip.h"
#include "gsthipconverter.h"
#include "gsthiprtc.h"
@ -34,6 +36,12 @@
#include "kernel/converter-unpack.cu"
/* *INDENT-OFF* */
#ifdef HIP_AMD_PRECOMPILED
#include "kernel/converter_hsaco.h"
#else
static std::unordered_map<std::string, const unsigned char *> g_precompiled_hsaco_table;
#endif
static std::unordered_map<std::string, const char *> g_ptx_table;
static std::mutex g_kernel_table_lock;
/* *INDENT-ON* */
@ -1331,33 +1339,52 @@ gst_hip_converter_setup (GstHipConverter * self)
return FALSE;
}
/* TODO: distinguish amd and nvidia */
guint device_id;
g_object_get (self->device, "device-id", &device_id, nullptr);
std::string kernel_name = "GstHipConverterMain_" +
std::string (priv->texture_fmt->sample_func) + "_" + output_name + "_" +
std::to_string (device_id);
if (priv->vendor == GST_HIP_VENDOR_AMD)
kernel_name += "_amd";
else
kernel_name += "_nvidia";
std::string sampler_define = std::string ("-DSAMPLER=Sample") +
std::string (priv->texture_fmt->sample_func);
std::string output_define = std::string ("-DOUTPUT=Output") + output_name;
std::string texture_define;
if (priv->vendor == GST_HIP_VENDOR_AMD) {
texture_define = std::string ("-DTextureObject_t=hipTextureObject_t");
} else {
texture_define = std::string ("-DTextureObject_t=cudaTextureObject_t");
}
std::vector < const char *>opts;
opts.push_back (sampler_define.c_str ());
opts.push_back (output_define.c_str ());
opts.push_back (texture_define.c_str ());
auto device_id = gst_hip_device_get_device_id (self->device);
const gchar *program = nullptr;
{
std::string kernel_name_base = "GstHipConverterMain_" +
std::string (priv->texture_fmt->sample_func) + "_" + output_name;
if (priv->vendor == GST_HIP_VENDOR_AMD) {
auto kernel_name = kernel_name_base + "_amd";
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
if (precompiled != g_precompiled_hsaco_table.end ()) {
program = (const gchar *) precompiled->second;
ret = HipModuleLoadData (priv->vendor, &priv->main_module, program);
if (ret != hipSuccess) {
GST_WARNING_OBJECT (self,
"Could not load module from hsaco, ret %d", ret);
program = nullptr;
priv->main_module = nullptr;
} else {
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
}
}
}
if (!program) {
std::string kernel_name = kernel_name_base + "_" +
std::to_string (device_id);
if (priv->vendor == GST_HIP_VENDOR_AMD)
kernel_name += "_amd";
else
kernel_name += "_nvidia";
std::string sampler_define = std::string ("-DSAMPLER=Sample") +
std::string (priv->texture_fmt->sample_func);
std::string output_define = std::string ("-DOUTPUT=Output") + output_name;
std::string texture_define;
if (priv->vendor == GST_HIP_VENDOR_AMD) {
texture_define = std::string ("-DTextureObject_t=hipTextureObject_t");
} else {
texture_define = std::string ("-DTextureObject_t=cudaTextureObject_t");
}
std::vector < const char *>opts;
opts.push_back (sampler_define.c_str ());
opts.push_back (output_define.c_str ());
opts.push_back (texture_define.c_str ());
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
auto ptx = g_ptx_table.find (kernel_name);
@ -1437,18 +1464,36 @@ gst_hip_converter_setup (GstHipConverter * self)
}
priv->unpack_buffer.texture = texture;
program = nullptr;
{
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
std::string unpack_module_name =
"GstHipConverterUnpack_device_" + std::to_string (device_id);
std::string unpack_module_name_base = "GstHipConverterUnpack";
if (priv->vendor == GST_HIP_VENDOR_AMD) {
auto kernel_name = unpack_module_name_base + "_amd";
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
if (precompiled != g_precompiled_hsaco_table.end ()) {
program = (const gchar *) precompiled->second;
ret = HipModuleLoadData (priv->vendor, &priv->unpack_module, program);
if (ret != hipSuccess) {
GST_WARNING_OBJECT (self,
"Could not load module from hsaco, ret %d", ret);
program = nullptr;
priv->unpack_module = nullptr;
} else {
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
}
}
}
if (!program) {
std::string unpack_module_name = unpack_module_name_base + "_" +
std::to_string (device_id);
if (priv->vendor == GST_HIP_VENDOR_AMD)
unpack_module_name += "_amd";
else
unpack_module_name += "_nvidia";
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
auto ptx = g_ptx_table.find (unpack_module_name);
if (ptx == g_ptx_table.end ()) {
GST_DEBUG_OBJECT (self, "Building PTX");

View File

@ -0,0 +1,92 @@
#!/usr/bin/env python3
# GStreamer
# Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301, USA.
import sys
import os
import argparse
start_header = """/*
* This file is autogenerated by collect_hsaco_headers.py
*/
#pragma once
"""
start_map = """
#define MAKE_BYTECODE(name) { G_STRINGIFY (name), g_##name }
static std::unordered_map<std::string, const unsigned char *>
"""
end_map = """};
#undef MAKE_BYTECODE
"""
def convert_hsaco_to_header(hsaco_file, header_file):
with open(hsaco_file, 'rb') as f:
hsaco_content = f.read()
header_lines = []
header_lines.append("// Generated by collect_hsaco_headers.py")
header_lines.append("#pragma once")
header_lines.append("/* Generated by bin2header.py */")
header_lines.append("static const unsigned char g_{}[] = {{".format(os.path.splitext(os.path.basename(hsaco_file))[0]))
bytes_per_line = 12
for i in range(0, len(hsaco_content), bytes_per_line):
chunk = hsaco_content[i:i+bytes_per_line]
line = " " + ", ".join("0x{:02x}".format(b) for b in chunk)
if i + bytes_per_line < len(hsaco_content):
line += ","
header_lines.append(line)
header_lines.append("};")
header_lines.append("")
header_content = "\n".join(header_lines)
with open(header_file, "w", encoding='utf8') as f:
f.write(header_content)
def main(args):
parser = argparse.ArgumentParser(description='Read HIP HSACO from directory and make single header')
parser.add_argument("--input", help="the precompiled HIP HSACO directory")
parser.add_argument("--output", help="output header file location")
parser.add_argument("--prefix", help="HIP HSACO header filename prefix")
parser.add_argument("--name", help="Hash map variable name")
args = parser.parse_args(args)
hsaco_files = [os.path.join(args.input, file) for file in os.listdir(args.input) if file.startswith(args.prefix) and file.endswith(".hsaco") ]
with open(args.output, 'w', newline='\n', encoding='utf8') as f:
f.write(start_header)
for hsaco_file in hsaco_files:
header_file = os.path.splitext(hsaco_file)[0] + '.h'
convert_hsaco_to_header(hsaco_file, header_file)
f.write("#include \"")
f.write(os.path.basename(header_file))
f.write("\"\n")
f.write(start_map)
f.write(args.name)
f.write(" = {\n")
for hsaco_file in hsaco_files:
f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(os.path.basename(hsaco_file))[0]))
f.write(end_map)
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))

View File

@ -18,6 +18,10 @@
*/
#if defined(__NVCC__) || defined(__HIPCC__)
#ifdef __HIPCC__
#include <hip/hip_runtime.h>
#endif
extern "C" {
__global__ void
GstHipConverterUnpack_RGB_RGBx

View File

@ -18,10 +18,11 @@
*/
#if defined(__NVCC__) || defined(__HIPCC__)
#ifdef __NVCC__
#define TextureObject_t cudaTextureObject_t
#else
#ifdef __HIPCC__
#include <hip/hip_runtime.h>
#define TextureObject_t hipTextureObject_t
#else
#define TextureObject_t cudaTextureObject_t
#endif
struct ColorMatrix

View File

@ -0,0 +1,102 @@
conv_source = files('converter.cu')
conv_comm_source = files('converter-unpack.cu')
conv_input_formats = [
'I420',
'YV12',
'I420_10',
'I420_12',
'NV12',
'NV21',
'VUYA',
'RGBA',
'BGRA',
'RGBx',
'BGRx',
'ARGB',
'ABGR',
'RGBP',
'BGRP',
'GBR',
'GBR_10',
'GBR_12',
'GBRA',
]
conv_output_formats = [
'I420',
'YV12',
'NV12',
'NV21',
'P010',
'I420_10',
'I420_12',
'Y444',
'Y444_10',
'Y444_12',
'Y444_16',
'Y42B',
'I422_10',
'I422_12',
'VUYA',
'RGBA',
'RGBx',
'BGRA',
'BGRx',
'ARGB',
'ABGR',
'RGB',
'BGR',
'RGB10A2',
'BGR10A2',
'RGBP',
'GBR',
'GBR_10',
'GBR_12',
'GBR_16',
'GBRA',
]
amd_header_collector = find_program('collect_hsaco_headers.py')
amd_conv_precompiled = []
amd_opt_common = ['-w', '--genco', '-c', '@INPUT@', '-o', '@OUTPUT@']
amd_arch_opt = get_option('hip-hipcc-arch')
if amd_arch_opt != ''
amd_opt_common += ['--offload-arch=' + amd_arch_opt]
endif
foreach input_format : conv_input_formats
foreach output_format : conv_output_formats
hsaco_name = 'GstHipConverterMain_@0@_@1@_amd.hsaco'.format(input_format, output_format)
opts = amd_opt_common + ['-DSAMPLER=Sample@0@'.format(input_format),
'-DOUTPUT=Output@0@'.format(output_format)]
compiled_kernel = custom_target(hsaco_name,
input : conv_source,
output : hsaco_name,
command : [hipcc] + opts)
amd_conv_precompiled += [compiled_kernel]
endforeach
endforeach
hsaco_name = 'GstHipConverterUnpack_amd.hsaco'
compiled_kernel = custom_target(hsaco_name,
input : conv_comm_source,
output : hsaco_name,
command : [hipcc] + amd_opt_common)
amd_conv_precompiled += [compiled_kernel]
amd_conv_hsaco_collection = custom_target('hip_converter_hsaco',
input : amd_conv_precompiled,
output : 'converter_hsaco.h',
command : [amd_header_collector,
'--input', meson.current_build_dir(),
'--prefix', 'GstHipConverter',
'--name', 'g_precompiled_hsaco_table',
'--output', '@OUTPUT@'
])
hip_kernel_amd_precompiled += [
amd_conv_precompiled,
amd_conv_hsaco_collection,
]

View File

@ -27,6 +27,7 @@ extra_args = [
]
extra_deps = []
hip_kernel_amd_precompiled = []
hip_option = get_option('hip')
if hip_option.disabled()
@ -37,8 +38,32 @@ if host_system not in ['linux', 'windows']
subdir_done()
endif
hip_incdir = include_directories('./stub')
hip_precompile_amd_opt = get_option('hip-amd-precompile')
have_hipcc = false
if not hip_precompile_amd_opt.disabled() and not meson.is_cross_build()
if host_system == 'windows'
hipcc = find_program('hipcc.bin', required: false)
if not hipcc.found()
hip_root = run_command(python3, '-c', 'import os; print(os.environ.get("HIP_PATH"))', check: false).stdout().strip()
if hip_root != '' and hip_root != 'None'
hip_bin_path = join_paths(hip_root, 'bin')
hipcc = find_program('hipcc.bin',
dirs: [hip_bin_path],
required: hip_precompile_amd_opt)
endif
endif
else
hipcc = find_program('hipcc', required: hip_precompile_amd_opt)
endif
have_hipcc = hipcc.found()
endif
hip_cdata = configuration_data()
if have_hipcc
hip_cdata.set('HIP_AMD_PRECOMPILED', true)
subdir('kernel')
endif
if gstcuda_dep.found()
hip_cdata.set('HAVE_GST_CUDA', true)
extra_deps += [gstcuda_dep]
@ -49,7 +74,8 @@ configure_file(
configuration: hip_cdata,
)
gsthip = library('gsthip', hip_sources,
hip_incdir = include_directories('./stub')
gsthip = library('gsthip', hip_sources + hip_kernel_amd_precompiled,
c_args : gst_plugins_bad_args + extra_args,
cpp_args: gst_plugins_bad_args + extra_args,
include_directories : [configinc, hip_incdir],