hip: Add support for AMD kerenl precompile
Adding "hip-amd-precompile" build option. If enabled, AMD kernels will be precompiled at build time. Also "hip-hipcc-arch" build option (corresponding to --offload-arch hipcc option) is added so that user can specify target GPU arch instead of auto-detection by hipcc Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8923>
This commit is contained in:
parent
d26755f4d4
commit
eb925e4212
@ -282,6 +282,10 @@ option('vulkan-windowing', type : 'array',
|
||||
option('gpl', type: 'feature', value: 'disabled', yield: true,
|
||||
description: 'Allow build plugins that have (A)GPL-licensed dependencies')
|
||||
|
||||
# HIP plugin options
|
||||
option('hip-amd-precompile', type : 'feature', value : 'disabled', description : 'Enable HIP kernel precompile for AMD')
|
||||
option('hip-hipcc-arch', type : 'string', value : '', description : 'GPU architectur for hipcc --offload-arch option')
|
||||
|
||||
# Common feature options
|
||||
option('examples', type : 'feature', value : 'auto', yield : true)
|
||||
option('tools', type : 'feature', value : 'auto', yield : true)
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "gsthip-config.h"
|
||||
|
||||
#include "gsthip.h"
|
||||
#include "gsthipconverter.h"
|
||||
#include "gsthiprtc.h"
|
||||
@ -34,6 +36,12 @@
|
||||
#include "kernel/converter-unpack.cu"
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
#ifdef HIP_AMD_PRECOMPILED
|
||||
#include "kernel/converter_hsaco.h"
|
||||
#else
|
||||
static std::unordered_map<std::string, const unsigned char *> g_precompiled_hsaco_table;
|
||||
#endif
|
||||
|
||||
static std::unordered_map<std::string, const char *> g_ptx_table;
|
||||
static std::mutex g_kernel_table_lock;
|
||||
/* *INDENT-ON* */
|
||||
@ -1331,33 +1339,52 @@ gst_hip_converter_setup (GstHipConverter * self)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* TODO: distinguish amd and nvidia */
|
||||
guint device_id;
|
||||
g_object_get (self->device, "device-id", &device_id, nullptr);
|
||||
std::string kernel_name = "GstHipConverterMain_" +
|
||||
std::string (priv->texture_fmt->sample_func) + "_" + output_name + "_" +
|
||||
std::to_string (device_id);
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD)
|
||||
kernel_name += "_amd";
|
||||
else
|
||||
kernel_name += "_nvidia";
|
||||
std::string sampler_define = std::string ("-DSAMPLER=Sample") +
|
||||
std::string (priv->texture_fmt->sample_func);
|
||||
std::string output_define = std::string ("-DOUTPUT=Output") + output_name;
|
||||
std::string texture_define;
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
texture_define = std::string ("-DTextureObject_t=hipTextureObject_t");
|
||||
} else {
|
||||
texture_define = std::string ("-DTextureObject_t=cudaTextureObject_t");
|
||||
}
|
||||
std::vector < const char *>opts;
|
||||
opts.push_back (sampler_define.c_str ());
|
||||
opts.push_back (output_define.c_str ());
|
||||
opts.push_back (texture_define.c_str ());
|
||||
|
||||
auto device_id = gst_hip_device_get_device_id (self->device);
|
||||
const gchar *program = nullptr;
|
||||
|
||||
{
|
||||
std::string kernel_name_base = "GstHipConverterMain_" +
|
||||
std::string (priv->texture_fmt->sample_func) + "_" + output_name;
|
||||
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
auto kernel_name = kernel_name_base + "_amd";
|
||||
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_hsaco_table.end ()) {
|
||||
program = (const gchar *) precompiled->second;
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->main_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from hsaco, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->main_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!program) {
|
||||
std::string kernel_name = kernel_name_base + "_" +
|
||||
std::to_string (device_id);
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD)
|
||||
kernel_name += "_amd";
|
||||
else
|
||||
kernel_name += "_nvidia";
|
||||
|
||||
std::string sampler_define = std::string ("-DSAMPLER=Sample") +
|
||||
std::string (priv->texture_fmt->sample_func);
|
||||
std::string output_define = std::string ("-DOUTPUT=Output") + output_name;
|
||||
std::string texture_define;
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
texture_define = std::string ("-DTextureObject_t=hipTextureObject_t");
|
||||
} else {
|
||||
texture_define = std::string ("-DTextureObject_t=cudaTextureObject_t");
|
||||
}
|
||||
|
||||
std::vector < const char *>opts;
|
||||
opts.push_back (sampler_define.c_str ());
|
||||
opts.push_back (output_define.c_str ());
|
||||
opts.push_back (texture_define.c_str ());
|
||||
|
||||
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
|
||||
|
||||
auto ptx = g_ptx_table.find (kernel_name);
|
||||
@ -1437,18 +1464,36 @@ gst_hip_converter_setup (GstHipConverter * self)
|
||||
}
|
||||
|
||||
priv->unpack_buffer.texture = texture;
|
||||
|
||||
program = nullptr;
|
||||
|
||||
{
|
||||
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
|
||||
std::string unpack_module_name =
|
||||
"GstHipConverterUnpack_device_" + std::to_string (device_id);
|
||||
std::string unpack_module_name_base = "GstHipConverterUnpack";
|
||||
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
auto kernel_name = unpack_module_name_base + "_amd";
|
||||
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_hsaco_table.end ()) {
|
||||
program = (const gchar *) precompiled->second;
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->unpack_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from hsaco, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->unpack_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!program) {
|
||||
std::string unpack_module_name = unpack_module_name_base + "_" +
|
||||
std::to_string (device_id);
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD)
|
||||
unpack_module_name += "_amd";
|
||||
else
|
||||
unpack_module_name += "_nvidia";
|
||||
|
||||
std::lock_guard < std::mutex > lk (g_kernel_table_lock);
|
||||
auto ptx = g_ptx_table.find (unpack_module_name);
|
||||
if (ptx == g_ptx_table.end ()) {
|
||||
GST_DEBUG_OBJECT (self, "Building PTX");
|
||||
|
@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
# GStreamer
|
||||
# Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Library General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Library General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Library General Public
|
||||
# License along with this library; if not, write to the
|
||||
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
|
||||
start_header = """/*
|
||||
* This file is autogenerated by collect_hsaco_headers.py
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
"""
|
||||
|
||||
start_map = """
|
||||
#define MAKE_BYTECODE(name) { G_STRINGIFY (name), g_##name }
|
||||
static std::unordered_map<std::string, const unsigned char *>
|
||||
"""
|
||||
|
||||
end_map = """};
|
||||
#undef MAKE_BYTECODE
|
||||
"""
|
||||
|
||||
def convert_hsaco_to_header(hsaco_file, header_file):
|
||||
with open(hsaco_file, 'rb') as f:
|
||||
hsaco_content = f.read()
|
||||
|
||||
header_lines = []
|
||||
header_lines.append("// Generated by collect_hsaco_headers.py")
|
||||
header_lines.append("#pragma once")
|
||||
header_lines.append("/* Generated by bin2header.py */")
|
||||
header_lines.append("static const unsigned char g_{}[] = {{".format(os.path.splitext(os.path.basename(hsaco_file))[0]))
|
||||
|
||||
bytes_per_line = 12
|
||||
for i in range(0, len(hsaco_content), bytes_per_line):
|
||||
chunk = hsaco_content[i:i+bytes_per_line]
|
||||
line = " " + ", ".join("0x{:02x}".format(b) for b in chunk)
|
||||
if i + bytes_per_line < len(hsaco_content):
|
||||
line += ","
|
||||
header_lines.append(line)
|
||||
|
||||
header_lines.append("};")
|
||||
header_lines.append("")
|
||||
header_content = "\n".join(header_lines)
|
||||
|
||||
with open(header_file, "w", encoding='utf8') as f:
|
||||
f.write(header_content)
|
||||
|
||||
def main(args):
|
||||
parser = argparse.ArgumentParser(description='Read HIP HSACO from directory and make single header')
|
||||
parser.add_argument("--input", help="the precompiled HIP HSACO directory")
|
||||
parser.add_argument("--output", help="output header file location")
|
||||
parser.add_argument("--prefix", help="HIP HSACO header filename prefix")
|
||||
parser.add_argument("--name", help="Hash map variable name")
|
||||
|
||||
args = parser.parse_args(args)
|
||||
|
||||
hsaco_files = [os.path.join(args.input, file) for file in os.listdir(args.input) if file.startswith(args.prefix) and file.endswith(".hsaco") ]
|
||||
|
||||
with open(args.output, 'w', newline='\n', encoding='utf8') as f:
|
||||
f.write(start_header)
|
||||
for hsaco_file in hsaco_files:
|
||||
header_file = os.path.splitext(hsaco_file)[0] + '.h'
|
||||
convert_hsaco_to_header(hsaco_file, header_file)
|
||||
f.write("#include \"")
|
||||
f.write(os.path.basename(header_file))
|
||||
f.write("\"\n")
|
||||
f.write(start_map)
|
||||
f.write(args.name)
|
||||
f.write(" = {\n")
|
||||
for hsaco_file in hsaco_files:
|
||||
f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(os.path.basename(hsaco_file))[0]))
|
||||
f.write(end_map)
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv[1:]))
|
@ -18,6 +18,10 @@
|
||||
*/
|
||||
|
||||
#if defined(__NVCC__) || defined(__HIPCC__)
|
||||
#ifdef __HIPCC__
|
||||
#include <hip/hip_runtime.h>
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
__global__ void
|
||||
GstHipConverterUnpack_RGB_RGBx
|
||||
|
@ -18,10 +18,11 @@
|
||||
*/
|
||||
|
||||
#if defined(__NVCC__) || defined(__HIPCC__)
|
||||
#ifdef __NVCC__
|
||||
#define TextureObject_t cudaTextureObject_t
|
||||
#else
|
||||
#ifdef __HIPCC__
|
||||
#include <hip/hip_runtime.h>
|
||||
#define TextureObject_t hipTextureObject_t
|
||||
#else
|
||||
#define TextureObject_t cudaTextureObject_t
|
||||
#endif
|
||||
|
||||
struct ColorMatrix
|
||||
|
102
subprojects/gst-plugins-bad/sys/hip/kernel/meson.build
Normal file
102
subprojects/gst-plugins-bad/sys/hip/kernel/meson.build
Normal file
@ -0,0 +1,102 @@
|
||||
conv_source = files('converter.cu')
|
||||
conv_comm_source = files('converter-unpack.cu')
|
||||
|
||||
conv_input_formats = [
|
||||
'I420',
|
||||
'YV12',
|
||||
'I420_10',
|
||||
'I420_12',
|
||||
'NV12',
|
||||
'NV21',
|
||||
'VUYA',
|
||||
'RGBA',
|
||||
'BGRA',
|
||||
'RGBx',
|
||||
'BGRx',
|
||||
'ARGB',
|
||||
'ABGR',
|
||||
'RGBP',
|
||||
'BGRP',
|
||||
'GBR',
|
||||
'GBR_10',
|
||||
'GBR_12',
|
||||
'GBRA',
|
||||
]
|
||||
|
||||
conv_output_formats = [
|
||||
'I420',
|
||||
'YV12',
|
||||
'NV12',
|
||||
'NV21',
|
||||
'P010',
|
||||
'I420_10',
|
||||
'I420_12',
|
||||
'Y444',
|
||||
'Y444_10',
|
||||
'Y444_12',
|
||||
'Y444_16',
|
||||
'Y42B',
|
||||
'I422_10',
|
||||
'I422_12',
|
||||
'VUYA',
|
||||
'RGBA',
|
||||
'RGBx',
|
||||
'BGRA',
|
||||
'BGRx',
|
||||
'ARGB',
|
||||
'ABGR',
|
||||
'RGB',
|
||||
'BGR',
|
||||
'RGB10A2',
|
||||
'BGR10A2',
|
||||
'RGBP',
|
||||
'GBR',
|
||||
'GBR_10',
|
||||
'GBR_12',
|
||||
'GBR_16',
|
||||
'GBRA',
|
||||
]
|
||||
|
||||
amd_header_collector = find_program('collect_hsaco_headers.py')
|
||||
|
||||
amd_conv_precompiled = []
|
||||
amd_opt_common = ['-w', '--genco', '-c', '@INPUT@', '-o', '@OUTPUT@']
|
||||
amd_arch_opt = get_option('hip-hipcc-arch')
|
||||
if amd_arch_opt != ''
|
||||
amd_opt_common += ['--offload-arch=' + amd_arch_opt]
|
||||
endif
|
||||
|
||||
foreach input_format : conv_input_formats
|
||||
foreach output_format : conv_output_formats
|
||||
hsaco_name = 'GstHipConverterMain_@0@_@1@_amd.hsaco'.format(input_format, output_format)
|
||||
opts = amd_opt_common + ['-DSAMPLER=Sample@0@'.format(input_format),
|
||||
'-DOUTPUT=Output@0@'.format(output_format)]
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + opts)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
endforeach
|
||||
endforeach
|
||||
|
||||
hsaco_name = 'GstHipConverterUnpack_amd.hsaco'
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_comm_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + amd_opt_common)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
|
||||
amd_conv_hsaco_collection = custom_target('hip_converter_hsaco',
|
||||
input : amd_conv_precompiled,
|
||||
output : 'converter_hsaco.h',
|
||||
command : [amd_header_collector,
|
||||
'--input', meson.current_build_dir(),
|
||||
'--prefix', 'GstHipConverter',
|
||||
'--name', 'g_precompiled_hsaco_table',
|
||||
'--output', '@OUTPUT@'
|
||||
])
|
||||
|
||||
hip_kernel_amd_precompiled += [
|
||||
amd_conv_precompiled,
|
||||
amd_conv_hsaco_collection,
|
||||
]
|
@ -27,6 +27,7 @@ extra_args = [
|
||||
]
|
||||
|
||||
extra_deps = []
|
||||
hip_kernel_amd_precompiled = []
|
||||
|
||||
hip_option = get_option('hip')
|
||||
if hip_option.disabled()
|
||||
@ -37,8 +38,32 @@ if host_system not in ['linux', 'windows']
|
||||
subdir_done()
|
||||
endif
|
||||
|
||||
hip_incdir = include_directories('./stub')
|
||||
hip_precompile_amd_opt = get_option('hip-amd-precompile')
|
||||
have_hipcc = false
|
||||
if not hip_precompile_amd_opt.disabled() and not meson.is_cross_build()
|
||||
if host_system == 'windows'
|
||||
hipcc = find_program('hipcc.bin', required: false)
|
||||
if not hipcc.found()
|
||||
hip_root = run_command(python3, '-c', 'import os; print(os.environ.get("HIP_PATH"))', check: false).stdout().strip()
|
||||
if hip_root != '' and hip_root != 'None'
|
||||
hip_bin_path = join_paths(hip_root, 'bin')
|
||||
hipcc = find_program('hipcc.bin',
|
||||
dirs: [hip_bin_path],
|
||||
required: hip_precompile_amd_opt)
|
||||
endif
|
||||
endif
|
||||
else
|
||||
hipcc = find_program('hipcc', required: hip_precompile_amd_opt)
|
||||
endif
|
||||
have_hipcc = hipcc.found()
|
||||
endif
|
||||
|
||||
hip_cdata = configuration_data()
|
||||
if have_hipcc
|
||||
hip_cdata.set('HIP_AMD_PRECOMPILED', true)
|
||||
subdir('kernel')
|
||||
endif
|
||||
|
||||
if gstcuda_dep.found()
|
||||
hip_cdata.set('HAVE_GST_CUDA', true)
|
||||
extra_deps += [gstcuda_dep]
|
||||
@ -49,7 +74,8 @@ configure_file(
|
||||
configuration: hip_cdata,
|
||||
)
|
||||
|
||||
gsthip = library('gsthip', hip_sources,
|
||||
hip_incdir = include_directories('./stub')
|
||||
gsthip = library('gsthip', hip_sources + hip_kernel_amd_precompiled,
|
||||
c_args : gst_plugins_bad_args + extra_args,
|
||||
cpp_args: gst_plugins_bad_args + extra_args,
|
||||
include_directories : [configinc, hip_incdir],
|
||||
|
Loading…
x
Reference in New Issue
Block a user