hip: Add support for NVIDIA kernel precompile
... with "hip-nvidia-precompile" and "hip-nvcc-arch" build options Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8923>
This commit is contained in:
parent
eb925e4212
commit
fc8f7c349b
@ -285,6 +285,8 @@ option('gpl', type: 'feature', value: 'disabled', yield: true,
|
||||
# HIP plugin options
|
||||
option('hip-amd-precompile', type : 'feature', value : 'disabled', description : 'Enable HIP kernel precompile for AMD')
|
||||
option('hip-hipcc-arch', type : 'string', value : '', description : 'GPU architecture for hipcc --offload-arch option')
|
||||
option('hip-nvidia-precompile', type : 'feature', value : 'disabled', description : 'Enable HIP kernel precompile for NVIDIA')
|
||||
option('hip-nvcc-arch', type : 'string', value : 'compute_52', description : 'GPU architecture for nvcc -arch option')
|
||||
|
||||
# Common feature options
|
||||
option('examples', type : 'feature', value : 'auto', yield : true)
|
||||
|
@ -42,6 +42,12 @@
|
||||
static std::unordered_map<std::string, const unsigned char *> g_precompiled_hsaco_table;
|
||||
#endif
|
||||
|
||||
#ifdef HIP_NVIDIA_PRECOMPILED
|
||||
#include "kernel/converter_ptx.h"
|
||||
#else
|
||||
static std::unordered_map<std::string, const char *> g_precompiled_ptx_table;
|
||||
#endif
|
||||
|
||||
static std::unordered_map<std::string, const char *> g_ptx_table;
|
||||
static std::mutex g_kernel_table_lock;
|
||||
/* *INDENT-ON* */
|
||||
@ -1348,17 +1354,24 @@ gst_hip_converter_setup (GstHipConverter * self)
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
auto kernel_name = kernel_name_base + "_amd";
|
||||
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_hsaco_table.end ()) {
|
||||
if (precompiled != g_precompiled_hsaco_table.end ())
|
||||
program = (const gchar *) precompiled->second;
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->main_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from hsaco, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->main_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
|
||||
}
|
||||
} else {
|
||||
auto kernel_name = kernel_name_base + "_nvidia";
|
||||
auto precompiled = g_precompiled_ptx_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_ptx_table.end ())
|
||||
program = precompiled->second;
|
||||
}
|
||||
|
||||
if (program) {
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->main_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from precompiled, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->main_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled kernel");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1471,17 +1484,24 @@ gst_hip_converter_setup (GstHipConverter * self)
|
||||
if (priv->vendor == GST_HIP_VENDOR_AMD) {
|
||||
auto kernel_name = unpack_module_name_base + "_amd";
|
||||
auto precompiled = g_precompiled_hsaco_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_hsaco_table.end ()) {
|
||||
if (precompiled != g_precompiled_hsaco_table.end ())
|
||||
program = (const gchar *) precompiled->second;
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->unpack_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from hsaco, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->unpack_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled hsaco");
|
||||
}
|
||||
} else {
|
||||
auto kernel_name = unpack_module_name_base + "_nvidia";
|
||||
auto precompiled = g_precompiled_ptx_table.find (kernel_name);
|
||||
if (precompiled != g_precompiled_ptx_table.end ())
|
||||
program = precompiled->second;
|
||||
}
|
||||
|
||||
if (program) {
|
||||
ret = HipModuleLoadData (priv->vendor, &priv->unpack_module, program);
|
||||
if (ret != hipSuccess) {
|
||||
GST_WARNING_OBJECT (self,
|
||||
"Could not load module from precompiled, ret %d", ret);
|
||||
program = nullptr;
|
||||
priv->unpack_module = nullptr;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (self, "Loaded precompiled kernel");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
# GStreamer
|
||||
# Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Library General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Library General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Library General Public
|
||||
# License along with this library; if not, write to the
|
||||
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
|
||||
# Text fragments of the generated collection header. They are written by
# main() in this order: start_header, one #include per kernel, start_map,
# the map variable name and its entries, then end_map.

# Banner comment and include guard emitted at the very top of the header.
start_header = (
    "/*\n"
    "* This file is autogenerated by collect_ptx_headers.py\n"
    "*/\n"
    "#pragma once\n"
    "\n"
)

# Helper macro plus the type of the lookup table; the variable name and
# the opening brace are appended by main().
start_map = (
    "\n"
    "#define MAKE_BYTECODE(name) { G_STRINGIFY (name), g_##name }\n"
    "static std::unordered_map<std::string, const char *>\n"
)

# Closes the table initializer and removes the helper macro again.
end_map = (
    "};\n"
    "#undef MAKE_BYTECODE\n"
)
|
||||
|
||||
def convert_ptx_to_header(ptx_file, header_file):
    """Embed one PTX text file into a C++ header as a raw string literal.

    The generated header defines ``static const char* g_<stem>`` where
    ``<stem>`` is the base name of ``ptx_file`` without its extension.

    Args:
        ptx_file: Path of the PTX file to read (decoded as UTF-8).
        header_file: Path of the header to write; overwritten if it exists.
    """
    with open(ptx_file, 'r', encoding='utf8') as ptx:
        ptx_content = ptx.read()

    symbol = os.path.splitext(os.path.basename(ptx_file))[0]

    with open(header_file, 'w', newline='\n', encoding='utf8') as header:
        header.write('#pragma once\n')
        header.write('// This file is autogenerated by collect_ptx_headers.py\n')
        # Use a named raw-string delimiter: with the bare R"( ... )" form any
        # `)"` inside the PTX (e.g. a quoted .file path ending in ')') would
        # terminate the literal early and break the generated header.
        header.write(f'static const char* g_{symbol} = R"PTX(\n')
        header.write(ptx_content)
        header.write(')PTX";\n\n')
|
||||
|
||||
|
||||
def main(args):
    """Collect precompiled PTX files into a single C++ header.

    Every ``<prefix>*.ptx`` file found directly in the ``--input`` directory
    is converted to a sibling ``.h`` file (same path, extension replaced)
    via convert_ptx_to_header(). The ``--output`` header then includes each
    of those headers by base name and defines an unordered_map named
    ``--name`` with one MAKE_BYTECODE entry per PTX file.

    Args:
        args: Command line arguments, without the program name.

    Returns:
        None, which sys.exit() treats as a successful exit status.
    """
    parser = argparse.ArgumentParser(description='Read CUDA PTX from directory and make single header')
    # All four options are needed to produce a header, so let argparse report
    # a missing one instead of failing later on a None value.
    parser.add_argument("--input", required=True, help="the precompiled CUDA PTX directory")
    parser.add_argument("--output", required=True, help="output header file location")
    parser.add_argument("--prefix", required=True, help="CUDA PTX header filename prefix")
    parser.add_argument("--name", required=True, help="Hash map variable name")

    args = parser.parse_args(args)

    # os.listdir() returns entries in arbitrary order; sort them so that the
    # generated header is identical from one build to the next.
    ptx_files = sorted(
        os.path.join(args.input, entry)
        for entry in os.listdir(args.input)
        if entry.startswith(args.prefix) and entry.endswith(".ptx")
    )

    with open(args.output, 'w', newline='\n', encoding='utf8') as f:
        f.write(start_header)
        for ptx_file in ptx_files:
            header_file = os.path.splitext(ptx_file)[0] + '.h'
            convert_ptx_to_header(ptx_file, header_file)
            # Include by base name only: the per-kernel headers are written
            # next to the PTX files inside the --input directory.
            f.write('#include "{}"\n'.format(os.path.basename(header_file)))
        f.write(start_map)
        f.write(args.name)
        f.write(" = {\n")
        for ptx_file in ptx_files:
            f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(os.path.basename(ptx_file))[0]))
        f.write(end_map)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
@ -57,46 +57,92 @@ conv_output_formats = [
|
||||
'GBRA',
|
||||
]
|
||||
|
||||
amd_header_collector = find_program('collect_hsaco_headers.py')
|
||||
if have_hipcc
|
||||
amd_header_collector = find_program('collect_hsaco_headers.py')
|
||||
amd_conv_precompiled = []
|
||||
amd_opt_common = ['-w', '--genco', '-c', '@INPUT@', '-o', '@OUTPUT@']
|
||||
amd_arch_opt = get_option('hip-hipcc-arch')
|
||||
if amd_arch_opt != ''
|
||||
amd_opt_common += ['--offload-arch=' + amd_arch_opt]
|
||||
endif
|
||||
|
||||
amd_conv_precompiled = []
|
||||
amd_opt_common = ['-w', '--genco', '-c', '@INPUT@', '-o', '@OUTPUT@']
|
||||
amd_arch_opt = get_option('hip-hipcc-arch')
|
||||
if amd_arch_opt != ''
|
||||
amd_opt_common += ['--offload-arch=' + amd_arch_opt]
|
||||
foreach input_format : conv_input_formats
|
||||
foreach output_format : conv_output_formats
|
||||
hsaco_name = 'GstHipConverterMain_@0@_@1@_amd.hsaco'.format(input_format, output_format)
|
||||
opts = amd_opt_common + ['-DSAMPLER=Sample@0@'.format(input_format),
|
||||
'-DOUTPUT=Output@0@'.format(output_format)]
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + opts)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
endforeach
|
||||
endforeach
|
||||
|
||||
hsaco_name = 'GstHipConverterUnpack_amd.hsaco'
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_comm_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + amd_opt_common)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
|
||||
amd_conv_hsaco_collection = custom_target('hip_converter_hsaco',
|
||||
input : amd_conv_precompiled,
|
||||
output : 'converter_hsaco.h',
|
||||
command : [amd_header_collector,
|
||||
'--input', meson.current_build_dir(),
|
||||
'--prefix', 'GstHipConverter',
|
||||
'--name', 'g_precompiled_hsaco_table',
|
||||
'--output', '@OUTPUT@'
|
||||
])
|
||||
|
||||
hip_amd_precompiled += [
|
||||
amd_conv_precompiled,
|
||||
amd_conv_hsaco_collection,
|
||||
]
|
||||
endif
|
||||
|
||||
foreach input_format : conv_input_formats
|
||||
foreach output_format : conv_output_formats
|
||||
hsaco_name = 'GstHipConverterMain_@0@_@1@_amd.hsaco'.format(input_format, output_format)
|
||||
opts = amd_opt_common + ['-DSAMPLER=Sample@0@'.format(input_format),
|
||||
'-DOUTPUT=Output@0@'.format(output_format)]
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + opts)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
if have_nvcc
|
||||
nvidia_header_collector = find_program('collect_ptx_headers.py')
|
||||
nvidia_conv_precompiled = []
|
||||
nvidia_opt_common = ['-ptx', '-w', '-o', '@OUTPUT@']
|
||||
nvidia_arch_opt = get_option('hip-nvcc-arch')
|
||||
if nvidia_arch_opt != ''
|
||||
nvidia_opt_common += ['-arch=' + nvidia_arch_opt]
|
||||
endif
|
||||
|
||||
foreach input_format : conv_input_formats
|
||||
foreach output_format : conv_output_formats
|
||||
ptx_name = 'GstHipConverterMain_@0@_@1@_nvidia.ptx'.format(input_format, output_format)
|
||||
opts = nvidia_opt_common + ['-DSAMPLER=Sample@0@'.format(input_format),
|
||||
'-DOUTPUT=Output@0@'.format(output_format), '@INPUT@']
|
||||
compiled_kernel = custom_target(ptx_name,
|
||||
input : conv_source,
|
||||
output : ptx_name,
|
||||
command : [nvcc] + opts)
|
||||
nvidia_conv_precompiled += [compiled_kernel]
|
||||
endforeach
|
||||
endforeach
|
||||
endforeach
|
||||
|
||||
hsaco_name = 'GstHipConverterUnpack_amd.hsaco'
|
||||
compiled_kernel = custom_target(hsaco_name,
|
||||
input : conv_comm_source,
|
||||
output : hsaco_name,
|
||||
command : [hipcc] + amd_opt_common)
|
||||
amd_conv_precompiled += [compiled_kernel]
|
||||
ptx_name = 'GstHipConverterUnpack_nvidia.ptx'
|
||||
compiled_kernel = custom_target(ptx_name,
|
||||
input : conv_comm_source,
|
||||
output : ptx_name,
|
||||
command : [nvcc] + nvidia_opt_common + ['@INPUT@'])
|
||||
nvidia_conv_precompiled += [compiled_kernel]
|
||||
|
||||
amd_conv_hsaco_collection = custom_target('hip_converter_hsaco',
|
||||
input : amd_conv_precompiled,
|
||||
output : 'converter_hsaco.h',
|
||||
command : [amd_header_collector,
|
||||
'--input', meson.current_build_dir(),
|
||||
'--prefix', 'GstHipConverter',
|
||||
'--name', 'g_precompiled_hsaco_table',
|
||||
'--output', '@OUTPUT@'
|
||||
])
|
||||
nvidia_conv_ptx_collection = custom_target('hip_converter_ptx',
|
||||
input : nvidia_conv_precompiled,
|
||||
output : 'converter_ptx.h',
|
||||
command : [nvidia_header_collector,
|
||||
'--input', meson.current_build_dir(),
|
||||
'--prefix', 'GstHipConverter',
|
||||
'--name', 'g_precompiled_ptx_table',
|
||||
'--output', '@OUTPUT@'
|
||||
])
|
||||
|
||||
hip_kernel_amd_precompiled += [
|
||||
amd_conv_precompiled,
|
||||
amd_conv_hsaco_collection,
|
||||
]
|
||||
hip_nvidia_precompiled += [
|
||||
nvidia_conv_precompiled,
|
||||
nvidia_conv_ptx_collection,
|
||||
]
|
||||
endif
|
@ -27,7 +27,8 @@ extra_args = [
|
||||
]
|
||||
|
||||
extra_deps = []
|
||||
hip_kernel_amd_precompiled = []
|
||||
hip_amd_precompiled = []
|
||||
hip_nvidia_precompiled = []
|
||||
|
||||
hip_option = get_option('hip')
|
||||
if hip_option.disabled()
|
||||
@ -39,7 +40,9 @@ if host_system not in ['linux', 'windows']
|
||||
endif
|
||||
|
||||
hip_precompile_amd_opt = get_option('hip-amd-precompile')
|
||||
hip_precompile_nvidia_opt = get_option('hip-nvidia-precompile')
|
||||
have_hipcc = false
|
||||
have_nvcc = false
|
||||
if not hip_precompile_amd_opt.disabled() and not meson.is_cross_build()
|
||||
if host_system == 'windows'
|
||||
hipcc = find_program('hipcc.bin', required: false)
|
||||
@ -58,9 +61,15 @@ if not hip_precompile_amd_opt.disabled() and not meson.is_cross_build()
|
||||
have_hipcc = hipcc.found()
|
||||
endif
|
||||
|
||||
if not hip_precompile_nvidia_opt.disabled() and not meson.is_cross_build()
|
||||
nvcc = find_program('nvcc', required: hip_precompile_nvidia_opt)
|
||||
have_nvcc = nvcc.found()
|
||||
endif
|
||||
|
||||
hip_cdata = configuration_data()
|
||||
if have_hipcc
|
||||
hip_cdata.set('HIP_AMD_PRECOMPILED', true)
|
||||
if have_hipcc or have_nvcc
|
||||
hip_cdata.set('HIP_AMD_PRECOMPILED', have_hipcc)
|
||||
hip_cdata.set('HIP_NVIDIA_PRECOMPILED', have_nvcc)
|
||||
subdir('kernel')
|
||||
endif
|
||||
|
||||
@ -75,7 +84,7 @@ configure_file(
|
||||
)
|
||||
|
||||
hip_incdir = include_directories('./stub')
|
||||
gsthip = library('gsthip', hip_sources + hip_kernel_amd_precompiled,
|
||||
gsthip = library('gsthip', hip_sources + hip_amd_precompiled + hip_nvidia_precompiled,
|
||||
c_args : gst_plugins_bad_args + extra_args,
|
||||
cpp_args: gst_plugins_bad_args + extra_args,
|
||||
include_directories : [configinc, hip_incdir],
|
||||
|
Loading…
x
Reference in New Issue
Block a user