Skip to content

Export kernel descriptor for amdgpu kernels #135909

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_codegen_ssa/src/back/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
tcx, symbol, cnum,
));
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
}
});

Expand Down
92 changes: 60 additions & 32 deletions compiler/rustc_codegen_ssa/src/back/symbol_export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
};
use rustc_middle::query::LocalCrate;
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
use rustc_middle::util::Providers;
use rustc_session::config::{CrateType, OomStrategy};
use rustc_target::callconv::Conv;
use rustc_target::spec::{SanitizerSet, TlsModel};
use tracing::debug;

Expand Down Expand Up @@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
}
}

/// Looks up the calling convention and the argument ABIs of an exported symbol.
///
/// When no function instance is built for the symbol (statics, drop glue, async drop glue
/// constructor shims, symbols without a `DefId` and thread-local shims), the result is
/// `Conv::Rust` together with an empty argument list.
fn calling_convention_for_symbol<'tcx>(
    tcx: TyCtxt<'tcx>,
    symbol: ExportedSymbol<'tcx>,
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
    let maybe_instance = match symbol {
        // No instance is created for statics, whether they are listed as generic or not.
        ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
            if tcx.is_static(def_id) =>
        {
            None
        }
        ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
        ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
        // Drop glue and async drop glue constructor shims always use the Rust calling
        // convention; together with symbols lacking a `DefId` and thread-local shims they
        // follow the target's default symbol decoration scheme.
        ExportedSymbol::DropGlue(..)
        | ExportedSymbol::AsyncDropGlueCtorShim(..)
        | ExportedSymbol::NoDefId(..)
        | ExportedSymbol::ThreadLocalShim(..) => None,
    };

    let Some(i) = maybe_instance else {
        // FIXME(workingjubilee): why don't we know the convention here?
        return (Conv::Rust, &[]);
    };

    // A failing ABI query for a fully monomorphized instance is treated as a compiler bug.
    let fn_abi = tcx
        .fn_abi_of_instance(
            ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
        )
        .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"));

    (fn_abi.conv, &fn_abi.args[..])
}

/// This is the symbol name of the given instance as seen by the linker.
///
/// On 32-bit Windows symbols are decorated according to their calling conventions.
Expand All @@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
symbol: ExportedSymbol<'tcx>,
instantiating_crate: CrateNum,
) -> String {
use rustc_target::callconv::Conv;

let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);

// thread local will not be a function call,
Expand All @@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
_ => return undecorated,
};

let instance = match symbol {
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
if tcx.is_static(def_id) =>
{
None
}
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
// DropGlue always use the Rust calling convention and thus follow the target's default
// symbol decoration scheme.
ExportedSymbol::DropGlue(..) => None,
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
// target's default symbol decoration scheme.
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
// NoDefId always follow the target's default symbol decoration scheme.
ExportedSymbol::NoDefId(..) => None,
// ThreadLocalShim always follow the target's default symbol decoration scheme.
ExportedSymbol::ThreadLocalShim(..) => None,
};

let (conv, args) = instance
.map(|i| {
tcx.fn_abi_of_instance(
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
)
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
})
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
.unwrap_or((Conv::Rust, &[]));
let (conv, args) = calling_convention_for_symbol(tcx, symbol);

// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
Expand Down Expand Up @@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
}

/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
/// object.
///
/// `symbols` is the list being extended, `symbol` is the exported symbol under consideration
/// and `instantiating_crate` is the crate used to compute its symbol name. Nothing is pushed
/// unless the target OS is `amdhsa` and the symbol uses the `GpuKernel` calling convention.
pub(crate) fn extend_exported_symbols<'tcx>(
    symbols: &mut Vec<String>,
    tcx: TyCtxt<'tcx>,
    symbol: ExportedSymbol<'tcx>,
    instantiating_crate: CrateNum,
) {
    // Test the target first: this is a plain string comparison, whereas determining the calling
    // convention performs a function ABI lookup. Doing it in this order keeps that lookup off
    // the path of every target that has no kernel descriptors.
    if tcx.sess.target.os != "amdhsa" {
        return;
    }

    let (conv, _) = calling_convention_for_symbol(tcx, symbol);
    if conv != Conv::GpuKernel {
        return;
    }

    let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);

    // Add the symbol for the kernel descriptor (with .kd suffix)
    symbols.push(format!("{undecorated}.kd"));
}

fn maybe_emutls_symbol_name<'tcx>(
tcx: TyCtxt<'tcx>,
symbol: ExportedSymbol<'tcx>,
Expand Down
11 changes: 11 additions & 0 deletions tests/run-make/amdgpu-kd/foo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Minimal fixture crate: it is built without `core` or `std` and defines a single
// `gpu-kernel` function.
#![allow(internal_features)]
#![feature(no_core, lang_items, abi_gpu_kernel)]
#![no_core]
#![no_std]

// This is needed because of #![no_core]:
#[lang = "sized"]
trait Sized {}

// Empty kernel using the `gpu-kernel` ABI. `#[no_mangle]` keeps the symbol name as `kernel`.
#[no_mangle]
extern "gpu-kernel" fn kernel() {}
20 changes: 20 additions & 0 deletions tests/run-make/amdgpu-kd/rmake.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
// with the name of the kernel plus a .kd suffix.
// Check that the produced object has the .kd symbol exported.

//@ needs-llvm-components: amdgpu
//@ needs-rust-lld

use run_make_support::{llvm_readobj, rustc};

// Compiles `foo.rs` for the amdgpu target, then checks the symbols of `foo.elf`.
fn main() {
// Build `foo.rs` as a `cdylib` named `foo` for `amdgcn-amd-amdhsa`, targeting the gfx900 CPU.
rustc()
.crate_name("foo")
.target("amdgcn-amd-amdhsa")
.arg("-Ctarget-cpu=gfx900")
.crate_type("cdylib")
.input("foo.rs")
.run();
// Dump the symbols of `foo.elf` (presumably the output of the build above) and require that
// the kernel descriptor symbol `kernel.kd` shows up in the output.
llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd");
}
Loading