-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][X86] Support __attribute__((model("small"/"large"))) #124834
Conversation
Following up llvm#72078, on x86-64 this allows a global to be considered small or large regardless of the code model. For example, x86-64's medium code model by default classifies globals as small or large depending on their size relative to -mlarge-data-threshold.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Arthur Eubanks (aeubanks) ChangesFollowing up #72078, on x86-64 this allows a global to be considered small or large regardless of the code model. For example, x86-64's medium code model by default classifies globals as small or large depending on their size relative to -mlarge-data-threshold. Full diff: https://github.com/llvm/llvm-project/pull/124834.diff 5 Files Affected:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7b5562a80a35a6..dba3ed593dae28 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -657,6 +657,11 @@ Attribute Changes in Clang
- Clang now disallows the use of attributes after the namespace name. (#GH121407)
+- Clang now allows ``__attribute__((model("small")))`` and
+ ``__attribute__((model("large")))`` on non-TLS globals in x86-64 compilations.
+ This forces the global to be considered small or large in regards to the
+ x86-64 code model, regardless of the code model specified for the compilation.
+
Improvements to Clang's diagnostics
-----------------------------------
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index f4ba2bc3c6de31..092dc751d79f22 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -462,6 +462,7 @@ def TargetMSP430 : TargetArch<["msp430"]>;
def TargetM68k : TargetArch<["m68k"]>;
def TargetRISCV : TargetArch<["riscv32", "riscv64"]>;
def TargetX86 : TargetArch<["x86"]>;
+def TargetX86_64 : TargetArch<["x86_64"]>;
def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
@@ -3117,11 +3118,15 @@ def PragmaClangTextSection : InheritableAttr {
let Documentation = [InternalOnly];
}
-def CodeModel : InheritableAttr, TargetSpecificAttr<TargetLoongArch> {
+def CodeModel : InheritableAttr,
+ TargetSpecificAttr<TargetArch<!listconcat(
+ TargetLoongArch.Arches, TargetX86_64.Arches)>> {
let Spellings = [GCC<"model">];
- let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", /*is_string=*/1,
- ["normal", "medium", "extreme"], ["Small", "Medium", "Large"],
- /*opt=*/0, /*fake=*/0, /*isExternalType=*/1, /*isCovered=*/0>];
+ let Args = [EnumArgument<
+ "Model", "llvm::CodeModel::Model",
+ /*is_string=*/1, ["small", "normal", "medium", "large", "extreme"],
+ ["Small", "Small", "Medium", "Large", "Large"],
+ /*opt=*/0, /*fake=*/0, /*isExternalType=*/1, /*isCovered=*/0>];
let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>;
let Documentation = [CodeModelDocs];
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 9d7d22590bce4b..e2c752df06c25a 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -64,6 +64,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
@@ -2949,6 +2950,13 @@ static void handleSectionAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
+static bool isValidCodeModelAttr(Sema &S, StringRef Str) {
+ bool IsX8664Target =
+ S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64;
+ bool IsX8664Spelling = Str == "small" || Str == "large";
+ return IsX8664Target == IsX8664Spelling;
+}
+
static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
StringRef Str;
SourceLocation LiteralLoc;
@@ -2957,7 +2965,8 @@ static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
return;
llvm::CodeModel::Model CM;
- if (!CodeModelAttr::ConvertStrToModel(Str, CM)) {
+ if (!CodeModelAttr::ConvertStrToModel(Str, CM) ||
+ !isValidCodeModelAttr(S, Str)) {
S.Diag(LiteralLoc, diag::err_attr_codemodel_arg) << Str;
return;
}
diff --git a/clang/test/CodeGen/X86/codemodel.cpp b/clang/test/CodeGen/X86/codemodel.cpp
new file mode 100644
index 00000000000000..60a840665b49cd
--- /dev/null
+++ b/clang/test/CodeGen/X86/codemodel.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -emit-llvm -triple x86_64-unknown-unknown %s -o - | FileCheck %s
+
+// CHECK: @_ZL2v1 ={{.*}} global i32 0, code_model "small"
+static int v1 __attribute__((model("small")));
+
+void use1() {
+ v1 = 1;
+}
+
+// CHECK: @v2 ={{.*}} global float 0.000000e+00, code_model "large"
+float v2 __attribute__((model("large")));
+
+// CHECK: @_ZL2v3IiE ={{.*}} global i32 0, code_model "small"
+template <typename T>
+static T v3 __attribute__((model("small")));
+
+void use2() {
+ v3<int> = 1;
+}
+struct S {
+ double d;
+};
+
+typedef void (*F)();
+
+// CHECK: @v4 ={{.*}} global ptr null, code_model "large"
+F v4 __attribute__((model("large")));
diff --git a/clang/test/Sema/attr-model.cpp b/clang/test/Sema/attr-model.cpp
index 898cc039398436..725ec502df3962 100644
--- a/clang/test/Sema/attr-model.cpp
+++ b/clang/test/Sema/attr-model.cpp
@@ -5,7 +5,7 @@
// RUN: %clang_cc1 -triple riscv64 -verify=expected,riscv64 -fsyntax-only %s
// RUN: %clang_cc1 -triple x86_64 -verify=expected,x86_64 -fsyntax-only %s
-#if defined(__loongarch__) && !__has_attribute(model)
+#if (defined(__loongarch__) || defined(__x86_64__)) && !__has_attribute(model)
#error "Should support model attribute"
#endif
@@ -14,51 +14,49 @@ int a __attribute((model("tiny"))); // aarch64-warning {{unknown attribute 'm
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{code model 'tiny' is not supported on this target}}
int b __attribute((model("small"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// loongarch64-error {{code model 'small' is not supported on this target}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
- // riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // riscv64-warning {{unknown attribute 'model' ignored}}
int c __attribute((model("normal"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{code model 'normal' is not supported on this target}}
int d __attribute((model("kernel"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// loongarch64-error {{code model 'kernel' is not supported on this target}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{code model 'kernel' is not supported on this target}}
int e __attribute((model("medium"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{code model 'medium' is not supported on this target}}
int f __attribute((model("large"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// loongarch64-error {{code model 'large' is not supported on this target}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
- // riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // riscv64-warning {{unknown attribute 'model' ignored}}
int g __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{code model 'extreme' is not supported on this target}}
void __attribute((model("extreme"))) h() {} // aarch64-warning {{unknown attribute 'model' ignored}} \
// loongarch64-error {{'model' attribute only applies to non-TLS global variables}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{'model' attribute only applies to non-TLS global variables}}
thread_local int i __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \
// loongarch64-error {{'model' attribute only applies to non-TLS global variables}} \
// mips64-warning {{unknown attribute 'model' ignored}} \
// powerpc64-warning {{unknown attribute 'model' ignored}} \
// riscv64-warning {{unknown attribute 'model' ignored}} \
- // x86_64-warning {{unknown attribute 'model' ignored}}
+ // x86_64-error {{'model' attribute only applies to non-TLS global variables}}
|
clang/test/Sema/attr-model.cpp
Outdated
// RUN: %clang_cc1 -triple x86_64 -verify=expected,x86_64 -fsyntax-only %s | ||
// RUN: %clang_cc1 -triple nvptx64-unknown-cuda -fcuda-is-device -x cuda -verify=expected,unsupported -fsyntax-only %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the test passes this way (I.e. you do see complaints about the attributes) that means that the change will break CUDA.
Considering that we will see these attributes in the host code and that NVPTX itself can't do anything useful with them, NVPTX compilation should continue to work and ignore those attributes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The attribute is technically ignored, with a warning, but the warning is likely non-actionable as it would be issued for perfectly correct host code which just happens to be seen by a GPU compilation.
I think the correct way to handle it would be to issue a deferred diagnostic which will fire only if we end up code-gen-ing something that uses the attribute.
@yxsamliu Sam, does that make sense?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, although it seems pretty annoying to have to special case NVPTX compilations. Is there a previous example of having done something like this, or should I just ad-hoc skip the Sema code for NVPTX compilations?
(oops comment race)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the correct way to handle it would be to issue a deferred diagnostic which will fire only if we end up code-gen-ing something that uses the attribute.
is it common to define a global in a TU that's shared between the host and device?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. global scope is implicitly considered to belong to host. Data that's supposed to reside on the device must be explicitly annotated with appropriate attributes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm tempted to keep things as is since it seems like this is not the only issue that can come up with nvptx compiles. For example, the thread_local global in the test has the same issue, where nvptx doesn't support thread-local globals, which clang complains about, and I don't really see a difference between __attribute__((model())) and thread_local in that they can both be used on arbitrary globals.
Do you think it's reasonable to postpone this suggestion until people actually hit it? For example, if people aren't hitting the thread_local
issue, then perhaps people won't hit a warning with this attribute.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Up to you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The attribute is technically ignored, with a warning, but the warning is likely non-actionable as it would be issued for perfectly correct host code which just happens to be seen by a GPU compilation.
I think the correct way to handle it would be to issue a deferred diagnostic which will fire only if we end up code-gen-ing something that uses the attribute.
@yxsamliu Sam, does that make sense?
My understanding is that deferred diagnostics only works in a function, but this diagnostic happens outside functions.
I assume it is safe for all device side compilations to ignore this attribute. Presumably device code uses fully general patterns to access global data that could live in any shared object. Most device ISAs are virtual anyway, it's not like they're going to use PC relative addressing to access this data, they'll have a full-width pointer somewhere / somehow. So, if there's an easy way to suppress the warning for device ISAs, that would be reasonable IMO. |
Issuing the warning every time a TU happens to see a perfectly valid use of the attribute in the host code (even transitively, in someone else's headers) would be counterproductive, IMO. The only response to the warning is to suppress it (i.e. there's nothing for the user to fix). At the very least it creates unnecessary noise. It will also disrupt existing builds that happen to be done with warnings treated as errors.
I've allowed the attribute for NVPTX targets and ignored it in SemaDeclAttr rather than figure out how to conditionally emit a warning during codegen, does that work? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
clang/lib/Sema/SemaDeclAttr.cpp
Outdated
static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | ||
StringRef Str; | ||
SourceLocation LiteralLoc; | ||
// Check that it is a string. | ||
if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc)) | ||
return; | ||
|
||
// Ignore the attribute for NVPTX compiles since it only applies to host | ||
// globals. | ||
if (S.Context.getTargetInfo().getTriple().isNVPTX()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pls do the same for AMDGPU and SPIRV/SPIRV64
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, PTAL. it seems really bad that we have to explicitly ignore every attribute that's not explicitly supported by the GPU targets
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could get GPU back-ends accept and ignore the attribute. if we know that the object it's applied to is for the host. We'll likely need this for other attributes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
@@ -3117,11 +3120,20 @@ def PragmaClangTextSection : InheritableAttr { | |||
let Documentation = [InternalOnly]; | |||
} | |||
|
|||
def CodeModel : InheritableAttr, TargetSpecificAttr<TargetLoongArch> { | |||
// The code model attribute only applies to LoongArch and x86-64, but for NVPTX |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Should we update AttrDocs.td?
- Why is it reasonable to silently ignore the attribute? (That's generally not something we ever do because it becomes almost impossible to realize that your attribute isn't behaving the way you expect.) Please add that description to the patch summary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated AttrDocs.td.
updated description, hopefully that makes sense and the previous discussion makes sense
clang/lib/Sema/SemaDeclAttr.cpp
Outdated
static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | ||
StringRef Str; | ||
SourceLocation LiteralLoc; | ||
// Check that it is a string. | ||
if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc)) | ||
return; | ||
|
||
// Ignore the attribute for GPU device compiles since it only applies to host | ||
// globals. | ||
if (S.Context.getTargetInfo().getTriple().isNVPTX() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should diagnose if this doesn't apply to host and device (see getAuxTriple). The only valid reason to ignore this is if we see it is not in host, but IS in device (aux).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we'll get a warning on the host side in the case the host target doesn't support the attribute and there's no reason to emit the warning again on the device side right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps? Perhaps not. We aren't necessarily the compiler on both sides, so we're better off making sure we diagnose it as soon as we know what is going on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't we? I mean, I'm sure nvidia and vendors everywhere do crazy stuff, but I would like to clarify what the support scope of the Clang project is. My understanding of all the CUDA functionality we've ever added to Clang was that it assumes we're using the Clang compiler on both sides, not the janky nvcc split pre-processing model.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not just NVIDIA CUDA. SYCL does it, and IIRC, the OMP offload supports arbitrary host compilers as well. Either way, we should be diagnosing "not supported" as early as we know anyway, which means during 'device' compilation (if it is first).
I don't know anything about Clang CUDA though (nor very much about NVIDIA CUDA for that matter).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
now we warn if both the current target and getAuxTriple()
don't support the attribute. however, if we're a pure GPU compile with no getAuxTriple()
, we silently ignore the attribute since we don't know what the host could be. does this work?
@@ -62,6 +62,10 @@ def CodeModelDocs : Documentation { | |||
let Content = [{ | |||
The ``model`` attribute allows overriding the translation unit's | |||
code model (specified by ``-mcmodel``) for a specific global variable. | |||
|
|||
On LoongArch, allowed values are "normal", "medium", "extreme". |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect aaron would like to see more details on this, including what each of those things mean.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added some description at least on the x86-64 side of things
clang/lib/Sema/SemaDeclAttr.cpp
Outdated
@@ -2950,12 +2950,11 @@ static void handleSectionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | |||
} | |||
} | |||
|
|||
static bool isValidCodeModelAttr(Sema &S, StringRef Str) { | |||
if (S.Context.getTargetInfo().getTriple().isLoongArch()) { | |||
static bool isValidCodeModelAttr(llvm::Triple Triple, StringRef Str) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Triple contains a std::string
, I wouldn't treat it as a cheap-to-copy value type:
https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/TargetParser/Triple.h#L321
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. This should be fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
auto SupportedTripleIt = llvm::find_if(Triples, IsTripleSupported); | ||
if (SupportedTripleIt == Triples.end()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be if (llvm::none_of(Triple, IsTripleSupported))
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this was done this way because we also need the SupportedTripleIt
later in line 2995
S.Context.getTargetInfo().getTriple()}; | ||
if (auto *aux = S.Context.getAuxTargetInfo()) { | ||
Triples.push_back(aux->getTriple()); | ||
} else if (S.Context.getTargetInfo().getTriple().isNVPTX() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not actionable in this patch, but this points to the need for some kind of isGPU
/ isVirtualISA
predicate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i agree.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I'm happy with this patch.
I believe all concerns have been addressed, will merge now with the approvals. Happy to follow up on any other concerns. |
Following up #72078, on x86-64 this allows a global to be considered small or large regardless of the code model. For example, x86-64's medium code model by default classifies globals as small or large depending on their size relative to -mlarge-data-threshold.
GPU compilations compile the same TU for both the host and device, but only codegen the host or device portions of it depending on attributes. However, we still Sema the TU, and will warn on an unknown attribute for the device compilation since this attribute is target-specific. Since they're intended for the host, accept but ignore this attribute for device compilations.
Co-authored-by: @pranavk