Skip to content

Commit 631d187

Browse files
authored
codegen: pass the pgcstack as an argument to specsig calls (#50093)
The safepoint at function entry made every function call perform a relatively expensive load from the PTLS. We can instead pass the PTLS as an argument to functions, which makes the safepoint significantly cheaper. Also use the Swift calling convention, which together with the `swiftself` attribute makes it very likely that the argument is kept in a register between calls. Fixes: #50068
1 parent 71c5115 commit 631d187

File tree

10 files changed

+87
-35
lines changed

10 files changed

+87
-35
lines changed

base/reflection.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,7 @@ struct CodegenParams
11941194
gnu_pubnames::Cint
11951195
debug_info_kind::Cint
11961196
safepoint_on_entry::Cint
1197+
gcstack_arg::Cint
11971198

11981199
lookup::Ptr{Cvoid}
11991200

@@ -1203,13 +1204,15 @@ struct CodegenParams
12031204
prefer_specsig::Bool=false,
12041205
gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(),
12051206
safepoint_on_entry::Bool=true,
1207+
gcstack_arg::Bool=true,
12061208
lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})),
12071209
generic_context = nothing)
12081210
return new(
12091211
Cint(track_allocations), Cint(code_coverage),
12101212
Cint(prefer_specsig),
12111213
Cint(gnu_pubnames), debug_info_kind,
12121214
Cint(safepoint_on_entry),
1215+
Cint(gcstack_arg),
12131216
lookup, generic_context)
12141217
end
12151218
end

src/codegen.cpp

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,7 @@ extern "C" {
12961296
#endif
12971297
(int) DICompileUnit::DebugEmissionKind::FullDebug,
12981298
1,
1299+
1,
12991300
jl_rettype_inferred_addr, NULL };
13001301
}
13011302

@@ -1719,7 +1720,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
17191720
}
17201721

17211722
static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
1722-
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
1723+
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg);
17231724
static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
17241725
static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
17251726
jl_binding_t **pbnd, bool assign);
@@ -4107,7 +4108,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
41074108
{
41084109
++EmittedSpecfunCalls;
41094110
// emit specialized call site
4110-
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure);
4111+
bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
4112+
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
41114113
FunctionType *cft = returninfo.decl.getFunctionType();
41124114
*cc = returninfo.cc;
41134115
*return_roots = returninfo.return_roots;
@@ -4141,7 +4143,10 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
41414143
argvals[idx] = return_roots;
41424144
idx++;
41434145
}
4144-
4146+
if (gcstack_arg) {
4147+
argvals[idx] = ctx.pgcstack;
4148+
idx++;
4149+
}
41454150
for (size_t i = 0; i < nargs; i++) {
41464151
jl_value_t *jt = jl_nth_slot_type(specTypes, i);
41474152
// n.b.: specTypes is required to be a datatype by construction for specsig
@@ -4205,6 +4210,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
42054210
}
42064211
CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals);
42074212
call->setAttributes(returninfo.attrs);
4213+
if (gcstack_arg)
4214+
call->setCallingConv(CallingConv::Swift);
42084215

42094216
jl_cgval_t retval;
42104217
switch (returninfo.cc) {
@@ -5273,7 +5280,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
52735280
specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
52745281
if (specF) {
52755282
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL,
5276-
closure_decls.specFunctionObject, sigtype, rettype, true);
5283+
closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
52775284
specF = cast<Function>(returninfo.decl.getCallee());
52785285
}
52795286
}
@@ -5786,13 +5793,15 @@ static void emit_cfunc_invalidate(
57865793
DebugLoc noDbg;
57875794
ctx.builder.SetCurrentDebugLocation(noDbg);
57885795
allocate_gc_frame(ctx, b0);
5789-
57905796
Function::arg_iterator AI = gf_thunk->arg_begin();
57915797
SmallVector<jl_cgval_t> myargs(nargs);
57925798
if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
57935799
++AI;
57945800
if (return_roots)
57955801
++AI;
5802+
if (JL_FEAT_TEST(ctx,gcstack_arg)){
5803+
++AI; // gcstack_arg
5804+
}
57965805
for (size_t i = 0; i < nargs; i++) {
57975806
jl_value_t *jt = jl_nth_slot_type(calltype, i);
57985807
// n.b. specTypes is required to be a datatype by construction for specsig
@@ -6258,8 +6267,9 @@ static Function* gen_cfun_wrapper(
62586267
bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
62596268
assert(calltype == 3);
62606269
// emit a specsig call
6270+
bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
62616271
StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
6262-
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure);
6272+
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg);
62636273
FunctionType *cft = returninfo.decl.getFunctionType();
62646274
jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
62656275

@@ -6286,6 +6296,8 @@ static Function* gen_cfun_wrapper(
62866296
AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
62876297
args.push_back(return_roots);
62886298
}
6299+
if (gcstack_arg)
6300+
args.push_back(ctx.pgcstack);
62896301
for (size_t i = 0; i < nargs + 1; i++) {
62906302
// figure out how to repack the arguments
62916303
jl_cgval_t &inputarg = inputargs[i];
@@ -6332,11 +6344,15 @@ static Function* gen_cfun_wrapper(
63326344
emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context);
63336345
theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
63346346
}
6347+
63356348
assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
63366349
CallInst *call = ctx.builder.CreateCall(
63376350
returninfo.decl.getFunctionType(),
63386351
theFptr, ArrayRef<Value*>(args));
63396352
call->setAttributes(returninfo.attrs);
6353+
if (gcstack_arg)
6354+
call->setCallingConv(CallingConv::Swift);
6355+
63406356
switch (returninfo.cc) {
63416357
case jl_returninfo_t::Boxed:
63426358
retval = mark_julia_type(ctx, call, true, astrt);
@@ -6710,7 +6726,11 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
67106726
args[idx] = return_roots;
67116727
idx++;
67126728
}
6713-
6729+
bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
6730+
if (gcstack_arg) {
6731+
args[idx] = ctx.pgcstack;
6732+
idx++;
6733+
}
67146734
bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
67156735
for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
67166736
jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
@@ -6748,7 +6768,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
67486768
}
67496769
CallInst *call = ctx.builder.CreateCall(f.decl, args);
67506770
call->setAttributes(f.attrs);
6751-
6771+
if (gcstack_arg)
6772+
call->setCallingConv(CallingConv::Swift);
67526773
jl_cgval_t retval;
67536774
if (retarg != -1) {
67546775
Value *theArg;
@@ -6790,7 +6811,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
67906811
return w;
67916812
}
67926813

6793-
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure)
6814+
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg)
67946815
{
67956816
jl_returninfo_t props = {};
67966817
SmallVector<Type*, 8> fsig;
@@ -6875,6 +6896,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
68756896
fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
68766897
}
68776898

6899+
if (gcstack_arg){
6900+
AttrBuilder param(ctx.builder.getContext());
6901+
param.addAttribute(Attribute::SwiftSelf);
6902+
param.addAttribute(Attribute::NonNull);
6903+
attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
6904+
fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0));
6905+
}
6906+
68786907
for (size_t i = 0; i < jl_nparams(sig); i++) {
68796908
jl_value_t *jt = jl_tparam(sig, i);
68806909
bool isboxed = false;
@@ -6936,7 +6965,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
69366965
else
69376966
fval = emit_bitcast(ctx, fval, ftype->getPointerTo());
69386967
}
6939-
6968+
if (gcstack_arg && isa<Function>(fval))
6969+
cast<Function>(fval)->setCallingConv(CallingConv::Swift);
69406970
props.decl = FunctionCallee(ftype, fval);
69416971
props.attrs = attributes;
69426972
return props;
@@ -7163,7 +7193,8 @@ static jl_llvm_functions_t
71637193
Function *f = NULL;
71647194
bool has_sret = false;
71657195
if (specsig) { // assumes !va and !needsparams
7166-
returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure);
7196+
returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes,
7197+
jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg));
71677198
f = cast<Function>(returninfo.decl.getCallee());
71687199
has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
71697200
jl_init_function(f, ctx.emission_context.TargetTriple);
@@ -7348,7 +7379,6 @@ static jl_llvm_functions_t
73487379
ctx.spvals_ptr = &*AI++;
73497380
}
73507381
}
7351-
73527382
// step 6. set up GC frame
73537383
allocate_gc_frame(ctx, b0);
73547384
Value *last_age = NULL;
@@ -7554,6 +7584,12 @@ static jl_llvm_functions_t
75547584
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
75557585
attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
75567586
}
7587+
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){
7588+
Argument *Arg = &*AI;
7589+
++AI;
7590+
AttrBuilder param(ctx.builder.getContext());
7591+
attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param);
7592+
}
75577593
for (i = 0; i < nreq; i++) {
75587594
jl_sym_t *s = slot_symbol(ctx, i);
75597595
jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
@@ -8564,7 +8600,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg
85648600
jl_llvm_functions_t declarations;
85658601
declarations.functionObject = "jl_f_opaque_closure_call";
85668602
if (uses_specsig(mi->specTypes, false, true, rettype, true)) {
8567-
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1);
8603+
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
85688604
Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
85698605
jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
85708606
size_t nrealargs = jl_nparams(mi->specTypes);

src/julia.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2344,6 +2344,7 @@ typedef struct {
23442344
// limited, standalone
23452345

23462346
int safepoint_on_entry; // Emit a safepoint on entry to each function
2347+
int gcstack_arg; // Pass the pgcstack pointer as an argument with swiftself
23472348

23482349
// Cache access. Default: jl_rettype_inferred.
23492350
jl_codeinstance_lookup_t lookup;

src/llvm-ptls.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,19 @@ bool LowerPTLS::run(bool *CFGModified)
314314
for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) {
315315
auto call = cast<CallInst>(*it);
316316
++it;
317+
auto f = call->getCaller();
318+
Value *pgcstack = NULL;
319+
for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) {
320+
if (arg->hasSwiftSelfAttr()){
321+
pgcstack = &*arg;
322+
break;
323+
}
324+
}
325+
if (pgcstack) {
326+
call->replaceAllUsesWith(pgcstack);
327+
call->eraseFromParent();
328+
continue;
329+
}
317330
assert(call->getCalledOperand() == pgcstack_getter);
318331
fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified);
319332
}

stdlib/InteractiveUtils/src/codeview.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
172172
raw::Bool, dump_module::Bool, syntax::Symbol,
173173
optimize::Bool, debuginfo::Symbol, binary::Bool)
174174
params = CodegenParams(debug_info_kind=Cint(0),
175-
safepoint_on_entry=raw)
175+
safepoint_on_entry=raw, gcstack_arg=raw)
176176
_dump_function(f, t, native, wrapper, raw, dump_module, syntax,
177177
optimize, debuginfo, binary, params)
178178
end

test/compiler/codegen.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ end
1717

1818
# The tests below assume a certain format and safepoint_on_entry=true breaks that.
1919
function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true)
20-
params = Base.CodegenParams(safepoint_on_entry=false)
20+
params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false)
2121
d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params)
2222
sprint(print, d)
2323
end

test/llvmpasses/fastmath.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
1414

1515
import Base.FastMath
1616

17-
# CHECK: call fast float @llvm.sqrt.f32(float %0)
17+
# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}})
1818
emit(FastMath.sqrt_fast, Float32)
1919

2020

test/llvmpasses/llvmcall.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3})
2828
# CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}})
2929
emit(foo, Foo)
3030

31-
# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half]
31+
# CHECK: define {{(swiftcc )?}}<2 x half> @julia_bar_{{[0-9]+}}(
3232
emit(bar, NTuple{2, Float16})

test/llvmpasses/loopinfo.jl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ end
6464
# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]]
6565
# LOWER-NOT: call void @julia.loopinfo_marker()
6666
# LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
67-
# FINAL: call void @j_iteration
68-
# FINAL: call void @j_iteration
69-
# FINAL: call void @j_iteration
70-
# FINAL-NOT: call void @j_iteration
67+
# FINAL: call {{(swiftcc )?}}void @j_iteration
68+
# FINAL: call {{(swiftcc )?}}void @j_iteration
69+
# FINAL: call {{(swiftcc )?}}void @j_iteration
70+
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
7171
# FINAL: br
7272
end
7373
end
@@ -90,17 +90,17 @@ end
9090
# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]]
9191
# LOWER-NOT: call void @julia.loopinfo_marker()
9292
# LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
93-
# FINAL: call void @j_iteration
94-
# FINAL: call void @j_iteration
95-
# FINAL: call void @j_iteration
96-
# FINAL: call void @j_iteration
97-
# FINAL: call void @j_iteration
98-
# FINAL: call void @j_iteration
99-
# FINAL: call void @j_iteration
100-
# FINAL: call void @j_iteration
101-
# FINAL: call void @j_iteration
102-
# FINAL: call void @j_iteration
103-
# FINAL-NOT: call void @j_iteration
93+
# FINAL: call {{(swiftcc )?}}void @j_iteration
94+
# FINAL: call {{(swiftcc )?}}void @j_iteration
95+
# FINAL: call {{(swiftcc )?}}void @j_iteration
96+
# FINAL: call {{(swiftcc )?}}void @j_iteration
97+
# FINAL: call {{(swiftcc )?}}void @j_iteration
98+
# FINAL: call {{(swiftcc )?}}void @j_iteration
99+
# FINAL: call {{(swiftcc )?}}void @j_iteration
100+
# FINAL: call {{(swiftcc )?}}void @j_iteration
101+
# FINAL: call {{(swiftcc )?}}void @j_iteration
102+
# FINAL: call {{(swiftcc )?}}void @j_iteration
103+
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
104104
end
105105
end
106106

@@ -111,8 +111,8 @@ end
111111
1 <= j <= I && continue
112112
@show (i,j)
113113
iteration(i)
114-
# FINAL: call void @j_iteration
115-
# FINAL-NOT: call void @j_iteration
114+
# FINAL: call {{(swiftcc )?}}void @j_iteration
115+
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
116116
end
117117
$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),)))
118118
end

test/llvmpasses/pipeline-o0.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
99

1010
# CHECK-LABEL: @julia_simple
1111
# CHECK-NOT: julia.get_pgcstack
12-
# CHECK: asm
1312
# CHECK-NOT: julia.gc_alloc_obj
1413
# CHECK: ijl_gc_pool_alloc
1514
# COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes

0 commit comments

Comments
 (0)