Skip to content

Commit

Permalink
Refactor code that is specific to stock GC
Browse files Browse the repository at this point in the history
  • Loading branch information
udesou committed Feb 3, 2025
1 parent e592169 commit 19eb356
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 102 deletions.
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ UV_HEADERS += uv/*.h
endif
PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
# Add the GC-specific public headers: the MMTk variants when MMTK_PLAN is
# anything other than "None", the stock GC variants otherwise.
ifneq (${MMTK_PLAN},None)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h gc-wb-mmtk.h)
else
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h gc-wb-stock.h)
endif
ifeq ($(OS),WINNT)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
Expand Down
34 changes: 34 additions & 0 deletions src/gc-wb-mmtk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

/*
write barriers which should be inlined by the compiler
*/

#ifndef JL_GC_WB_H
#define JL_GC_WB_H

#ifdef __cplusplus
extern "C" {
#endif

// GC write barriers

// TODO: implement these functions for MMTk
// Write barrier taking a `parent` object and a `ptr` it refers to.
// Not yet implemented for MMTk: this is an intentional no-op placeholder.
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    // Explicitly discard the arguments so that code including this header
    // does not get unused-parameter warnings from the empty body.
    (void)parent;
    (void)ptr;
}

// Write barrier taking a single object `ptr` (a jl_value_t*).
// Not yet implemented for MMTk: this is an intentional no-op placeholder.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
    // Explicitly discard the argument so that code including this header
    // does not get unused-parameter warnings from the empty body.
    (void)ptr;
}

// Write barrier taking a `parent` object and a value `ptr`.
// Not yet implemented for MMTk: this is an intentional no-op placeholder.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
    // Explicitly discard the arguments so that code including this header
    // does not get unused-parameter warnings from the empty body.
    (void)parent;
    (void)ptr;
}


#ifdef __cplusplus
}
#endif

#endif
50 changes: 50 additions & 0 deletions src/gc-wb-stock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

/*
write barriers which should be inlined by the compiler
*/

#ifndef JL_GC_WB_H
#define JL_GC_WB_H

#ifdef __cplusplus
extern "C" {
#endif

// GC write barriers

// Write barrier for a `parent` -> `ptr` reference (both are jl_value_t*).
// Queues `parent` as a GC root when `parent` is old and marked (i.e. not in the
// remset) while `ptr` does not have its mark bit set (i.e. is young).
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    int parent_old_marked = jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */;
    // `&&` keeps the child's tag from being read unless the parent qualifies.
    if (__unlikely(parent_old_marked &&
                   !(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */)))
        jl_gc_queue_root((jl_value_t*)parent);
}

// Write barrier on a single object: queues `ptr` as a GC root when it is old
// and marked (gc bits == GC_OLD_MARKED); does nothing otherwise.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
    // common case: ptr is not old-and-marked, nothing to record
    if (__likely(jl_astaggedvalue(ptr)->bits.gc != 3 /* GC_OLD_MARKED */))
        return;
    jl_gc_queue_root((jl_value_t*)ptr);
}

// Write barrier for an immutable object `ptr` referenced from `parent`.
// Calls jl_gc_queue_multiroot only when all of the following hold:
//   - parent is GC_OLD_MARKED (3), i.e. old and not in the remset;
//   - ptr is not GC_OLD_MARKED (otherwise it does not point to young objects);
//   - the layout of ptr's type reports at least one pointer field.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */)) {
        if (__unlikely(jl_astaggedvalue(ptr)->bits.gc != 3 /* GC_OLD_MARKED */)) {
            jl_datatype_t *ptr_type = (jl_datatype_t*)jl_typeof(ptr);
            if (ptr_type->layout->npointers)
                jl_gc_queue_multiroot((jl_value_t*)parent, ptr, ptr_type);
        }
    }
}

#ifdef __cplusplus
}
#endif

#endif
36 changes: 5 additions & 31 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1137,37 +1137,11 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
// thread-local allocator of the current thread.
JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);

// GC write barriers

// Write barrier for a `parent` -> `ptr` reference (both are jl_value_t*).
// Queues `parent` as a GC root when `parent` is old and marked (i.e. not in the
// remset) while `ptr` does not have its mark bit set (i.e. is young).
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    int parent_old_marked = jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */;
    // `&&` keeps the child's tag from being read unless the parent qualifies.
    if (__unlikely(parent_old_marked &&
                   !(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */)))
        jl_gc_queue_root((jl_value_t*)parent);
}

// Write barrier on a single object: queues `ptr` as a GC root when it is old
// and marked (gc bits == GC_OLD_MARKED); does nothing otherwise.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
    // common case: ptr is not old-and-marked, nothing to record
    if (__likely(jl_astaggedvalue(ptr)->bits.gc != 3 /* GC_OLD_MARKED */))
        return;
    jl_gc_queue_root((jl_value_t*)ptr);
}

// Write barrier for an immutable object `ptr` referenced from `parent`.
// Calls jl_gc_queue_multiroot only when all of the following hold:
//   - parent is GC_OLD_MARKED (3), i.e. old and not in the remset;
//   - ptr is not GC_OLD_MARKED (otherwise it does not point to young objects);
//   - the layout of ptr's type reports at least one pointer field.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */)) {
        if (__unlikely(jl_astaggedvalue(ptr)->bits.gc != 3 /* GC_OLD_MARKED */)) {
            jl_datatype_t *ptr_type = (jl_datatype_t*)jl_typeof(ptr);
            if (ptr_type->layout->npointers)
                jl_gc_queue_multiroot((jl_value_t*)parent, ptr, ptr_type);
        }
    }
}
#ifndef MMTK_GC
#include "gc-wb-stock.h"
#else
#include "gc-wb-mmtk.h"
#endif

JL_DLLEXPORT void jl_gc_safepoint(void);
JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);
Expand Down
27 changes: 27 additions & 0 deletions src/llvm-gc-interface-passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,4 +413,31 @@ struct FinalLowerGC: private JuliaPassContext {
void lowerSafepoint(CallInst *target, Function &F);
};

// The functions below are common to all GC implementations

// NOTE(review): the remark below was carried over together with this helper and
// does not appear to describe it; it is kept verbatim pending confirmation.
//
// Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize
// constant store loop to produce a `memset_pattern16` with a global variable
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
// for NI pointers.
//
// Look up the refinement list recorded for `V` in `S`.
// Returns nullptr when `S` is null, when `V` has no entry in `S->AllPtrNumbering`,
// or when `S->Refinements` holds no non-empty list for V's number. Otherwise
// returns a pointer to the list stored inside `S->Refinements`.
//
// Declared `inline` rather than `static`: this function is defined in a header
// and is called from the external-linkage inline function IsPermRooted. A
// `static` helper would give every translation unit its own private copy,
// making IsPermRooted's definitions differ across translation units (an ODR
// hazard) and triggering unused-function warnings where it is never called.
inline SmallVector<int, 1> *FindRefinements(Value *V, State *S)
{
    if (!S)
        return nullptr;
    auto it = S->AllPtrNumbering.find(V);
    if (it == S->AllPtrNumbering.end())
        return nullptr;
    auto rit = S->Refinements.find(it->second);
    if (rit == S->Refinements.end() || rit->second.empty())
        return nullptr;
    return &rit->second;
}

// Returns true when `V` is a Constant, or when the refinement list found for
// `V` in `S` consists of exactly one entry equal to -2. Returns false in every
// other case, including when no refinement list is found.
inline bool IsPermRooted(Value *V, State *S)
{
    if (!isa<Constant>(V)) {
        SmallVector<int, 1> *Refs = FindRefinements(V, S);
        return Refs != nullptr && Refs->size() == 1 && Refs->front() == -2;
    }
    return true;
}

#endif // LLVM_GC_PASSES_H
45 changes: 45 additions & 0 deletions src/llvm-late-gc-lowering-mmtk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,48 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
}
return target;
}

// FIXME: MMTk still needs a lowering for its own write barrier. Until then this
// body is the same as the stock GC lowering and tests the GC_OLD_MARKED /
// GC_MARKED tag bits.
//
// For every call in `WriteBarriers`:
//   - if each operand after the first is either the parent itself or
//     permanently rooted, the call is simply erased;
//   - otherwise the call is replaced by explicit checks: a "may_trigger_wb"
//     block entered when the parent's tag has all GC_OLD_MARKED bits set, and a
//     nested "trigger_wb" block entered when any child's GC_MARKED bit is
//     clear, which calls the queueGCRoot intrinsic on the parent.
// `*CFGModified` (when non-null) is set to true whenever blocks are split.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
    auto SizeTy = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
    for (CallInst *Barrier : WriteBarriers) {
        Value *Parent = Barrier->getArgOperand(0);
        auto needsNoBarrier = [Parent, &S](Value *Child) {
            return Parent == Child || IsPermRooted(Child, S);
        };
        if (std::all_of(Barrier->op_begin() + 1, Barrier->op_end(), needsNoBarrier)) {
            Barrier->eraseFromParent();
            continue;
        }
        if (CFGModified)
            *CFGModified = true;

        // Outer check: is the parent old and marked?
        IRBuilder<> builder(Barrier);
        builder.SetCurrentDebugLocation(Barrier->getDebugLoc());
        Value *ParentTag = EmitLoadTag(builder, SizeTy, Parent);
        Value *ParentBits = builder.CreateAnd(ParentTag, GC_OLD_MARKED, "parent_bits");
        Value *ParentOldMarked = builder.CreateICmpEQ(ParentBits, ConstantInt::get(SizeTy, GC_OLD_MARKED), "parent_old_marked");
        auto *MayTriggerTerm = SplitBlockAndInsertIfThen(ParentOldMarked, Barrier, false);
        builder.SetInsertPoint(MayTriggerTerm);
        MayTriggerTerm->getParent()->setName("may_trigger_wb");

        // Inner check: OR together "child is not marked" over all children.
        Value *anyChldNotMarked = nullptr;
        for (unsigned i = 1; i < Barrier->arg_size(); i++) {
            Value *Child = Barrier->getArgOperand(i);
            Value *ChildTag = EmitLoadTag(builder, SizeTy, Child);
            Value *ChildBit = builder.CreateAnd(ChildTag, GC_MARKED, "child_bit");
            Value *ChildNotMarked = builder.CreateICmpEQ(ChildBit, ConstantInt::get(SizeTy, 0), "child_not_marked");
            if (anyChldNotMarked)
                anyChldNotMarked = builder.CreateOr(anyChldNotMarked, ChildNotMarked);
            else
                anyChldNotMarked = ChildNotMarked;
        }
        assert(anyChldNotMarked); // handled by all_of test above
        MDBuilder MDB(Parent->getContext());
        SmallVector<uint32_t, 2> Weights{1, 9};
        auto *TriggerTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, MayTriggerTerm, false,
                                                      MDB.createBranchWeights(Weights));
        TriggerTerm->getParent()->setName("trigger_wb");
        builder.SetInsertPoint(TriggerTerm);
        if (Barrier->getCalledOperand() != write_barrier_func) {
            assert(false);
        }
        else {
            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), Parent);
        }
        Barrier->eraseFromParent();
    }
}
44 changes: 44 additions & 0 deletions src/llvm-late-gc-lowering-stock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
// Do nothing for the stock GC
return target;
}

// Stock GC lowering of the collected write-barrier calls.
//
// For every call in `WriteBarriers`:
//   - if each operand after the first is either the parent itself or
//     permanently rooted, the call is simply erased;
//   - otherwise the call is replaced by explicit checks: a "may_trigger_wb"
//     block entered when the parent's tag has all GC_OLD_MARKED bits set, and a
//     nested "trigger_wb" block entered when any child's GC_MARKED bit is
//     clear, which calls the queueGCRoot intrinsic on the parent.
// `*CFGModified` (when non-null) is set to true whenever blocks are split.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
    auto SizeTy = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
    for (CallInst *Barrier : WriteBarriers) {
        Value *Parent = Barrier->getArgOperand(0);
        auto needsNoBarrier = [Parent, &S](Value *Child) {
            return Parent == Child || IsPermRooted(Child, S);
        };
        if (std::all_of(Barrier->op_begin() + 1, Barrier->op_end(), needsNoBarrier)) {
            Barrier->eraseFromParent();
            continue;
        }
        if (CFGModified)
            *CFGModified = true;

        // Outer check: is the parent old and marked?
        IRBuilder<> builder(Barrier);
        builder.SetCurrentDebugLocation(Barrier->getDebugLoc());
        Value *ParentTag = EmitLoadTag(builder, SizeTy, Parent);
        Value *ParentBits = builder.CreateAnd(ParentTag, GC_OLD_MARKED, "parent_bits");
        Value *ParentOldMarked = builder.CreateICmpEQ(ParentBits, ConstantInt::get(SizeTy, GC_OLD_MARKED), "parent_old_marked");
        auto *MayTriggerTerm = SplitBlockAndInsertIfThen(ParentOldMarked, Barrier, false);
        builder.SetInsertPoint(MayTriggerTerm);
        MayTriggerTerm->getParent()->setName("may_trigger_wb");

        // Inner check: OR together "child is not marked" over all children.
        Value *anyChldNotMarked = nullptr;
        for (unsigned i = 1; i < Barrier->arg_size(); i++) {
            Value *Child = Barrier->getArgOperand(i);
            Value *ChildTag = EmitLoadTag(builder, SizeTy, Child);
            Value *ChildBit = builder.CreateAnd(ChildTag, GC_MARKED, "child_bit");
            Value *ChildNotMarked = builder.CreateICmpEQ(ChildBit, ConstantInt::get(SizeTy, 0), "child_not_marked");
            if (anyChldNotMarked)
                anyChldNotMarked = builder.CreateOr(anyChldNotMarked, ChildNotMarked);
            else
                anyChldNotMarked = ChildNotMarked;
        }
        assert(anyChldNotMarked); // handled by all_of test above
        MDBuilder MDB(Parent->getContext());
        SmallVector<uint32_t, 2> Weights{1, 9};
        auto *TriggerTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, MayTriggerTerm, false,
                                                      MDB.createBranchWeights(Weights));
        TriggerTerm->getParent()->setName("trigger_wb");
        builder.SetInsertPoint(TriggerTerm);
        if (Barrier->getCalledOperand() != write_barrier_func) {
            assert(false);
        }
        else {
            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), Parent);
        }
        Barrier->eraseFromParent();
    }
}
69 changes: 0 additions & 69 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1868,31 +1868,6 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *
return load;
}

// Enable this optimization only on LLVM 4.0+ since it causes LLVM to optimize
// a constant store loop into a `memset_pattern16` with a global variable
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
// for NI pointers.
// Look up the refinement list recorded for `V` in `S`.
// Returns nullptr when `S` is null, when `V` has no entry in `S->AllPtrNumbering`,
// or when `S->Refinements` holds no non-empty list for V's number. Otherwise
// returns a pointer to the list stored inside `S->Refinements`.
static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
{
    if (S) {
        auto NumberIt = S->AllPtrNumbering.find(V);
        if (NumberIt != S->AllPtrNumbering.end()) {
            auto RefineIt = S->Refinements.find(NumberIt->second);
            if (RefineIt != S->Refinements.end() && !RefineIt->second.empty())
                return &RefineIt->second;
        }
    }
    return nullptr;
}

// Returns true when `V` is a Constant, or when the refinement list found for
// `V` in `S` consists of exactly one entry equal to -2. Returns false in every
// other case, including when no refinement list is found.
static bool IsPermRooted(Value *V, State *S)
{
    if (!isa<Constant>(V)) {
        SmallVector<int, 1> *Refs = FindRefinements(V, S);
        return Refs != nullptr && Refs->size() == 1 && Refs->front() == -2;
    }
    return true;
}

static inline void UpdatePtrNumbering(Value *From, Value *To, State *S)
{
if (!S)
Expand All @@ -1911,50 +1886,6 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
}

// Lowers the collected write-barrier calls into explicit tag checks.
//
// For every call in `WriteBarriers`:
//   - if each operand after the first is either the parent itself or
//     permanently rooted, the call is simply erased;
//   - otherwise the call is replaced by explicit checks: a "may_trigger_wb"
//     block entered when the parent's tag has all GC_OLD_MARKED bits set, and a
//     nested "trigger_wb" block entered when any child's GC_MARKED bit is
//     clear, which calls the queueGCRoot intrinsic on the parent.
// `*CFGModified` (when non-null) is set to true whenever blocks are split.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
    auto SizeTy = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
    for (CallInst *Barrier : WriteBarriers) {
        Value *Parent = Barrier->getArgOperand(0);
        auto needsNoBarrier = [Parent, &S](Value *Child) {
            return Parent == Child || IsPermRooted(Child, S);
        };
        if (std::all_of(Barrier->op_begin() + 1, Barrier->op_end(), needsNoBarrier)) {
            Barrier->eraseFromParent();
            continue;
        }
        if (CFGModified)
            *CFGModified = true;

        // Outer check: is the parent old and marked?
        IRBuilder<> builder(Barrier);
        builder.SetCurrentDebugLocation(Barrier->getDebugLoc());
        Value *ParentTag = EmitLoadTag(builder, SizeTy, Parent);
        Value *ParentBits = builder.CreateAnd(ParentTag, GC_OLD_MARKED, "parent_bits");
        Value *ParentOldMarked = builder.CreateICmpEQ(ParentBits, ConstantInt::get(SizeTy, GC_OLD_MARKED), "parent_old_marked");
        auto *MayTriggerTerm = SplitBlockAndInsertIfThen(ParentOldMarked, Barrier, false);
        builder.SetInsertPoint(MayTriggerTerm);
        MayTriggerTerm->getParent()->setName("may_trigger_wb");

        // Inner check: OR together "child is not marked" over all children.
        Value *anyChldNotMarked = nullptr;
        for (unsigned i = 1; i < Barrier->arg_size(); i++) {
            Value *Child = Barrier->getArgOperand(i);
            Value *ChildTag = EmitLoadTag(builder, SizeTy, Child);
            Value *ChildBit = builder.CreateAnd(ChildTag, GC_MARKED, "child_bit");
            Value *ChildNotMarked = builder.CreateICmpEQ(ChildBit, ConstantInt::get(SizeTy, 0), "child_not_marked");
            if (anyChldNotMarked)
                anyChldNotMarked = builder.CreateOr(anyChldNotMarked, ChildNotMarked);
            else
                anyChldNotMarked = ChildNotMarked;
        }
        assert(anyChldNotMarked); // handled by all_of test above
        MDBuilder MDB(Parent->getContext());
        SmallVector<uint32_t, 2> Weights{1, 9};
        auto *TriggerTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, MayTriggerTerm, false,
                                                      MDB.createBranchWeights(Weights));
        TriggerTerm->getParent()->setName("trigger_wb");
        builder.SetInsertPoint(TriggerTerm);
        if (Barrier->getCalledOperand() != write_barrier_func) {
            assert(false);
        }
        else {
            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), Parent);
        }
        Barrier->eraseFromParent();
    }
}

bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
auto T_int32 = Type::getInt32Ty(F.getContext());
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
Expand Down

0 comments on commit 19eb356

Please sign in to comment.