Skip to content

Commit 891dc4d

Browse files
committed
Refactor code that is specific to stock GC
1 parent a358f58 commit 891dc4d

8 files changed

+207
-102
lines changed

src/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ UV_HEADERS += uv/*.h
123123
endif
124124
PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
125125
ifneq (${MMTK_PLAN},None)
126-
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h)
126+
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h gc-wb-mmtk.h)
127127
else
128-
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h)
128+
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h gc-wb-stock.h)
129129
endif
130130
ifeq ($(OS),WINNT)
131131
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)

src/gc-wb-mmtk.h

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
/*
4+
write barriers which should be inlined by the compiler
5+
*/
6+
7+
#ifndef JL_GC_WB_H
8+
#define JL_GC_WB_H
9+
10+
#ifdef __cplusplus
11+
extern "C" {
12+
#endif
13+
14+
// GC write barriers
15+
16+
// TODO: implement these functions for MMTk
17+
// Write-barrier hook for the MMTk build, called with the object being written
// to (`parent`) and the value being stored (`ptr`).
// Placeholder (see the TODO above): the body is empty, so neither argument is
// read and the call has no effect in this revision.
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
18+
{
19+
}
20+
21+
// Single-argument write-barrier hook for the MMTk build.
// Placeholder: the body is empty, so `ptr` is never read and the call has no
// effect in this revision.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
22+
{
23+
}
24+
25+
// Multi-root write-barrier hook for the MMTk build.
// Placeholder: the body is empty, so neither `parent` nor `ptr` is read and the
// call has no effect in this revision.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
26+
{
27+
}
28+
29+
30+
#ifdef __cplusplus
31+
}
32+
#endif
33+
34+
#endif

src/gc-wb-stock.h

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
/*
4+
write barriers which should be inlined by the compiler
5+
*/
6+
7+
#ifndef JL_GC_WB_H
8+
#define JL_GC_WB_H
9+
10+
#ifdef __cplusplus
11+
extern "C" {
12+
#endif
13+
14+
// GC write barriers
15+
16+
// Stock-GC write barrier for storing `ptr` into `parent`.
// Reads the `bits.gc` field of both objects' tagged headers and passes `parent`
// to jl_gc_queue_root() only when parent's bits equal 3 (GC_OLD_MARKED) and the
// low bit (GC_MARKED) of ptr's bits is clear. Otherwise it does nothing.
// The numeric literals mirror the GC_* constants named in the inline comments.
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
17+
{
18+
// parent and ptr isa jl_value_t*
19+
// The combined condition is wrapped in __unlikely: the queueing path is the hinted-cold one.
if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
20+
(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
21+
jl_gc_queue_root((jl_value_t*)parent);
22+
}
23+
24+
// Stock-GC write barrier that takes only the object being written to.
// Passes `ptr` to jl_gc_queue_root() when its `bits.gc` equals 3
// (GC_OLD_MARKED); the stored value is not inspected at all.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
25+
{
26+
// if ptr is old
27+
if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) {
28+
jl_gc_queue_root((jl_value_t*)ptr);
29+
}
30+
}
31+
32+
// Stock-GC write barrier for storing `ptr` into `parent` when the fields of
// `ptr` may themselves need to be considered (see the "immutable object" note
// below).
// Returns early unless parent's `bits.gc` is 3 and ptr's `bits.gc` is not 3.
// Past those checks, it calls jl_gc_queue_multiroot(parent, ptr, dt) only if the
// layout of ptr's type reports a non-zero `npointers`.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
33+
{
34+
// 3 == GC_OLD_MARKED
35+
// ptr is an immutable object
36+
if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
37+
return; // parent is young or in remset
38+
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
39+
return; // ptr is old and not in remset (thus it does not point to young)
40+
// Fetch ptr's type and its layout to find out whether it has any pointer fields.
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
41+
const jl_datatype_layout_t *ly = dt->layout;
42+
// With npointers == 0 nothing is queued.
if (ly->npointers)
43+
jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
44+
}
45+
46+
#ifdef __cplusplus
47+
}
48+
#endif
49+
50+
#endif

src/julia.h

+5-31
Original file line numberDiff line numberDiff line change
@@ -1137,37 +1137,11 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
11371137
// thread-local allocator of the current thread.
11381138
JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
11391139

1140-
// GC write barriers
1141-
1142-
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
1143-
{
1144-
// parent and ptr isa jl_value_t*
1145-
if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
1146-
(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
1147-
jl_gc_queue_root((jl_value_t*)parent);
1148-
}
1149-
1150-
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
1151-
{
1152-
// if ptr is old
1153-
if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) {
1154-
jl_gc_queue_root((jl_value_t*)ptr);
1155-
}
1156-
}
1157-
1158-
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
1159-
{
1160-
// 3 == GC_OLD_MARKED
1161-
// ptr is an immutable object
1162-
if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
1163-
return; // parent is young or in remset
1164-
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
1165-
return; // ptr is old and not in remset (thus it does not point to young)
1166-
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
1167-
const jl_datatype_layout_t *ly = dt->layout;
1168-
if (ly->npointers)
1169-
jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
1170-
}
1140+
#ifndef MMTK_GC
1141+
#include "gc-wb-stock.h"
1142+
#else
1143+
#include "gc-wb-mmtk.h"
1144+
#endif
11711145

11721146
JL_DLLEXPORT void jl_gc_safepoint(void);
11731147
JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);

src/llvm-gc-interface-passes.h

+27
Original file line numberDiff line numberDiff line change
@@ -413,4 +413,31 @@ struct FinalLowerGC: private JuliaPassContext {
413413
void lowerSafepoint(CallInst *target, Function &F);
414414
};
415415

416+
// The functions below are common to all GC implementations
417+
418+
// Enable this optimization only on LLVM 4.0+ since this causes LLVM to optimize
419+
// constant store loop to produce a `memset_pattern16` with a global variable
420+
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
421+
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
422+
// for NI pointers.
423+
// Look up the refinement list recorded for `V` in the lowering state `S`.
//
// Returns nullptr when `S` is null, when `V` has no entry in
// S->AllPtrNumbering, or when the number assigned to `V` has no entry (or an
// empty one) in S->Refinements. Otherwise returns a pointer to the list stored
// inside S->Refinements; the storage belongs to `S`, not to the caller.
//
// Declared `inline` rather than `static` because this definition lives in a
// header and is called by the non-static `inline` IsPermRooted. An inline
// function with external linkage must not bind to an internal-linkage helper
// that is a distinct entity in every translation unit (one-definition rule).
// `inline` also avoids unused-function warnings in translation units that
// include the header without calling it.
inline SmallVector<int, 1> *FindRefinements(Value *V, State *S)
424+
{
425+
if (!S)
426+
return nullptr;
427+
// Map the value to its pointer number, if it has one.
auto it = S->AllPtrNumbering.find(V);
428+
if (it == S->AllPtrNumbering.end())
429+
return nullptr;
430+
// Map the pointer number to its refinement list; an empty list counts as absent.
auto rit = S->Refinements.find(it->second);
431+
return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
432+
}
433+
434+
// Report whether `V` is treated as permanently rooted.
//
// Returns true when `V` is an llvm::Constant. Otherwise it consults
// FindRefinements(V, S): if a refinement list exists, the result is true only
// when that list holds exactly one entry and the entry is -2. In every other
// case (including a null `S`, for which FindRefinements returns nullptr) it
// returns false.
// NOTE(review): -2 is presumably the refinement sentinel for "permanently
// rooted"; the definition is not visible here, so confirm against the State docs.
inline bool IsPermRooted(Value *V, State *S)
435+
{
436+
if (isa<Constant>(V))
437+
return true;
438+
if (auto *RefinePtr = FindRefinements(V, S))
439+
return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
440+
return false;
441+
}
442+
416443
#endif // LLVM_GC_PASSES_H

src/llvm-late-gc-lowering-mmtk.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,48 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
9494
}
9595
return target;
9696
}
97+
98+
// FIXME: implement this function for MMTk so that it lowers MMTk's own write barrier
99+
// Lower or remove every write-barrier call collected in `WriteBarriers`.
//
// NOTE: in this revision the body is the same as the stock-GC implementation
// and still tests the GC_OLD_MARKED / GC_MARKED tag bits; see the FIXME above.
//
// Parameters:
//   F             - function whose module supplies the data layout for T_size.
//   S             - lowering state handed to IsPermRooted (may be null; IsPermRooted
//                   then only accepts constants).
//   WriteBarriers - barrier calls to process; each one is erased by this function.
//   CFGModified   - optional out-flag, set to true when a barrier is expanded
//                   into new basic blocks.
//
// For each call: operand 0 is the parent. If every other operand is the parent
// itself or is permanently rooted, the call is simply deleted. Otherwise the
// call is replaced by two nested conditional blocks ("may_trigger_wb" and
// "trigger_wb") that end in a call to the queueGCRoot intrinsic on the parent.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
100+
// Pointer-sized integer type used for the tag loads and bit masks below.
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
101+
for (auto CI : WriteBarriers) {
102+
auto parent = CI->getArgOperand(0);
103+
// NOTE(review): the op_begin()+1 .. op_end() range is the operand list, which
// for an LLVM call presumably also includes the callee operand; a directly
// called function is a Constant and so passes IsPermRooted - confirm.
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
104+
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
105+
// No child can require the barrier: drop the call without touching the CFG.
CI->eraseFromParent();
106+
continue;
107+
}
108+
if (CFGModified) {
109+
*CFGModified = true;
110+
}
111+
112+
// Emit the outer check immediately before the barrier call:
// (tag(parent) & GC_OLD_MARKED) == GC_OLD_MARKED.
IRBuilder<> builder(CI);
113+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
114+
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
115+
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
116+
// Split the block at CI; the new "then" block runs only when the parent check holds.
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
117+
builder.SetInsertPoint(mayTrigTerm);
118+
mayTrigTerm->getParent()->setName("may_trigger_wb");
119+
// OR together "(tag(child) & GC_MARKED) == 0" over all child arguments.
Value *anyChldNotMarked = NULL;
120+
for (unsigned i = 1; i < CI->arg_size(); i++) {
121+
Value *child = CI->getArgOperand(i);
122+
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
123+
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
124+
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
125+
}
126+
assert(anyChldNotMarked); // handled by all_of test above
127+
// Inner split with branch weights {1, 9}; presumably this marks the
// "trigger_wb" path as the unlikely one - confirm weight order.
MDBuilder MDB(parent->getContext());
128+
SmallVector<uint32_t, 2> Weights{1, 9};
129+
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
130+
MDB.createBranchWeights(Weights));
131+
trigTerm->getParent()->setName("trigger_wb");
132+
builder.SetInsertPoint(trigTerm);
133+
if (CI->getCalledOperand() == write_barrier_func) {
134+
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
135+
}
136+
else {
137+
// NOTE(review): assert() is compiled out under NDEBUG, so in such builds an
// unexpected callee leaves "trigger_wb" empty and the barrier is still erased.
assert(false);
138+
}
139+
// The original barrier call is always removed once its expansion is in place.
CI->eraseFromParent();
140+
}
141+
}

src/llvm-late-gc-lowering-stock.cpp

+44
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
77
// Do nothing for the stock GC
88
return target;
99
}
10+
11+
// Stock-GC lowering of the write-barrier calls collected in `WriteBarriers`.
//
// Parameters:
//   F             - function whose module supplies the data layout for T_size.
//   S             - lowering state handed to IsPermRooted (may be null; IsPermRooted
//                   then only accepts constants).
//   WriteBarriers - barrier calls to process; each one is erased by this function.
//   CFGModified   - optional out-flag, set to true when a barrier is expanded
//                   into new basic blocks.
//
// For each call: operand 0 is the parent. If every other operand is the parent
// itself or is permanently rooted, the call is simply deleted. Otherwise the
// call is replaced by two nested conditional blocks ("may_trigger_wb" and
// "trigger_wb") that end in a call to the queueGCRoot intrinsic on the parent.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
12+
// Pointer-sized integer type used for the tag loads and bit masks below.
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
13+
for (auto CI : WriteBarriers) {
14+
auto parent = CI->getArgOperand(0);
15+
// NOTE(review): the op_begin()+1 .. op_end() range is the operand list, which
// for an LLVM call presumably also includes the callee operand; a directly
// called function is a Constant and so passes IsPermRooted - confirm.
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
16+
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
17+
// No child can require the barrier: drop the call without touching the CFG.
CI->eraseFromParent();
18+
continue;
19+
}
20+
if (CFGModified) {
21+
*CFGModified = true;
22+
}
23+
24+
// Emit the outer check immediately before the barrier call:
// (tag(parent) & GC_OLD_MARKED) == GC_OLD_MARKED.
IRBuilder<> builder(CI);
25+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
26+
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
27+
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
28+
// Split the block at CI; the new "then" block runs only when the parent check holds.
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
29+
builder.SetInsertPoint(mayTrigTerm);
30+
mayTrigTerm->getParent()->setName("may_trigger_wb");
31+
// OR together "(tag(child) & GC_MARKED) == 0" over all child arguments.
Value *anyChldNotMarked = NULL;
32+
for (unsigned i = 1; i < CI->arg_size(); i++) {
33+
Value *child = CI->getArgOperand(i);
34+
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
35+
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
36+
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
37+
}
38+
assert(anyChldNotMarked); // handled by all_of test above
39+
// Inner split with branch weights {1, 9}; presumably this marks the
// "trigger_wb" path as the unlikely one - confirm weight order.
MDBuilder MDB(parent->getContext());
40+
SmallVector<uint32_t, 2> Weights{1, 9};
41+
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
42+
MDB.createBranchWeights(Weights));
43+
trigTerm->getParent()->setName("trigger_wb");
44+
builder.SetInsertPoint(trigTerm);
45+
if (CI->getCalledOperand() == write_barrier_func) {
46+
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
47+
}
48+
else {
49+
// NOTE(review): assert() is compiled out under NDEBUG, so in such builds an
// unexpected callee leaves "trigger_wb" empty and the barrier is still erased.
assert(false);
50+
}
51+
// The original barrier call is always removed once its expansion is in place.
CI->eraseFromParent();
52+
}
53+
}

src/llvm-late-gc-lowering.cpp

-69
Original file line numberDiff line numberDiff line change
@@ -1868,31 +1868,6 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *
18681868
return load;
18691869
}
18701870

1871-
// Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize
1872-
// constant store loop to produce a `memset_pattern16` with a global variable
1873-
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
1874-
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
1875-
// for NI pointers.
1876-
static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
1877-
{
1878-
if (!S)
1879-
return nullptr;
1880-
auto it = S->AllPtrNumbering.find(V);
1881-
if (it == S->AllPtrNumbering.end())
1882-
return nullptr;
1883-
auto rit = S->Refinements.find(it->second);
1884-
return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
1885-
}
1886-
1887-
static bool IsPermRooted(Value *V, State *S)
1888-
{
1889-
if (isa<Constant>(V))
1890-
return true;
1891-
if (auto *RefinePtr = FindRefinements(V, S))
1892-
return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
1893-
return false;
1894-
}
1895-
18961871
static inline void UpdatePtrNumbering(Value *From, Value *To, State *S)
18971872
{
18981873
if (!S)
@@ -1911,50 +1886,6 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
19111886
return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
19121887
}
19131888

1914-
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
1915-
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
1916-
for (auto CI : WriteBarriers) {
1917-
auto parent = CI->getArgOperand(0);
1918-
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
1919-
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
1920-
CI->eraseFromParent();
1921-
continue;
1922-
}
1923-
if (CFGModified) {
1924-
*CFGModified = true;
1925-
}
1926-
1927-
IRBuilder<> builder(CI);
1928-
builder.SetCurrentDebugLocation(CI->getDebugLoc());
1929-
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
1930-
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
1931-
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
1932-
builder.SetInsertPoint(mayTrigTerm);
1933-
mayTrigTerm->getParent()->setName("may_trigger_wb");
1934-
Value *anyChldNotMarked = NULL;
1935-
for (unsigned i = 1; i < CI->arg_size(); i++) {
1936-
Value *child = CI->getArgOperand(i);
1937-
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
1938-
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
1939-
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
1940-
}
1941-
assert(anyChldNotMarked); // handled by all_of test above
1942-
MDBuilder MDB(parent->getContext());
1943-
SmallVector<uint32_t, 2> Weights{1, 9};
1944-
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
1945-
MDB.createBranchWeights(Weights));
1946-
trigTerm->getParent()->setName("trigger_wb");
1947-
builder.SetInsertPoint(trigTerm);
1948-
if (CI->getCalledOperand() == write_barrier_func) {
1949-
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
1950-
}
1951-
else {
1952-
assert(false);
1953-
}
1954-
CI->eraseFromParent();
1955-
}
1956-
}
1957-
19581889
bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
19591890
auto T_int32 = Type::getInt32Ty(F.getContext());
19601891
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());

0 commit comments

Comments
 (0)