Skip to content

Commit

Permalink
Optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Schatzl committed Mar 20, 2024
1 parent 450b27d commit 068a1d5
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 45 deletions.
23 changes: 12 additions & 11 deletions src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,14 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
// get the address of the card
__ load_byte_map_base(tmp2);
__ add(card_addr, card_addr, tmp2);
__ ldrb(tmp2, Address(card_addr));
__ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);
if (!UseNewCode) {
__ ldrb(tmp2, Address(card_addr));
__ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);

__ membar(Assembler::StoreLoad);
}
assert((int)CardTable::dirty_card_val() == 0, "must be 0");

__ membar(Assembler::StoreLoad);

__ ldrb(tmp2, Address(card_addr));
__ cbzw(tmp2, done);

Expand Down Expand Up @@ -444,13 +444,14 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ load_parameter(0, card_offset);
__ lsr(card_offset, card_offset, CardTable::card_shift());
__ load_byte_map_base(byte_map_base);
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);
if (!UseNewCode) {
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);

__ membar(Assembler::StoreLoad);
}
assert((int)CardTable::dirty_card_val() == 0, "must be 0");

__ membar(Assembler::StoreLoad);
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cbzw(rscratch1, done);

Expand Down
91 changes: 84 additions & 7 deletions src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,80 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {

if (true) {
assert(sizeof(CardTable::CardValue) == 1, "must be");

Label done;
__ testptr(count, count);
__ jcc(Assembler::equal, done); // nothing to do if empty ref array.

// Calculate end address.
__ shlptr(count, LogBytesPerHeapOop);
__ addptr(count, addr);
// Calculate start card address in "addr".
__ shrptr(addr, CardTable::card_shift());
__ movptr(tmp, (intptr_t)barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set())->card_table()->byte_map_base());
__ addptr(addr, tmp);

if (!UseNewCode) {
// If the object starts in a young region, there is nothing to do.
__ cmpb(Address(addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}

// Calculate card address of last word.
__ subptr(count, 1);
__ shrptr(count, CardTable::card_shift());
__ addptr(count, tmp);

Label loop;
__ bind(loop);

Label next_card;
__ cmpb(Address(addr, 0), G1CardTable::dirty_card_val());
__ jcc(Assembler::zero, next_card);

// Card was not dirty. Dirty card and enqueue.
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());

Address queue_index(r15_thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
Address buffer(r15_thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

__ movptr(tmp, queue_index);
__ testptr(tmp, tmp);
Label runtime;
__ jcc(Assembler::zero, runtime);
__ subptr(tmp, wordSize);
__ movptr(queue_index, tmp);
__ addptr(tmp, buffer);
__ movptr(Address(tmp, 0), addr);
__ jmp(next_card);

__ bind(runtime);

// Save caller saved registers.
__ push_call_clobbered_registers(false /* save_fpu */);
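// FIXME: probably issue with Windows, where the C argument registers differ.
// NOTE(review): the moves below are not order-safe in general - if "addr"
// lives in c_rarg1, it is overwritten with r15_thread before being copied
// to c_rarg0. Confirm the register assignment of all callers.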
if (c_rarg1 != r15_thread) {
__ mov(c_rarg1, r15_thread);
}
if (c_rarg0 != addr) {
__ mov(c_rarg0, addr);
}

__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), c_rarg0, c_rarg1);
__ pop_call_clobbered_registers(false /* save_fpu */);

__ bind(next_card);
__ addptr(addr, sizeof(CardTable::CardValue));
__ cmpptr(addr, count);
__ jcc(Assembler::belowEqual, loop);

__ bind(done);
return;
}
__ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
Expand Down Expand Up @@ -308,10 +382,12 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
__ addptr(card_addr, cardtable);

__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);
if (!UseNewCode) {
__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
__ cmpb(Address(card_addr, 0), G1CardTable::dirty_card_val());
__ jcc(Assembler::equal, done);

Expand Down Expand Up @@ -541,11 +617,12 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ addptr(card_addr, cardtable);

NOT_LP64(__ get_thread(thread);)
if (!UseNewCode) {
__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
__ cmpb(Address(card_addr, 0), CardTable::dirty_card_val());
__ jcc(Assembler::equal, done);

Expand Down
19 changes: 12 additions & 7 deletions src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,20 +465,25 @@ void G1BarrierSetC2::post_barrier(GraphKit* kit,
// Ok must mark the card if not already dirty

// load the original value of the card
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);

if (!UseNewCode) {
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card, unlikely); {
if (!UseNewCode) {
kit->sync_kit(ideal);
kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
__ sync_kit(kit);
}
kit->sync_kit(ideal);
kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
__ sync_kit(kit);

Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
} __ end_if();
} else {
Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
}
} __ end_if();
} __ end_if();
} else {
Expand All @@ -488,7 +493,7 @@ void G1BarrierSetC2::post_barrier(GraphKit* kit,
// are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card); {
__ if_then(card_val, BoolTest::ne, !UseNewCode ? young_card : dirty_card); {
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
}
Expand Down
30 changes: 20 additions & 10 deletions src/hotspot/share/gc/g1/g1BarrierSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
// In the slow path, we know a card is not young
assert(*byte != G1CardTable::g1_young_card_val(), "slow path invoked without filtering");
OrderAccess::storeload();
if (!UseNewCode) {
OrderAccess::storeload();
}
if (*byte != G1CardTable::dirty_card_val()) {
*byte = G1CardTable::dirty_card_val();
Thread* thr = Thread::current();
Expand All @@ -110,22 +112,30 @@ void G1BarrierSet::invalidate(JavaThread* thread, MemRegion mr) {
CardValue* last_byte = _card_table->byte_for(mr.last());

// skip young gen cards
if (*byte == G1CardTable::g1_young_card_val()) {
// MemRegion should not span multiple regions for the young gen.
DEBUG_ONLY(HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());)
assert(containing_hr->is_young(), "it should be young");
assert(containing_hr->is_in(mr.start()), "it should contain start");
assert(containing_hr->is_in(mr.last()), "it should also contain last");
return;
if (!UseNewCode) {
if (*byte == G1CardTable::g1_young_card_val()) {
// MemRegion should not span multiple regions for the young gen.
DEBUG_ONLY(HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());)
assert(containing_hr->is_young(), "it should be young");
assert(containing_hr->is_in(mr.start()), "it should contain start");
assert(containing_hr->is_in(mr.last()), "it should also contain last");
return;
}
OrderAccess::storeload();
} else {
HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());
if (containing_hr->is_young()) {
assert(containing_hr->is_in(mr.last()), "it should also contain last");
return;
}
}

OrderAccess::storeload();
// Enqueue if necessary.
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
for (; byte <= last_byte; byte++) {
CardValue bv = *byte;
assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");
assert(UseNewCode || bv != G1CardTable::g1_young_card_val(), "Invalid card");
if (bv != G1CardTable::dirty_card_val()) {
*byte = G1CardTable::dirty_card_val();
qset.enqueue(queue, byte);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1BarrierSet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
Expand Down
9 changes: 6 additions & 3 deletions src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,13 @@ inline void G1BarrierSet::write_ref_array_work(MemRegion mr) {
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
if (*byte != G1CardTable::g1_young_card_val()) {
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
if (!UseNewCode) {
if (*byte == G1CardTable::g1_young_card_val()) {
return;
}
}
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
}

inline void G1BarrierSet::enqueue_preloaded_if_weak(DecoratorSet decorators, oop value) {
Expand Down
11 changes: 9 additions & 2 deletions src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,16 @@ void G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* dst, s
bs->write_ref_array_pre(dst, length, false);
}

void G1BarrierSetRuntime::write_ref_array_post_entry(HeapWord* dst, size_t length) {
void G1BarrierSetRuntime::write_ref_array_post_entry(HeapWord* start, size_t length) {
HeapWord* end = (HeapWord*)((char*)start + (length*heapOopSize));

HeapWord* aligned_start = align_down(start, HeapWordSize);
HeapWord* aligned_end = align_up (end, HeapWordSize);
// If compressed oops were not being used, these should already be aligned
assert(UseCompressedOops || (aligned_start == start && aligned_end == end),
"Expected heap word alignment of start and end");
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
bs->G1BarrierSet::write_ref_array(dst, length);
bs->write_ref_array_work(MemRegion(aligned_start, aligned_end));
}

// G1 pre write barrier slowpath
Expand Down
9 changes: 5 additions & 4 deletions src/hotspot/share/gc/g1/g1CardTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());

memset_with_concurrent_readers(first, g1_young_gen, pointer_delta(last, first, sizeof(CardValue)));
memset_with_concurrent_readers(first, !UseNewCode ? g1_young_card_val() : dirty_card_val(), pointer_delta(last, first, sizeof(CardValue)));
}

#ifndef PRODUCT
void G1CardTable::verify_g1_young_region(MemRegion mr) {
verify_region(mr, g1_young_gen, true);
verify_region(mr, !UseNewCode ? g1_young_card_val() : dirty_card_val(), true);
}
#endif

Expand Down Expand Up @@ -69,6 +69,7 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}

bool G1CardTable::is_in_young(const void* p) const {
volatile CardValue* card = byte_for(p);
return *card == G1CardTable::g1_young_card_val();
// volatile CardValue* card = byte_for(p);
//return *card == G1CardTable::g1_young_card_val();
return G1CollectedHeap::heap()->heap_region_containing(p)->is_young();
}

0 comments on commit 068a1d5

Please sign in to comment.