Skip to content

Commit

Permalink
* profile g1 barrier filters, putfield version * implement C1 profili…
Browse files Browse the repository at this point in the history
…ng code * only actually do counter updates for oop stores * pass through counters to c2 barrier to generate code based on results * re-enable use of non-G1 collectors :D * swap null and same-region check if both needed and the former is more discriminating * move profiling to reference write instead of bool write (oops) * fix compilation errors; barrier_data() is examined for != 0 a lot, but we added additional flags to it... :( Fixes crashes * experimental (UseNewCode2): dirty the young gen cards on TLAB allocation, see if this changes statistics significantly... * experimental (UseNewCode3): change cost estimations for loop unrolling to #instructions (which is ~ C2 nodes in the fast path) * some tests with switching same-region and null check * add missing file * first (incomplete) attempt at handling aastore * aastore should work now; but not a lot of methods get a methoddata? * TraceByteCodes also prints existing profile data * remove debug code * some changes, trying to find crash reason * fix issue with c2 compiler coming across CombinedData and the existing code doing random typecasts which do not apply * enable (buggy) from-young check using XXXDoYoungPreDirty using pre-dirtied young gen cards * -XX:-G1UseConcurrentRefinement disables barrier filters (this is arbitrary) * use UseNewCode3 to not generate any barrier filters instead of tying it to G1UseConcRefinement since they are independent * post barrier costs based on actual generated barrier parts * change pre-barrier costs to align with post barrier, use fast path only for determining its cost * optimize barrier costs

* initial aarch64 bring-up (not working yet)

* fix the UseNewCode3 path, i.e. generating no barrier filters at all.
  Previously the filters were only skipped for nmethods with profiling
  information, not for everything.
  • Loading branch information
tschatzl committed Dec 11, 2024
1 parent 8f00e62 commit ff8aeea
Show file tree
Hide file tree
Showing 64 changed files with 1,477 additions and 239 deletions.
124 changes: 110 additions & 14 deletions src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,20 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ pop_call_clobbered_registers();

__ bind(done);
}

// Post-barrier filter: emits code that jumps to 'done' when the store address
// and the new value lie in the same G1 heap region. Clobbers tmp1.
static void generate_post_barrier_same_region_check(MacroAssembler* masm,
                                                    const Register store_addr,
                                                    const Register new_val,
                                                    const Register tmp1,
                                                    Label& done) {
  __ block_comment("cross-region");

  // The XOR of both addresses, shifted right by the region size (log2), is
  // zero exactly when the store does not cross heap regions.
  __ eor(tmp1, store_addr, new_val);
  __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
  __ cbz(tmp1, done);
}

// Post-barrier filter: emits code that jumps to 'done' when the value being
// stored is null. tmp1 is accepted for signature symmetry with the
// same-region check but is not used here.
static void generate_post_barrier_null_new_value_check(MacroAssembler* masm,
                                                       const Register new_val,
                                                       const Register tmp1,
                                                       Label& done) {
  __ block_comment("null-new-val");
  __ cbz(new_val, done);  // storing null: nothing to record
}

static void generate_post_barrier_fast_path(MacroAssembler* masm,
Expand All @@ -232,27 +245,43 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_maybe_null) {
uint ext_barrier_data) {
assert(thread == rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);

// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
__ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
__ cbz(tmp1, done);

// Crosses regions, storing null?
if (new_val_maybe_null) {
__ cbz(new_val, done);
bool gen_cross_region_check = ((ext_barrier_data & G1C2BarrierPostGenCrossCheck) != 0) || !UseNewCode;
bool gen_null_new_val_check = ((ext_barrier_data & G1C2BarrierPostGenNullCheck) != 0) || !UseNewCode;
bool gen_card_table_check = ((ext_barrier_data & G1C2BarrierPostGenCardCheck) != 0) || !UseNewCode;
bool null_check_first = ((ext_barrier_data & G1C2BarrierPostNullCheckFirst) != 0) || !UseNewCode;

bool new_val_maybe_null = ((ext_barrier_data & G1C2BarrierPostNotNull) != 0);

__ block_comment(err_msg("barrier parts: gen_same_region %d gen_null_new %d gen_card_table %d maybe_null %d swap_same_null %d", gen_cross_region_check, gen_null_new_val_check, gen_card_table_check, new_val_maybe_null, null_check_first));

if (!null_check_first) {
if (gen_cross_region_check) {
generate_post_barrier_same_region_check(masm, store_addr, new_val, tmp1, done);
}
// Crosses regions, storing null?
if (gen_null_new_val_check && new_val_maybe_null) {
generate_post_barrier_null_new_value_check(masm, new_val, tmp1, done);
}
} else {
assert(gen_cross_region_check, "must be");
assert(gen_null_new_val_check, "must be");
generate_post_barrier_null_new_value_check(masm, new_val, tmp1, done);
generate_post_barrier_same_region_check(masm, store_addr, new_val, tmp1, done);
}

// Storing region crossing non-null, is card young?

__ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base

Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
__ ldr(tmp2, card_table_addr); // tmp2 := card table base address
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
if (UseCondCardMark) {
if (gen_card_table_check && UseCondCardMark) {
__ ldrb(tmp2, Address(tmp1)); // tmp2 := card
// Instead of loading clean_card_val and comparing, we exploit the fact that
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
Expand All @@ -262,14 +291,22 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
__ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
}

// Extended barrier data requesting every post-barrier part: the not-null flag
// plus the cross-region, null-new-value and card-table checks.
static uint8_t gen_all_barrier_parts() {
  const uint8_t all_parts = G1C2BarrierPostNotNull |
                            G1C2BarrierPostGenCrossCheck |
                            G1C2BarrierPostGenNullCheck |
                            G1C2BarrierPostGenCardCheck;
  return all_parts;
}

// Extended barrier data with no flag set, i.e. no optional post-barrier part
// is requested.
static uint8_t gen_no_barrier_parts() {
  const uint8_t no_parts = 0;
  return no_parts;
}

// Emits the G1 post-write barrier fast path for a reference store of new_val
// to store_addr, requesting all barrier parts (see gen_all_barrier_parts()).
// tmp1 and tmp2 are scratch registers handed to the fast path.
//
// Only the post-change call is kept: the superseded call that passed
// 'false /* new_val_maybe_null */' would otherwise emit the fast path twice.
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp1,
                                                  Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, gen_all_barrier_parts());
  __ bind(done);
}

Expand Down Expand Up @@ -329,9 +366,9 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2,
bool new_val_maybe_null) {
uint8_t ext_barrier_data) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_maybe_null);
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, ext_barrier_data);
__ bind(done);
}

Expand Down Expand Up @@ -430,17 +467,76 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier

#undef __

#define __ masm->

void G1BarrierSetAssembler::g1_write_barrier_post_profile_c1(ciMethodData* md,
int bci,
MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
assert(md != nullptr, "must be");

ciProfileData* data = md->bci_to_data(bci);
assert(data != nullptr, "must be");
if (!data->is_G1CounterData()) {
assert(!UseNewCode, "must be");
return;
}


data = data->as_G1CounterData();
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, rscratch1 /* FIXME: move rscratch1 to regular tmp register */);

Register mdp = tmp2;
__ mov_metadata(mdp, md->constant_encoding());
__ increment(Address(mdp, md->byte_offset_of_slot(data, G1CounterData::visits_counter_offset())));

if (UseCompressedOops) {
__ decode_heap_oop(tmp1, new_val);
__ xorptr(tmp1, tmp1, store_addr);
} else {
__ xorptr(tmp1, new_val, store_addr);
}
__ shrptr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
__ cmp(tmp1, zr);
__ cset(tmp1, Assembler::EQ);
__ addptr(Address(mdp, md->byte_offset_of_slot(data, G1CounterData::same_region_counter_offset())), tmp1); // How many same-region pointers

__ cmp(new_val, zr);
__ cset(tmp1, Assembler::EQ);
__ addptr(Address(mdp, md->byte_offset_of_slot(data, G1CounterData::null_new_val_counter_offset())), tmp1); // How many zeros

__ movptr(rscratch1, Address(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
__ shrptr(tmp1, store_addr, G1CardTable::card_shift());
__ cmpb(Address(tmp1, rscratch1), G1CardTable::clean_card_val());
__ cset(tmp1, Assembler::EQ);
__ addptr(Address(mdp, md->byte_offset_of_slot(data, G1CounterData::clean_cards_counter_offset())), tmp1); // How many clean cards

if (XXXDoYoungPreDirty) {
__ movptr(rscratch1, Address(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
__ shrptr(tmp1, store_addr, G1CardTable::card_shift());
__ cmpb(Address(tmp1, rscratch1), G1CardTable::g1_young_card);
__ cset(tmp1, Assembler::EQ);
__ addptr(Address(mdp, md->byte_offset_of_slot(data, G1CounterData::from_young_counter_offset())), tmp1); // How many from-young cards
}
}

// Emits the G1 post-write barrier fast path for C1-compiled code, requesting
// all barrier parts (see gen_all_barrier_parts()).
//
// Only the post-change statements are kept: retaining the superseded call and
// its 'masm->bind(done)' would emit the fast path twice and bind 'done' twice.
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, gen_all_barrier_parts());
  __ bind(done);
}

#undef __

#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register thread,
Register tmp1,
Register tmp2,
bool new_val_maybe_null);
uint ext_barrier_data);
#endif

void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static void write_barrier_post(MacroAssembler* masm,
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
bool new_val_maybe_null = G1BarrierStubC2::post_new_val_maybe_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, new_val_maybe_null);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, G1BarrierStubC2::ext_barrier_data(node));
}

%}
Expand Down
137 changes: 126 additions & 11 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,14 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset(
// Kills:
// r2, r5
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
Label& ok_is_subtype) {
Label& ok_is_subtype,
bool is_aastore) {
assert(Rsub_klass != r0, "r0 holds superklass");
assert(Rsub_klass != r2, "r2 holds 2ndary super array length");
assert(Rsub_klass != r5, "r5 holds 2ndary super array scan ptr");

// Profile the not-null value's klass.
profile_typecheck(r2, Rsub_klass, r5); // blows r2, reloads r5
profile_typecheck(r2, Rsub_klass, r5, is_aastore); // blows r2, reloads r5

// Do the check.
check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2
Expand Down Expand Up @@ -1305,42 +1306,63 @@ void InterpreterMacroAssembler::profile_ret(Register return_bci,
}
}

void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
void InterpreterMacroAssembler::profile_null_seen(Register mdp, bool is_aastore) {
if (ProfileInterpreter) {
Label profile_continue;

// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);

if (UseNewCode && is_aastore) {
addptr(mdp, in_bytes(CombinedData::receiver_type_data_offset()));
}

set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());

// The method data pointer needs to be updated.
int mdp_delta = in_bytes(BitData::bit_data_size());
if (TypeProfileCasts) {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
if (UseNewCode && is_aastore) {
mdp_delta = in_bytes(CombinedData::receiver_type_data_size());
} else {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
}
}
update_mdp_by_constant(mdp, mdp_delta);

bind(profile_continue);
}
}

void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2, bool is_aastore) {
if (ProfileInterpreter) {
Label profile_continue;

// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);

// The method data pointer needs to be updated.
int mdp_delta = in_bytes(BitData::bit_data_size());
if (TypeProfileCasts) {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
// do no profiling for aastore here
if (TypeProfileCasts && UseNewCode && is_aastore) {
block_comment("aastore-do profiling {");

update_mdp_by_constant(mdp, in_bytes(CombinedData::receiver_type_data_offset()));

// Record the object type.
record_klass_in_profile(klass, mdp, reg2);

update_mdp_by_constant(mdp, in_bytes(CombinedData::receiver_type_data_size()));

block_comment("aastore-do profiling }");
} else {
// The method data pointer needs to be updated.
int mdp_delta = in_bytes(BitData::bit_data_size());
if (TypeProfileCasts) {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

// Record the object type.
record_klass_in_profile(klass, mdp, reg2);
}
update_mdp_by_constant(mdp, mdp_delta);
}
update_mdp_by_constant(mdp, mdp_delta);

bind(profile_continue);
}
Expand Down Expand Up @@ -1396,6 +1418,99 @@ void InterpreterMacroAssembler::profile_switch_case(Register index,
}
}

// Interpreter profiling for the G1 post-write barrier of a reference store to
// address (addr_base + addr_index). Updates the G1CounterData entry at the
// current method data pointer:
//   - same-region:  +1 if the store address and new_val are in the same region,
//   - null-new-val: +1 if new_val is null,
//   - clean-cards:  +1 if the card covering the store address is clean,
//   - from-young:   +1 if that card is a young card (only with XXXDoYoungPreDirty).
// The method data pointer is not advanced here. Does nothing unless both
// UseG1GC and ProfileInterpreter are enabled.
//
// All three argument registers as well as r10 and r11 are saved and restored
// around the profiling code; rscratch1, rscratch2 and the flags are clobbered.
//
// Changes relative to the initial bring-up code:
//   - uses AArch64 instructions and condition codes instead of x86 names,
//   - computes the store address before it is used (the code referred to an
//     undefined 'addr') and uses rthread instead of an undefined 'thread',
//   - decodes new_val with the null-tolerant decode_heap_oop, since new_val is
//     tested for null afterwards,
//   - counts clean / young cards on equality, matching the C1 profiling code.
void InterpreterMacroAssembler::profile_oop_store(Register addr_base, Register addr_index, Register new_val) {
  if (!UseG1GC) {
    return;
  }
  if (!ProfileInterpreter) {
    return;
  }

  Label profile_continue;

  assert_different_registers(addr_base, addr_index, new_val, rscratch1, rscratch2, r10, r11);

  block_comment("profile_oop_store {");

  push(addr_base);
  push(addr_index);
  push(new_val);
  push(r10);
  push(r11);

  const Register mdp  = r10;
  const Register tmp  = rscratch2;
  const Register tmp2 = r11;
  const Register addr = addr_base;  // addr_base is saved above, so reuse it for the full address

  // Adds 'inc' to the counter at [mdp + offset]; clobbers rscratch1 only.
  auto add_to_counter = [&](ByteSize offset, Register inc) {
    const Address counter(mdp, in_bytes(offset));
    ldr(rscratch1, counter);
    add(rscratch1, rscratch1, inc);
    str(rscratch1, counter);
  };

  // If no method data exists, exit.
  test_method_data_pointer(mdp, profile_continue);

  if (UseCompressedOops) {
    decode_heap_oop(new_val);  // may be null, so no _not_null variant
  }

  add(addr, addr_base, addr_index);  // addr := address being stored to

  // Same-region store?
  eor(tmp, addr, new_val);
  lsr(tmp, tmp, G1HeapRegion::LogOfHRGrainBytes);
  cmp(tmp, zr);
  cset(tmp, Assembler::EQ);
  add_to_counter(G1CounterData::same_region_counter_offset(), tmp);

  // Null store?
  cmp(new_val, zr);
  cset(tmp, Assembler::EQ);
  add_to_counter(G1CounterData::null_new_val_counter_offset(), tmp);

  // Load the card covering the store address; tmp := card value.
  ldr(tmp, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
  lsr(addr, addr, G1CardTable::card_shift());
  ldrb(tmp, Address(tmp, addr));

  // Clean card?
  cmpw(tmp, (int)G1CardTable::clean_card_val());
  cset(tmp2, Assembler::EQ);
  add_to_counter(G1CounterData::clean_cards_counter_offset(), tmp2);

  if (XXXDoYoungPreDirty) {
    // Young (pre-dirtied) card?
    cmpw(tmp, (int)G1CardTable::g1_young_card);
    cset(tmp2, Assembler::EQ);
    add_to_counter(G1CounterData::from_young_counter_offset(), tmp2);
  }

  // Bound before the pops so the no-method-data path restores the registers too.
  bind(profile_continue);

  pop(r11);
  pop(r10);
  pop(new_val);
  pop(addr_index);
  pop(addr_base);

  block_comment("}");
}

void InterpreterMacroAssembler::profile_putfield_fix_mdp() {
if (!UseG1GC) {
return;
}
if (!ProfileInterpreter) {
return;
}

Label profile_continue;

Register mdp = r10;

block_comment("profile_putfield_fix_mdp {");

push(mdp); // Just in case.

// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);

addptr(Address(mdp, in_bytes(G1CounterData::visits_counter_offset())), 1);
update_mdp_by_constant(mdp, in_bytes(G1CounterData::counter_data_size()));

bind(profile_continue);
pop(mdp);

block_comment("}");
}

void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) {
if (state == atos) {
MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line);
Expand Down
Loading

0 comments on commit ff8aeea

Please sign in to comment.