# HG changeset patch # User aph # Date 1453225972 0 # Node ID 220fb5b3d9cecb6fa9ed2b2508ec93cd244d6147 # Parent 0dab40fe02b2a756a23b40f206a44597272fb512 8146709: AArch64: Incorrect use of ADRP for byte_map_base Reviewed-by: roland diff -r 0dab40fe02b2 -r 220fb5b3d9ce src/cpu/aarch64/vm/aarch64.ad --- a/src/cpu/aarch64/vm/aarch64.ad Tue Jan 12 14:55:15 2016 +0000 +++ b/src/cpu/aarch64/vm/aarch64.ad Tue Jan 19 17:52:52 2016 +0000 @@ -4470,11 +4470,7 @@ enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{ MacroAssembler _masm(&cbuf); - address page = (address)$src$$constant; - Register dst_reg = as_Register($dst$$reg); - unsigned long off; - __ adrp(dst_reg, ExternalAddress(page), off); - assert(off == 0, "assumed offset == 0"); + __ load_byte_map_base($dst$$Register); %} enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{ diff -r 0dab40fe02b2 -r 220fb5b3d9ce src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Tue Jan 12 14:55:15 2016 +0000 +++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000 @@ -1170,9 +1170,6 @@ #if INCLUDE_ALL_GCS -// Registers to be saved around calls to g1_wb_pre or g1_wb_post -#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2)) - case g1_pre_barrier_slow_id: { StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); @@ -1214,10 +1211,10 @@ __ b(done); __ bind(runtime); - __ push(G1_SAVE_REGS, sp); + __ push_call_clobbered_registers(); f.load_argument(0, pre_val); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); - __ pop(G1_SAVE_REGS, sp); + __ pop_call_clobbered_registers(); __ bind(done); } break; @@ -1245,45 +1242,49 @@ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf())); - const Register card_addr = rscratch2; - ExternalAddress cardtable((address) ct->byte_map_base); + const Register card_offset = rscratch2; + // LR is free here, so we can use it to hold the byte_map_base. + const Register byte_map_base = lr; - f.load_argument(0, card_addr); - __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift); - unsigned long offset; - __ adrp(rscratch1, cardtable, offset); - __ add(card_addr, card_addr, rscratch1); - __ ldrb(rscratch1, Address(card_addr, offset)); + assert_different_registers(card_offset, byte_map_base, rscratch1); + + f.load_argument(0, card_offset); + __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift); + __ load_byte_map_base(byte_map_base); + __ ldrb(rscratch1, Address(byte_map_base, card_offset)); __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val()); __ br(Assembler::EQ, done); assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); __ membar(Assembler::StoreLoad); - __ ldrb(rscratch1, Address(card_addr, offset)); + __ ldrb(rscratch1, Address(byte_map_base, card_offset)); __ cbzw(rscratch1, done); // storing region crossing non-NULL, card is clean. // dirty card and log. - __ strb(zr, Address(card_addr, offset)); + __ strb(zr, Address(byte_map_base, card_offset)); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add(card_addr, byte_map_base, card_addr); __ ldr(rscratch1, queue_index); __ cbz(rscratch1, runtime); __ sub(rscratch1, rscratch1, wordSize); __ str(rscratch1, queue_index); - const Register buffer_addr = r0; + // Reuse LR to hold buffer_addr + const Register buffer_addr = lr; - __ push(RegSet::of(r0, r1), sp); __ ldr(buffer_addr, buffer); __ str(card_addr, Address(buffer_addr, rscratch1)); - __ pop(RegSet::of(r0, r1), sp); __ b(done); __ bind(runtime); - __ push(G1_SAVE_REGS, sp); + __ push_call_clobbered_registers(); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); - __ pop(G1_SAVE_REGS, sp); + __ pop_call_clobbered_registers(); __ bind(done); } diff -r 0dab40fe02b2 -r 220fb5b3d9ce src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jan 12 14:55:15 2016 +0000 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000 @@ -2300,6 +2300,30 @@ } #endif +void MacroAssembler::push_call_clobbered_registers() { + push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); + + // Push v0-v7, v16-v31. + for (int i = 30; i >= 0; i -= 2) { + if (i <= v7->encoding() || i >= v16->encoding()) { + stpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -2 * wordSize))); + } + } +} + +void MacroAssembler::pop_call_clobbered_registers() { + + for (int i = 0; i < 32; i += 2) { + if (i <= v7->encoding() || i >= v16->encoding()) { + ldpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 2 * wordSize))); + } + } + + pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); +} + void MacroAssembler::push_CPU_state(bool save_vectors) { push(0x3fffffff, sp); // integer registers except lr & sp @@ -3052,7 +3076,7 @@ // FIXME: It's not likely that disp will fit into an offset so we // don't bother to check, but it could save an instruction. intptr_t disp = (intptr_t) ct->byte_map_base; - mov(rscratch1, disp); + load_byte_map_base(rscratch1); strb(zr, Address(obj, rscratch1)); } @@ -3535,12 +3559,10 @@ lsr(card_addr, store_addr, CardTableModRefBS::card_shift); - unsigned long offset; - adrp(tmp2, cardtable, offset); - // get the address of the card + load_byte_map_base(tmp2); add(card_addr, card_addr, tmp2); - ldrb(tmp2, Address(card_addr, offset)); + ldrb(tmp2, Address(card_addr)); cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); br(Assembler::EQ, done); @@ -3548,13 +3570,13 @@ membar(Assembler::Assembler::StoreLoad); - ldrb(tmp2, Address(card_addr, offset)); + ldrb(tmp2, Address(card_addr)); cbzw(tmp2, done); // storing a region crossing, non-NULL oop, card is clean. // dirty card and log. - strb(zr, Address(card_addr, offset)); + strb(zr, Address(card_addr)); ldr(rscratch1, queue_index); cbz(rscratch1, runtime); @@ -3906,6 +3928,9 @@ long offset_low = dest_page - low_page; long offset_high = dest_page - high_page; + assert(is_valid_AArch64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address"); + InstructionMark im(this); code_section()->relocate(inst_mark(), dest.rspec()); // 8143067: Ensure that the adrp can reach the dest from anywhere within @@ -3917,11 +3942,26 @@ long offset = dest_page - pc_page; offset = (offset & ((1<<20)-1)) << 12; _adrp(reg1, pc()+offset); - movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32); + movk(reg1, (unsigned long)dest.target() >> 32, 32); } byte_offset = (unsigned long)dest.target() & 0xfff; } +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; + + if (is_valid_AArch64_address((address)byte_map_base)) { + // Strictly speaking the byte_map_base isn't an address at all, + // and it might even be negative. + unsigned long offset; + adrp(reg, ExternalAddress((address)byte_map_base), offset); + assert(offset == 0, "misaligned card table base"); + } else { + mov(reg, (uint64_t)byte_map_base); + } +} + void MacroAssembler::build_frame(int framesize) { if (framesize == 0) { // Is this even possible? diff -r 0dab40fe02b2 -r 220fb5b3d9ce src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jan 12 14:55:15 2016 +0000 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jan 19 17:52:52 2016 +0000 @@ -436,6 +436,13 @@ int push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } int pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } + // Push and pop everything that might be clobbered by a native + // runtime call except rscratch1 and rscratch2. (They are always + // scratch, so we don't have to protect them.) Only save the lower + // 64 bits of each vector register. + void push_call_clobbered_registers(); + void pop_call_clobbered_registers(); + // now mov instructions for loading absolute addresses and 32 or // 64 bit integers @@ -1108,6 +1115,15 @@ // of your data. Address form_address(Register Rd, Register base, long byte_offset, int shift); + // Return true iff an address is within the 48-bit AArch64 address + // space. + bool is_valid_AArch64_address(address a) { + return ((uint64_t)a >> 48) == 0; + } + + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + // Prolog generator routines to support switch between x86 code and // generated ARM code diff -r 0dab40fe02b2 -r 220fb5b3d9ce src/cpu/aarch64/vm/stubGenerator_aarch64.cpp --- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jan 12 14:55:15 2016 +0000 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jan 19 17:52:52 2016 +0000 @@ -749,7 +749,7 @@ __ sub(end, end, start); // number of bytes to copy const Register count = end; // 'end' register contains bytes count now - __ mov(scratch, (address)ct->byte_map_base); + __ load_byte_map_base(scratch); __ add(start, start, scratch); __ BIND(L_loop); __ strb(zr, Address(start, count));