changeset 8588:220fb5b3d9ce

8146709: AArch64: Incorrect use of ADRP for byte_map_base Reviewed-by: roland
author aph
date Tue, 19 Jan 2016 17:52:52 +0000
parents 0dab40fe02b2
children 7a921b442081
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
diffstat 5 files changed, 87 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Jan 12 14:55:15 2016 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Jan 19 17:52:52 2016 +0000
@@ -4470,11 +4470,7 @@
 
   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
     MacroAssembler _masm(&cbuf);
-    address page = (address)$src$$constant;
-    Register dst_reg = as_Register($dst$$reg);
-    unsigned long off;
-    __ adrp(dst_reg, ExternalAddress(page), off);
-    assert(off == 0, "assumed offset == 0");
+    __ load_byte_map_base($dst$$Register);
   %}
 
   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jan 12 14:55:15 2016 +0000
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -1170,9 +1170,6 @@
 
 #if INCLUDE_ALL_GCS
 
-// Registers to be saved around calls to g1_wb_pre or g1_wb_post
-#define G1_SAVE_REGS (RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2))
-
     case g1_pre_barrier_slow_id:
       {
         StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
@@ -1214,10 +1211,10 @@
         __ b(done);
 
         __ bind(runtime);
-        __ push(G1_SAVE_REGS, sp);
+        __ push_call_clobbered_registers();
         f.load_argument(0, pre_val);
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
-	__ pop(G1_SAVE_REGS, sp);
+        __ pop_call_clobbered_registers();
         __ bind(done);
       }
       break;
@@ -1245,45 +1242,49 @@
         Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                         PtrQueue::byte_offset_of_buf()));
 
-        const Register card_addr = rscratch2;
-	ExternalAddress cardtable((address) ct->byte_map_base);
+        const Register card_offset = rscratch2;
+        // LR is free here, so we can use it to hold the byte_map_base.
+        const Register byte_map_base = lr;
 
-        f.load_argument(0, card_addr);
-        __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
-	unsigned long offset;
-	__ adrp(rscratch1, cardtable, offset);
-        __ add(card_addr, card_addr, rscratch1);
-        __ ldrb(rscratch1, Address(card_addr, offset));
+        assert_different_registers(card_offset, byte_map_base, rscratch1);
+
+        f.load_argument(0, card_offset);
+        __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift);
+        __ load_byte_map_base(byte_map_base);
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
         __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
 	__ br(Assembler::EQ, done);
 
 	assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");
 
         __ membar(Assembler::StoreLoad);
-        __ ldrb(rscratch1, Address(card_addr, offset));
+        __ ldrb(rscratch1, Address(byte_map_base, card_offset));
 	__ cbzw(rscratch1, done);
 
         // storing region crossing non-NULL, card is clean.
         // dirty card and log.
-        __ strb(zr, Address(card_addr, offset));
+        __ strb(zr, Address(byte_map_base, card_offset));
+
+        // Convert card offset into an address in card_addr
+        Register card_addr = card_offset;
+        __ add(card_addr, byte_map_base, card_addr);
 
         __ ldr(rscratch1, queue_index);
         __ cbz(rscratch1, runtime);
         __ sub(rscratch1, rscratch1, wordSize);
         __ str(rscratch1, queue_index);
 
-        const Register buffer_addr = r0;
+        // Reuse LR to hold buffer_addr
+        const Register buffer_addr = lr;
 
-	__ push(RegSet::of(r0, r1), sp);
 	__ ldr(buffer_addr, buffer);
 	__ str(card_addr, Address(buffer_addr, rscratch1));
-	__ pop(RegSet::of(r0, r1), sp);
 	__ b(done);
 
         __ bind(runtime);
-	__ push(G1_SAVE_REGS, sp);
+        __ push_call_clobbered_registers();
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
-	__ pop(G1_SAVE_REGS, sp);
+        __ pop_call_clobbered_registers();
         __ bind(done);
 
       }
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jan 12 14:55:15 2016 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -2300,6 +2300,30 @@
 }
 #endif
 
+void MacroAssembler::push_call_clobbered_registers() {
+  push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+
+  // Push v0-v7, v16-v31.
+  for (int i = 30; i >= 0; i -= 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+        stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+             Address(pre(sp, -2 * wordSize)));
+    }
+  }
+}
+
+void MacroAssembler::pop_call_clobbered_registers() {
+
+  for (int i = 0; i < 32; i += 2) {
+    if (i <= v7->encoding() || i >= v16->encoding()) {
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+    }
+  }
+
+  pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
+}
+
 void MacroAssembler::push_CPU_state(bool save_vectors) {
   push(0x3fffffff, sp);         // integer registers except lr & sp
 
@@ -3052,7 +3076,7 @@
   // FIXME: It's not likely that disp will fit into an offset so we
   // don't bother to check, but it could save an instruction.
   intptr_t disp = (intptr_t) ct->byte_map_base;
-  mov(rscratch1, disp);
+  load_byte_map_base(rscratch1);
   strb(zr, Address(obj, rscratch1));
 }
 
@@ -3535,12 +3559,10 @@
 
   lsr(card_addr, store_addr, CardTableModRefBS::card_shift);
 
-  unsigned long offset;
-  adrp(tmp2, cardtable, offset);
-
   // get the address of the card
+  load_byte_map_base(tmp2);
   add(card_addr, card_addr, tmp2);
-  ldrb(tmp2, Address(card_addr, offset));
+  ldrb(tmp2, Address(card_addr));
   cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
   br(Assembler::EQ, done);
 
@@ -3548,13 +3570,13 @@
 
   membar(Assembler::Assembler::StoreLoad);
 
-  ldrb(tmp2, Address(card_addr, offset));
+  ldrb(tmp2, Address(card_addr));
   cbzw(tmp2, done);
 
   // storing a region crossing, non-NULL oop, card is clean.
   // dirty card and log.
 
-  strb(zr, Address(card_addr, offset));
+  strb(zr, Address(card_addr));
 
   ldr(rscratch1, queue_index);
   cbz(rscratch1, runtime);
@@ -3906,6 +3928,9 @@
   long offset_low = dest_page - low_page;
   long offset_high = dest_page - high_page;
 
+  assert(is_valid_AArch64_address(dest.target()), "bad address");
+  assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
+
   InstructionMark im(this);
   code_section()->relocate(inst_mark(), dest.rspec());
   // 8143067: Ensure that the adrp can reach the dest from anywhere within
@@ -3917,11 +3942,26 @@
     long offset = dest_page - pc_page;
     offset = (offset & ((1<<20)-1)) << 12;
     _adrp(reg1, pc()+offset);
-    movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
+    movk(reg1, (unsigned long)dest.target() >> 32, 32);
   }
   byte_offset = (unsigned long)dest.target() & 0xfff;
 }
 
+void MacroAssembler::load_byte_map_base(Register reg) {
+  jbyte *byte_map_base =
+    ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base;
+
+  if (is_valid_AArch64_address((address)byte_map_base)) {
+    // Strictly speaking the byte_map_base isn't an address at all,
+    // and it might even be negative.
+    unsigned long offset;
+    adrp(reg, ExternalAddress((address)byte_map_base), offset);
+    assert(offset == 0, "misaligned card table base");
+  } else {
+    mov(reg, (uint64_t)byte_map_base);
+  }
+}
+
 void MacroAssembler::build_frame(int framesize) {
   if (framesize == 0) {
     // Is this even possible?
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jan 12 14:55:15 2016 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jan 19 17:52:52 2016 +0000
@@ -436,6 +436,13 @@
   int push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
   int pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
 
+  // Push and pop everything that might be clobbered by a native
+  // runtime call except rscratch1 and rscratch2.  (They are always
+  // scratch, so we don't have to protect them.)  Only save the lower
+  // 64 bits of each vector register.
+  void push_call_clobbered_registers();
+  void pop_call_clobbered_registers();
+
   // now mov instructions for loading absolute addresses and 32 or
   // 64 bit integers
 
@@ -1108,6 +1115,15 @@
   // of your data.
   Address form_address(Register Rd, Register base, long byte_offset, int shift);
 
+  // Return true iff an address is within the 48-bit AArch64 address
+  // space.
+  bool is_valid_AArch64_address(address a) {
+    return ((uint64_t)a >> 48) == 0;
+  }
+
+  // Load the base of the cardtable byte map into reg.
+  void load_byte_map_base(Register reg);
+
   // Prolog generator routines to support switch between x86 code and
   // generated ARM code
 
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Jan 12 14:55:15 2016 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Jan 19 17:52:52 2016 +0000
@@ -749,7 +749,7 @@
            __ sub(end, end, start); // number of bytes to copy
 
           const Register count = end; // 'end' register contains bytes count now
-	  __ mov(scratch, (address)ct->byte_map_base);
+          __ load_byte_map_base(scratch);
           __ add(start, start, scratch);
 	  __ BIND(L_loop);
 	  __ strb(zr, Address(start, count));