changeset 8555:4b0d672fa09c

8133352: aarch64: generates constrained unpredictable instructions
Summary: Fix generation of unpredictable STXR Rs, Rt, [Rn] with Rs == Rt
Reviewed-by: kvn, aph, adinn
author enevill
date Tue, 18 Aug 2015 12:40:22 +0000
parents 2812c402c790
children c0fd47b40d85
files src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/interp_masm_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
diffstat 7 files changed, 58 insertions(+), 39 deletions(-)
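Background: the ARMv8 ARM classifies STXR/STLXR with the status register equal
to the data register (Rs == Rt) or the base register (Rs == Rn) as CONSTRAINED
UNPREDICTABLE, and likewise LDXP/LDAXP with Rt1 == Rt2. The old HotSpot code
emitted exactly these forms (e.g. stxrw(tmp, tmp, addr)), so this change
tightens the assembler checks and reworks the callers to supply a distinct
status register. A minimal inline-asm sketch of the corrected increment loop
(assuming GCC/Clang on AArch64; an illustration, not the HotSpot code itself):

    #include <cstdint>

    static inline void atomic_incw_sketch(uint32_t* counter) {
      uint32_t value, status;
      asm volatile(
          "1: ldxr  %w0, [%2]      \n"   // load-exclusive the counter
          "   add   %w0, %w0, #1   \n"   // increment
          "   stxr  %w1, %w0, [%2] \n"   // status register != data register
          "   cbnz  %w1, 1b        \n"   // retry if the exclusive store failed
          : "=&r"(value), "=&r"(status)
          : "r"(counter)
          : "memory");
    }
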
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Aug 18 12:40:22 2015 +0000
@@ -285,7 +285,7 @@
     __ ldar(r21, r28);                                 //	ldar	x21, [x28]
 
 // LoadStoreExclusiveOp
-    __ stxrw(r24, r24, r7);                            //	stxr	w24, w24, [x7]
+    __ stxrw(r21, r24, r7);                            //       stxr    w21, w24, [x7]
     __ stlxrw(r21, r26, r28);                          //	stlxr	w21, w26, [x28]
     __ ldxrw(r21, r6);                                 //	ldxr	w21, [x6]
     __ ldaxrw(r15, r30);                               //	ldaxr	w15, [x30]
@@ -316,7 +316,7 @@
 
 // LoadStoreExclusiveOp
     __ ldxpw(r25, r4, r22);                            //	ldxp	w25, w4, [x22]
-    __ ldaxpw(r14, r14, r15);                          //	ldaxp	w14, w14, [x15]
+    __ ldaxpw(r13, r14, r15);                          //       ldaxp   w13, w14, [x15]
     __ stxpw(r20, r26, r8, r10);                       //	stxp	w20, w26, w8, [x10]
     __ stlxpw(r23, r18, r18, r18);                     //	stlxp	w23, w18, w18, [x18]
 
@@ -790,7 +790,7 @@
  260:	c85fffbb 	ldaxr	x27, [x29]
  264:	c89fffa0 	stlr	x0, [x29]
  268:	c8dfff95 	ldar	x21, [x28]
- 26c:	88187cf8 	stxr	w24, w24, [x7]
+ 26c:   88157cf8        stxr    w21, w24, [x7]
  270:	8815ff9a 	stlxr	w21, w26, [x28]
  274:	885f7cd5 	ldxr	w21, [x6]
  278:	885fffcf 	ldaxr	w15, [x30]
@@ -813,7 +813,7 @@
  2bc:	c82870bb 	stxp	w8, x27, x28, [x5]
  2c0:   c825b8c8        stlxp   w5, x8, x14, [x6]
  2c4:	887f12d9 	ldxp	w25, w4, [x22]
- 2c8:	887fb9ee 	ldaxp	w14, w14, [x15]
+ 2c8:   887fb9ed        ldaxp   w13, w14, [x15]
  2cc:	8834215a 	stxp	w20, w26, w8, [x10]
  2d0:	8837ca52 	stlxp	w23, w18, w18, [x18]
  2d4:	f806317e 	str	x30, [x11,#99]
@@ -1102,13 +1102,13 @@
     0xd444c320,     0xd503201f,     0xd69f03e0,     0xd6bf03e0,
     0xd5033fdf,     0xd5033f9f,     0xd5033abf,     0xd61f0040,
     0xd63f00a0,     0xc8147c55,     0xc805fcfd,     0xc85f7e05,
-    0xc85fffbb,     0xc89fffa0,     0xc8dfff95,     0x88187cf8,
+    0xc85fffbb,     0xc89fffa0,     0xc8dfff95,     0x88157cf8,
     0x8815ff9a,     0x885f7cd5,     0x885fffcf,     0x889ffc73,
     0x88dffc56,     0x48127c0f,     0x480bff85,     0x485f7cdd,
     0x485ffcf2,     0x489fff99,     0x48dffe62,     0x080a7c3e,
     0x0814fed5,     0x085f7c59,     0x085ffcb8,     0x089ffc70,
     0x08dfffb6,     0xc87f0a68,     0xc87fcdc7,     0xc82870bb,
-    0xc825b8c8,     0x887f12d9,     0x887fb9ee,     0x8834215a,
+    0xc825b8c8,     0x887f12d9,     0x887fb9ed,     0x8834215a,
     0x8837ca52,     0xf806317e,     0xb81b3337,     0x39000dc2,
     0x78005149,     0xf84391f4,     0xb85b220c,     0x385fd356,
     0x785d127e,     0x389f4149,     0x79801e3c,     0x79c014a3,
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Aug 18 12:40:22 2015 +0000
@@ -1096,13 +1096,13 @@
 
 #define INSN4(NAME, sz, op, o0) /* Four registers */			\
   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {	\
-    assert(Rs != Rn, "unpredictable instruction");                  \
+    guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
     load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);			\
   }
 
 #define INSN3(NAME, sz, op, o0) /* Three registers */			\
   void NAME(Register Rs, Register Rt, Register Rn) {			\
-    assert(Rs != Rn, "unpredictable instruction");                  \
+    guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
     load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);	\
   }
 
@@ -1114,6 +1114,7 @@
 
 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
   void NAME(Register Rt1, Register Rt2, Register Rn) {			\
+    guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
     load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0);	\
   }
 
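Two things change in this header: the check is promoted from assert(), which
compiles away in product builds, to guarantee(), which is always active; and
it now rejects the status register aliasing the data register(s), not just
the base. A hypothetical expansion of INSN3 for stxrw (the concrete sz/op/o0
arguments come from the surrounding header and are left symbolic here):

    void stxrw(Register Rs, Register Rt, Register Rn) {
      // Rejects both Rs == Rn and the newly covered Rs == Rt case.
      guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");
      load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);
    }
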
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Tue Aug 18 12:40:22 2015 +0000
@@ -612,6 +612,7 @@
     Label done;
 
     const Register swap_reg = r0;
+    const Register tmp = c_rarg2;
     const Register obj_reg = c_rarg3; // Will contain the oop
 
     const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
@@ -625,7 +626,7 @@
     ldr(obj_reg, Address(lock_reg, obj_offset));
 
     if (UseBiasedLocking) {
-      biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case);
+      biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
     }
 
     // Load (object->mark() | 1) into swap_reg
@@ -644,7 +645,7 @@
       cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
       bind(fast);
       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-		  rscratch2, rscratch1);
+                  rscratch2, rscratch1, tmp);
       b(done);
       bind(fail);
     } else {
@@ -672,7 +673,7 @@
     if (PrintBiasedLockingStatistics) {
       br(Assembler::NE, slow_case);
       atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
-		  rscratch2, rscratch1);
+                  rscratch2, rscratch1, tmp);
     }
     br(Assembler::EQ, done);
 
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Aug 18 12:40:22 2015 +0000
@@ -368,11 +368,7 @@
   if (PrintBiasedLockingStatistics && counters == NULL)
     counters = BiasedLocking::counters();
 
-  bool need_tmp_reg = false;
-  if (tmp_reg == noreg) {
-    tmp_reg = rscratch2;
-  }
-  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1);
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg);
   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
@@ -402,7 +398,7 @@
   if (counters != NULL) {
     Label around;
     cbnz(tmp_reg, around);
-    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1);
+    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2);
     b(done);
     bind(around);
   } else {
@@ -455,7 +451,7 @@
     bind(here);
     if (counters != NULL) {
       atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
-		  tmp_reg, rscratch1);
+                  tmp_reg, rscratch1, rscratch2);
     }
   }
   b(done);
@@ -481,7 +477,7 @@
     bind(here);
     if (counters != NULL) {
       atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
-		  tmp_reg, rscratch1);
+                  tmp_reg, rscratch1, rscratch2);
     }
   }
   b(done);
@@ -509,7 +505,7 @@
     // removing the bias bit from the object's header.
     if (counters != NULL) {
       atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
-		  rscratch1);
+                  rscratch1, rscratch2);
     }
     bind(nope);
   }
@@ -1548,15 +1544,15 @@
   return Address(Rd);
 }
 
-void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
   Label retry_load;
   bind(retry_load);
   // flush and load exclusive from the memory location
   ldxrw(tmp, counter_addr);
   addw(tmp, tmp, 1);
  // if we store+flush with no intervening write tmp2 will be zero
-  stxrw(tmp, tmp, counter_addr);
-  cbnzw(tmp, retry_load);
+  stxrw(tmp2, tmp, counter_addr);
+  cbnzw(tmp2, retry_load);
 }
 
 
@@ -1921,6 +1917,22 @@
   }
 }
 
+void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) {
+  if (decrement.is_register()) {
+    sub(Rd, Rn, decrement.as_register());
+  } else {
+    sub(Rd, Rn, decrement.as_constant());
+  }
+}
+
+void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) {
+  if (decrement.is_register()) {
+    subw(Rd, Rn, decrement.as_register());
+  } else {
+    subw(Rd, Rn, decrement.as_constant());
+  }
+}
+
 void MacroAssembler::reinit_heapbase()
 {
   if (UseCompressedOops) {
@@ -2010,7 +2022,7 @@
     return a != b.as_register() && a != c && b.as_register() != c;
 }
 
-#define ATOMIC_OP(LDXR, OP, STXR)					\
+#define ATOMIC_OP(LDXR, OP, IOP, STXR)                                       \
 void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
   Register result = rscratch2;						\
   if (prev->is_valid())							\
@@ -2020,14 +2032,15 @@
   bind(retry_load);							\
   LDXR(result, addr);							\
   OP(rscratch1, result, incr);						\
-  STXR(rscratch1, rscratch1, addr);					\
-  cbnzw(rscratch1, retry_load);						\
-  if (prev->is_valid() && prev != result)				\
-    mov(prev, result);							\
+  STXR(rscratch2, rscratch1, addr);                                     \
+  cbnzw(rscratch2, retry_load);                                         \
+  if (prev->is_valid() && prev != result) {                             \
+    IOP(prev, rscratch1, incr);                                         \
+  }                                                                     \
 }
 
-ATOMIC_OP(ldxr, add, stxr)
-ATOMIC_OP(ldxrw, addw, stxrw)
+ATOMIC_OP(ldxr, add, sub, stxr)
+ATOMIC_OP(ldxrw, addw, subw, stxrw)
 
 #undef ATOMIC_OP
 
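The subtle part of the ATOMIC_OP rewrite: rscratch2 first holds the loaded
value, then receives the store-exclusive status, so the old value is gone by
the time the loop exits. Rather than consume a third scratch register, the
macro reconstructs it with the inverse operation (IOP): prev = (old + incr)
- incr. A standalone sketch of the same shape (assuming GCC/Clang on AArch64;
HotSpot's macro generates this with rscratch1/rscratch2):

    #include <cstdint>

    static inline uint64_t atomic_add_sketch(uint64_t* addr, uint64_t incr) {
      uint64_t result, newval;  // result doubles as loaded value and status
      asm volatile(
          "1: ldxr  %0, [%2]      \n"   // result = *addr (exclusive)
          "   add   %1, %0, %3    \n"   // newval = result + incr
          "   stxr  %w0, %1, [%2] \n"   // status overwrites the loaded value
          "   cbnz  %w0, 1b       \n"   // retry on failure
          : "=&r"(result), "=&r"(newval)
          : "r"(addr), "r"(incr)
          : "memory");
      return newval - incr;  // inverse op recovers the previous value
    }
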
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Aug 18 12:40:22 2015 +0000
@@ -108,9 +108,7 @@
   // Biased locking support
   // lock_reg and obj_reg must be loaded up with the appropriate values.
   // swap_reg is killed.
-  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
-  // be killed; if not supplied, push/pop will be used internally to
-  // allocate a temporary (inefficient, avoid if possible).
+  // tmp_reg must be supplied and must not be rscratch1 or rscratch2
   // Optional slow case is for implementations (interpreter and C1) which branch to
   // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
   // Returns offset of first potentially-faulting instruction for null
@@ -127,10 +125,10 @@
 
   // Helper functions for statistics gathering.
   // Unconditional atomic increment.
-  void atomic_incw(Register counter_addr, Register tmp);
-  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
+  void atomic_incw(Register counter_addr, Register tmp, Register tmp2);
+  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) {
     lea(tmp1, counter_addr);
-    atomic_incw(tmp1, tmp2);
+    atomic_incw(tmp1, tmp2, tmp3);
   }
   // Load Effective Address
   void lea(Register r, const Address &a) {
@@ -1027,6 +1025,8 @@
 
   void add(Register Rd, Register Rn, RegisterOrConstant increment);
   void addw(Register Rd, Register Rn, RegisterOrConstant increment);
+  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
+  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);
 
   void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
 
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Aug 18 12:40:22 2015 +0000
@@ -1798,6 +1798,7 @@
   const Register obj_reg  = r19;  // Will contain the oop
   const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
   const Register old_hdr  = r13;  // value of old header at unlock time
+  const Register tmp = c_rarg3;
 
   Label slow_path_lock;
   Label lock_done;
@@ -1819,7 +1820,7 @@
     __ ldr(obj_reg, Address(oop_handle_reg, 0));
 
     if (UseBiasedLocking) {
-      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock);
+      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock);
     }
 
     // Load (object->mark() | 1) into swap_reg %r0
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Thu Jul 16 14:16:44 2015 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Tue Aug 18 12:40:22 2015 +0000
@@ -2026,15 +2026,18 @@
 }
 
 void TemplateInterpreterGenerator::count_bytecode() {
+  Register rscratch3 = r0;
   __ push(rscratch1);
   __ push(rscratch2);
+  __ push(rscratch3);
   Label L;
   __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
   __ bind(L);
   __ ldxr(rscratch1, rscratch2);
   __ add(rscratch1, rscratch1, 1);
-  __ stxr(rscratch1, rscratch1, rscratch2);
-  __ cbnzw(rscratch1, L);
+  __ stxr(rscratch3, rscratch1, rscratch2);
+  __ cbnzw(rscratch3, L);
+  __ pop(rscratch3);
   __ pop(rscratch2);
   __ pop(rscratch1);
 }