changeset 10477:a8b62a7ca950 icedtea-3.14.0pre01

8151775, PR3750: aarch64: add support for 8.1 LSE atomic operations
Reviewed-by: aph
author fyang
date Sat, 23 Feb 2019 15:15:58 +0800
parents 7e8b97fc1c40
children 97a8a689a941
files src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
diffstat 5 files changed, 66 insertions(+), 236 deletions(-)
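Editor's note: this changeset replaces the open-coded UseLSE branches scattered across the aarch64 port with shared MacroAssembler helpers that choose between the ARMv8.1 LSE instructions (CASAL, LDADDAL, SWPAL) and the classic exclusive-load/store retry loop. As a standalone illustration only (not code from this changeset; file and function names below are made up), the same choice can be observed with the GCC/Clang __atomic builtins: built with -march=armv8.1-a+lse they lower to single LSE instructions, otherwise to an LDAXR/STLXR loop.

    // cas_demo.cpp - standalone sketch, not part of the changeset.
    // g++ -O2 -march=armv8.1-a+lse cas_demo.cpp   -> emits CASAL / LDADDAL
    // g++ -O2 -march=armv8-a       cas_demo.cpp   -> emits LDAXR/STLXR retry loops
    #include <cstdint>
    #include <cstdio>

    int64_t counter = 0;

    int64_t fetch_add(int64_t inc) {
      // LDADDAL under LSE, otherwise a load-exclusive/store-exclusive retry loop.
      return __atomic_fetch_add(&counter, inc, __ATOMIC_SEQ_CST);
    }

    bool cas(int64_t expected, int64_t desired) {
      // CASAL under LSE, otherwise LDAXR / CMP / STLXR / CBNZ.
      return __atomic_compare_exchange_n(&counter, &expected, desired,
                                         /*weak=*/false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    int main() {
      fetch_add(1);
      printf("cas ok: %d, counter: %lld\n", cas(1, 2), (long long)counter);
      return 0;
    }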
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Aug 14 18:57:49 2019 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Sat Feb 23 15:15:58 2019 +0800
@@ -1603,56 +1603,14 @@
 }
 
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  if (UseLSE) {
-    __ mov(rscratch1, cmpval);
-    __ casal(Assembler::word, rscratch1, newval, addr);
-    __ cmpw(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-  } else {
-    Label retry_load, nope;
-    // flush and load exclusive from the memory location
-    // and fail if it is not what we expect
-    if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
-      __ prfm(Address(addr), PSTL1STRM);
-    __ bind(retry_load);
-    __ ldaxrw(rscratch1, addr);
-    __ cmpw(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-    __ br(Assembler::NE, nope);
-    // if we store+flush with no intervening write rscratch1 wil be zero
-    __ stlxrw(rscratch1, newval, addr);
-    // retry so we only ever return after a load fails to compare
-    // ensures we don't return a stale value after a failed write.
-    __ cbnzw(rscratch1, retry_load);
-    __ bind(nope);
-  }
+  __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
 
 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  if (UseLSE) {
-    __ mov(rscratch1, cmpval);
-    __ casal(Assembler::xword, rscratch1, newval, addr);
-    __ cmp(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-  } else {
-    Label retry_load, nope;
-    // flush and load exclusive from the memory location
-    // and fail if it is not what we expect
-    if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
-      __ prfm(Address(addr), PSTL1STRM);
-    __ bind(retry_load);
-    __ ldaxr(rscratch1, addr);
-    __ cmp(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-    __ br(Assembler::NE, nope);
-    // if we store+flush with no intervening write rscratch1 wil be zero
-    __ stlxr(rscratch1, newval, addr);
-    // retry so we only ever return after a load fails to compare
-    // ensures we don't return a stale value after a failed write.
-    __ cbnz(rscratch1, retry_load);
-    __ bind(nope);
-  }
+  __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
 
@@ -3192,39 +3150,33 @@
   Address addr = as_Address(src->as_address_ptr(), noreg);
   BasicType type = src->type();
   bool is_oop = type == T_OBJECT || type == T_ARRAY;
-  Assembler::operand_size sz = Assembler::xword;
-
-  void (MacroAssembler::* lda)(Register Rd, Register Ra);
-  void (MacroAssembler::* add)(Register Rd, Register Rn, RegisterOrConstant increment);
-  void (MacroAssembler::* stl)(Register Rs, Register Rt, Register Rn);
+
+  void (MacroAssembler::* add)(Register prev, RegisterOrConstant incr, Register addr);
+  void (MacroAssembler::* xchg)(Register prev, Register newv, Register addr);
 
   switch(type) {
   case T_INT:
-    lda = &MacroAssembler::ldaxrw;
-    add = &MacroAssembler::addw;
-    stl = &MacroAssembler::stlxrw;
-    sz = Assembler::word;
+    xchg = &MacroAssembler::atomic_xchgalw;
+    add = &MacroAssembler::atomic_addalw;
     break;
   case T_LONG:
-    lda = &MacroAssembler::ldaxr;
-    add = &MacroAssembler::add;
-    stl = &MacroAssembler::stlxr;
+    xchg = &MacroAssembler::atomic_xchgal;
+    add = &MacroAssembler::atomic_addal;
     break;
   case T_OBJECT:
   case T_ARRAY:
     if (UseCompressedOops) {
-      lda = &MacroAssembler::ldaxrw;
-      add = &MacroAssembler::addw;
-      stl = &MacroAssembler::stlxrw;
-      sz = Assembler::word;
+      xchg = &MacroAssembler::atomic_xchgalw;
+      add = &MacroAssembler::atomic_addalw;
     } else {
-      lda = &MacroAssembler::ldaxr;
-      add = &MacroAssembler::add;
-      stl = &MacroAssembler::stlxr;
+      xchg = &MacroAssembler::atomic_xchgal;
+      add = &MacroAssembler::atomic_addal;
     }
     break;
   default:
     ShouldNotReachHere();
+    xchg = &MacroAssembler::atomic_xchgal;
+    add = &MacroAssembler::atomic_addal; // unreachable
   }
 
   switch (code) {
@@ -3234,32 +3186,16 @@
       Register tmp = as_reg(tmp_op);
       Register dst = as_reg(dest);
       if (data->is_constant()) {
-	inc = RegisterOrConstant(as_long(data));
-	assert_different_registers(dst, addr.base(), tmp,
-				   rscratch1, rscratch2);
+        inc = RegisterOrConstant(as_long(data));
+        assert_different_registers(dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
       } else {
-	inc = RegisterOrConstant(as_reg(data));
-	assert_different_registers(inc.as_register(), dst, addr.base(), tmp,
-				   rscratch1, rscratch2);
+        inc = RegisterOrConstant(as_reg(data));
+        assert_different_registers(inc.as_register(), dst, addr.base(), tmp,
+                                   rscratch1, rscratch2);
       }
       __ lea(tmp, addr);
-      if (UseLSE) {
-        if (inc.is_register()) {
-          __ ldaddal(sz, inc.as_register(), dst, tmp);
-        } else {
-          __ mov(rscratch2, inc.as_constant());
-          __ ldaddal(sz, rscratch2, dst, tmp);
-        }
-      } else {
-        Label again;
-        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
-          __ prfm(Address(tmp), PSTL1STRM);
-        __ bind(again);
-        (_masm->*lda)(dst, tmp);
-        (_masm->*add)(rscratch1, dst, inc);
-        (_masm->*stl)(rscratch2, rscratch1, tmp);
-        __ cbnzw(rscratch2, again);
-      }
+      (_masm->*add)(dst, inc, tmp);
       break;
     }
   case lir_xchg:
@@ -3273,19 +3209,9 @@
       }
       assert_different_registers(obj, addr.base(), tmp, rscratch1, dst);
       __ lea(tmp, addr);
-      if (UseLSE) {
-        __ swp(sz, obj, dst, tmp);
-      } else {
-        Label again;
-        if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
-          __ prfm(Address(tmp), PSTL1STRM);
-        __ bind(again);
-        (_masm->*lda)(dst, tmp);
-        (_masm->*stl)(rscratch2, obj, tmp);
-        __ cbnzw(rscratch2, again);
-      }
+      (_masm->*xchg)(dst, obj, tmp);
       if (is_oop && UseCompressedOops) {
-	__ decode_heap_oop(dst);
+        __ decode_heap_oop(dst);
       }
     }
     break;
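Editor's note: in c1_LIRAssembler_aarch64.cpp the compare-and-swap and atomic add/xchg LIR ops no longer emit the UseLSE split themselves; they call the MacroAssembler helpers (cmpxchg, atomic_addal/atomic_addalw, atomic_xchgal/atomic_xchgalw), which contain the split in one place. For reference only, a hand-written sketch of the two 32-bit CAS sequences involved (these functions are illustrations, not JVM code; the LSE one needs -march=armv8.1-a+lse to assemble):

    #include <cstdint>

    // Non-LSE path: the sequence the removed block open-coded
    // (HotSpot names: ldaxrw / cmpw / stlxrw / cbnzw).
    static inline uint32_t cas32_llsc(volatile uint32_t* addr,
                                      uint32_t cmpval, uint32_t newval) {
      uint32_t old, status;
      asm volatile(
          "1: ldaxr  %w0, [%2]      \n"   // load-acquire exclusive
          "   cmp    %w0, %w3       \n"
          "   b.ne   2f             \n"   // current value differs: fail
          "   stlxr  %w1, %w4, [%2] \n"   // store-release exclusive
          "   cbnz   %w1, 1b        \n"   // reservation lost: retry
          "2:                       \n"
          : "=&r"(old), "=&r"(status)
          : "r"(addr), "r"(cmpval), "r"(newval)
          : "memory", "cc");
      return old;   // caller compares against cmpval to detect success
    }

    // LSE path (ARMv8.1): a single CASAL does the whole job.
    static inline uint32_t cas32_lse(volatile uint32_t* addr,
                                     uint32_t cmpval, uint32_t newval) {
      uint32_t old = cmpval;
      asm volatile("casal %w0, %w2, [%1]"
                   : "+r"(old)
                   : "r"(addr), "r"(newval)
                   : "memory");
      return old;
    }

    int main() {
      volatile uint32_t v = 1;
      return (int)cas32_llsc(&v, 1, 2);   // returns 1 (old value); v is now 2
    }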
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Aug 14 18:57:49 2019 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Sat Feb 23 15:15:58 2019 +0800
@@ -2222,8 +2222,8 @@
     return a != b.as_register() && a != c && b.as_register() != c;
 }
 
-#define ATOMIC_OP(LDXR, OP, IOP, AOP, STXR, sz)                         \
-void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
+#define ATOMIC_OP(NAME, LDXR, OP, IOP, AOP, STXR, sz)                   \
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
   if (UseLSE) {                                                         \
     prev = prev->is_valid() ? prev : zr;                                \
     if (incr.is_register()) {                                           \
@@ -2234,16 +2234,16 @@
     }                                                                   \
     return;                                                             \
   }                                                                     \
-  Register result = rscratch2;						\
-  if (prev->is_valid())							\
-    result = different(prev, incr, addr) ? prev : rscratch2;		\
-									\
-  Label retry_load;							\
+  Register result = rscratch2;                                          \
+  if (prev->is_valid())                                                 \
+    result = different(prev, incr, addr) ? prev : rscratch2;            \
+                                                                        \
+  Label retry_load;                                                     \
   if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))         \
     prfm(Address(addr), PSTL1STRM);                                     \
-  bind(retry_load);							\
-  LDXR(result, addr);							\
-  OP(rscratch1, result, incr);						\
+  bind(retry_load);                                                     \
+  LDXR(result, addr);                                                   \
+  OP(rscratch1, result, incr);                                          \
   STXR(rscratch2, rscratch1, addr);                                     \
   cbnzw(rscratch2, retry_load);                                         \
   if (prev->is_valid() && prev != result) {                             \
@@ -2251,35 +2251,39 @@
   }                                                                     \
 }
 
-ATOMIC_OP(ldxr, add, sub, ldadd, stxr, Assembler::xword)
-ATOMIC_OP(ldxrw, addw, subw, ldadd, stxrw, Assembler::word)
+ATOMIC_OP(add, ldxr, add, sub, ldadd, stxr, Assembler::xword)
+ATOMIC_OP(addw, ldxrw, addw, subw, ldadd, stxrw, Assembler::word)
+ATOMIC_OP(addal, ldaxr, add, sub, ldaddal, stlxr, Assembler::xword)
+ATOMIC_OP(addalw, ldaxrw, addw, subw, ldaddal, stlxrw, Assembler::word)
 
 #undef ATOMIC_OP
 
-#define ATOMIC_XCHG(OP, LDXR, STXR, sz)                                 \
-void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {	\
+#define ATOMIC_XCHG(OP, AOP, LDXR, STXR, sz)                            \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
   if (UseLSE) {                                                         \
     prev = prev->is_valid() ? prev : zr;                                \
-    swp(sz, newv, prev, addr);                                          \
+    AOP(sz, newv, prev, addr);                                          \
     return;                                                             \
   }                                                                     \
-  Register result = rscratch2;						\
-  if (prev->is_valid())							\
-    result = different(prev, newv, addr) ? prev : rscratch2;		\
-									\
-  Label retry_load;							\
+  Register result = rscratch2;                                          \
+  if (prev->is_valid())                                                 \
+    result = different(prev, newv, addr) ? prev : rscratch2;            \
+                                                                        \
+  Label retry_load;                                                     \
   if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))         \
     prfm(Address(addr), PSTL1STRM);                                     \
-  bind(retry_load);							\
-  LDXR(result, addr);							\
-  STXR(rscratch1, newv, addr);						\
-  cbnzw(rscratch1, retry_load);						\
-  if (prev->is_valid() && prev != result)				\
-    mov(prev, result);							\
+  bind(retry_load);                                                     \
+  LDXR(result, addr);                                                   \
+  STXR(rscratch1, newv, addr);                                          \
+  cbnzw(rscratch1, retry_load);                                         \
+  if (prev->is_valid() && prev != result)                               \
+    mov(prev, result);                                                  \
 }
 
-ATOMIC_XCHG(xchg, ldxr, stxr, Assembler::xword)
-ATOMIC_XCHG(xchgw, ldxrw, stxrw, Assembler::word)
+ATOMIC_XCHG(xchg, swp, ldxr, stxr, Assembler::xword)
+ATOMIC_XCHG(xchgw, swp, ldxrw, stxrw, Assembler::word)
+ATOMIC_XCHG(xchgal, swpal, ldaxr, stlxr, Assembler::xword)
+ATOMIC_XCHG(xchgalw, swpal, ldaxrw, stlxrw, Assembler::word)
 
 #undef ATOMIC_XCHG
 
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Aug 14 18:57:49 2019 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Sat Feb 23 15:15:58 2019 +0800
@@ -981,9 +981,13 @@
 
   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
 
   void atomic_xchg(Register prev, Register newv, Register addr);
   void atomic_xchgw(Register prev, Register newv, Register addr);
+  void atomic_xchgal(Register prev, Register newv, Register addr);
+  void atomic_xchgalw(Register prev, Register newv, Register addr);
 
   void orptr(Address adr, RegisterOrConstant src) {
     ldr(rscratch2, adr);
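Editor's note: the ATOMIC_OP macro gains an explicit NAME parameter and ATOMIC_XCHG gains an AOP parameter, so the existing relaxed variants (ldxr/stxr, ldadd, swp) and the new acquire/release "al" variants (ldaxr/stlxr, ldaddal, swpal) can be stamped out from the same body, and the new entry points are declared in macroAssembler_aarch64.hpp. A simplified sketch of the same generator pattern over std::atomic (not HotSpot code; the ordering mapping is an approximation):

    #include <atomic>
    #include <cstdint>

    // One body, instantiated per name / memory order / width, like ATOMIC_OP.
    #define ATOMIC_ADD_OP(NAME, ORDER, TYPE)                           \
      inline TYPE atomic_##NAME(std::atomic<TYPE>* addr, TYPE incr) {  \
        return addr->fetch_add(incr, ORDER); /* returns old value */   \
      }

    ATOMIC_ADD_OP(add,    std::memory_order_relaxed, uint64_t)  // ~ ldxr/stxr, ldadd
    ATOMIC_ADD_OP(addw,   std::memory_order_relaxed, uint32_t)
    ATOMIC_ADD_OP(addal,  std::memory_order_acq_rel, uint64_t)  // ~ ldaxr/stlxr, ldaddal
    ATOMIC_ADD_OP(addalw, std::memory_order_acq_rel, uint32_t)

    #undef ATOMIC_ADD_OP

    int main() {
      std::atomic<uint64_t> v{0};
      return (int)atomic_addal(&v, 1);   // 0: the value before the add
    }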
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Wed Aug 14 18:57:49 2019 +0100
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Sat Feb 23 15:15:58 2019 +0800
@@ -2052,22 +2052,10 @@
   Register rscratch3 = r0;
   __ push(rscratch1);
   __ push(rscratch2);
-  __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
-  if (UseLSE) {
-    __ mov(rscratch1, 1);
-    __ ldadd(Assembler::xword, rscratch1, zr, rscratch2);
-  } else {
-    __ push(rscratch3);
-    Label L;
-    if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
-      __ prfm(Address(rscratch2), PSTL1STRM);
-    __ bind(L);
-    __ ldxr(rscratch1, rscratch2);
-    __ add(rscratch1, rscratch1, 1);
-    __ stxr(rscratch3, rscratch1, rscratch2);
-    __ cbnzw(rscratch3, L);
-    __ pop(rscratch3);
-  }
+  __ push(rscratch3);
+  __ mov(rscratch3, (address) &BytecodeCounter::_counter_value);
+  __ atomic_add(noreg, 1, rscratch3);
+  __ pop(rscratch3);
   __ pop(rscratch2);
   __ pop(rscratch1);
 }
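Editor's note: the bytecode-counter bump in templateInterpreter_aarch64.cpp likewise drops its local UseLSE/retry-loop code and calls atomic_add, passing noreg because the previous value is not needed. A rough std::atomic equivalent (illustration only; names are made up):

    #include <atomic>

    static std::atomic<long> counter_value{0};

    inline void count_bytecode() {
      // The old value is discarded, which is what passing noreg as `prev` expresses.
      counter_value.fetch_add(1, std::memory_order_relaxed);
    }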
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp	Wed Aug 14 18:57:49 2019 +0100
+++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp	Sat Feb 23 15:15:58 2019 +0800
@@ -35,19 +35,6 @@
 #define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
 #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
 
-// CASALW w2, w0, [x1]
-#define CASALW          ".word 0b10001000111000101111110000100000;"
-// CASAL x2, x0, [x1]
-#define CASAL           ".word 0b11001000111000101111110000100000;"
-// LDADDALW w0, w2, [x1]
-#define LDADDALW        ".word 0b10111000111000000000000000100010;"
-// LDADDAL w0, w2, [x1]
-#define LDADDAL         ".word 0b11111000111000000000000000100010;"
-// SWPW w0, w2, [x1]
-#define SWPW            ".word 0b10111000001000001000000000100010;"
-// SWP x0, x2, [x1]
-#define SWP             ".word 0b11111000001000001000000000100010;"
-
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
 inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
 inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
@@ -63,17 +50,6 @@
 
 inline jint Atomic::add(jint add_value, volatile jint* dest)
 {
- if (UseLSE) {
-   register jint r_add_value asm("w0") = add_value;
-   register volatile jint *r_dest asm("x1") = dest;
-   register jint r_result asm("w2");
-   __asm volatile(LDADDALW
-                  : [_result]"=r"(r_result)
-                  : [_add_value]"r"(r_add_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   return r_result+add_value;
- }
  return __sync_add_and_fetch(dest, add_value);
 }
 
@@ -99,18 +75,6 @@
 
 inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
 {
-  if (UseLSE) {
-   register jint r_exchange_value asm("w0") = exchange_value;
-   register volatile jint *r_dest asm("x1") = dest;
-   register jint r_result asm("w2");
-   __asm volatile(SWPW
-                  : [_result]"=r"(r_result)
-                  : [_exchange_value]"r"(r_exchange_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   FULL_MEM_BARRIER;
-   return r_result;
-  }
   jint res = __sync_lock_test_and_set (dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
@@ -124,17 +88,6 @@
 
 inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value)
 {
- if (UseLSE) {
-   register jint r_exchange_value asm("w0") = exchange_value;
-   register volatile jint *r_dest asm("x1") = dest;
-   register jint r_compare_value asm("w2") = compare_value;
-   __asm volatile(CASALW
-                  : [_compare_value]"+r"(r_compare_value)
-                  : [_exchange_value]"r"(r_exchange_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   return r_compare_value;
- }
  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
@@ -143,17 +96,6 @@
 
 inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
 {
- if (UseLSE) {
-   register intptr_t r_add_value asm("x0") = add_value;
-   register volatile intptr_t *r_dest asm("x1") = dest;
-   register intptr_t r_result asm("x2");
-   __asm volatile(LDADDAL
-                  : [_result]"=r"(r_result)
-                  : [_add_value]"r"(r_add_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   return r_result+add_value;
- }
  return __sync_add_and_fetch(dest, add_value);
 }
 
@@ -174,18 +116,6 @@
 
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
 {
-  if (UseLSE) {
-   register intptr_t r_exchange_value asm("x0") = exchange_value;
-   register volatile intptr_t *r_dest asm("x1") = dest;
-   register intptr_t r_result asm("x2");
-   __asm volatile(SWP
-                  : [_result]"=r"(r_result)
-                  : [_exchange_value]"r"(r_exchange_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   FULL_MEM_BARRIER;
-   return r_result;
-  }
   intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
@@ -193,33 +123,11 @@
 
 inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
 {
- if (UseLSE) {
-   register jlong r_exchange_value asm("x0") = exchange_value;
-   register volatile jlong *r_dest asm("x1") = dest;
-   register jlong r_compare_value asm("x2") = compare_value;
-   __asm volatile(CASAL
-                  : [_compare_value]"+r"(r_compare_value)
-                  : [_exchange_value]"r"(r_exchange_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   return r_compare_value;
- }
  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
 inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value)
 {
- if (UseLSE) {
-   register intptr_t r_exchange_value asm("x0") = exchange_value;
-   register volatile intptr_t *r_dest asm("x1") = dest;
-   register intptr_t r_compare_value asm("x2") = compare_value;
-   __asm volatile(CASAL
-                  : [_compare_value]"+r"(r_compare_value)
-                  : [_exchange_value]"r"(r_exchange_value),
-                    [_dest]"r"(r_dest)
-                  : "memory");
-   return r_compare_value;
- }
  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
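Editor's note: on the runtime side, atomic_linux_aarch64.inline.hpp drops the hand-encoded .word LSE instruction patterns and the UseLSE checks, leaving only the GCC __sync builtins; whether those become LSE instructions or exclusive-load/store loops is now decided by the compiler and the -march setting at build time. A minimal standalone use of the builtins that remain (illustration only, not JVM source):

    #include <cstdio>

    volatile int value = 0;

    int main() {
      int added   = __sync_add_and_fetch(&value, 5);            // atomic add, full barrier
      int swapped = __sync_lock_test_and_set(&value, 7);        // atomic exchange (acquire)
      __sync_synchronize();                                     // analogue of FULL_MEM_BARRIER
      int old     = __sync_val_compare_and_swap(&value, 7, 9);  // CAS, full barrier
      printf("%d %d %d %d\n", added, swapped, old, value);
      return 0;
    }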