changeset 6730:6dcb006c9f20

8153713, PR3741: aarch64: improve short array clearing using store pair Summary: aarch64: generate store pair instruction to clear short arrays Reviewed-by: aph
author fyang
date Tue, 12 Apr 2016 11:53:44 +0800
parents 12eb373fbf8f
children 87aadff83304
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp
diffstat 3 files changed, 49 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Jul 16 07:18:49 2019 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Apr 12 11:53:44 2016 +0800
@@ -1607,7 +1607,7 @@
 const bool Matcher::init_array_count_is_in_bytes = false;
 
 // Threshold size for cleararray.
-const int Matcher::init_array_short_size = 18 * BytesPerLong;
+const int Matcher::init_array_short_size = 4 * BytesPerLong;
 
 // Use conditional move (CMOVL)
 const int Matcher::long_cmove_cost() {
@@ -11732,6 +11732,20 @@
   ins_pipe(pipe_class_memory);
 %}
 
+instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)  // ClearArray rule for a count given as a long immediate (immL) and a base pointer held in a register
+%{
+  match(Set dummy (ClearArray cnt base));  // selected when the ClearArray count operand matches immL
+
+  ins_cost(4 * INSN_COST);  // NOTE(review): cr is declared in the operand list but not referenced anywhere in this instruct -- confirm it is required
+  format %{ "ClearArray $cnt, $base" %}
+
+  ins_encode %{
+    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);  // the count is passed as a constant, selecting the zero_words(Register, u_int64_t) overload
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Jul 16 07:18:49 2019 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Apr 12 11:53:44 2016 +0800
@@ -5452,6 +5452,39 @@
   fill_words(base, cnt, zr);
 }
 
+// base:   Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:    Immediate (known when the code is emitted) count of 8-byte units to zero.
+#define ShortArraySize (18 * BytesPerLong)  // size threshold: counts up to ShortArraySize / BytesPerLong units get straight-line stores, larger counts get a loop
+void MacroAssembler::zero_words(Register base, u_int64_t cnt)  // Emits zr stores covering cnt 8-byte units starting at base; the loop path writes rscratch1 and rscratch2.
+{
+  int i = cnt & 1;  // store any odd word to start; i is the index (in words) of the next word still to be zeroed
+  if (i) str(zr, Address(base));  // odd count: a single store of zr handles word 0 so the rest can go in pairs
+
+  if (cnt <= ShortArraySize / BytesPerLong) {  // short case: fully unrolled, no scratch registers used
+    for (; i < (int)cnt; i += 2)  // one stp per remaining pair of words
+      stp(zr, zr, Address(base, i * wordSize));
+  } else {
+    const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
+    int remainder = cnt % (2 * unroll);  // words left over once the count is split into loop iterations of 2 * unroll words
+    for (; i < remainder; i += 2)  // zero words [i, remainder) with straight-line stp ahead of the loop
+      stp(zr, zr, Address(base, i * wordSize));
+
+    Label loop;
+    Register cnt_reg = rscratch1;  // holds the number of words the loop still has to zero
+    Register loop_base = rscratch2;  // moving store pointer; base itself is only read in this function
+    cnt = cnt - remainder;  // now a non-zero multiple of 2 * unroll, so the counter below lands exactly on zero
+    mov(cnt_reg, cnt);
+    // adjust base and prebias by -2 * wordSize so we can pre-increment
+    add(loop_base, base, (remainder - 2) * wordSize);
+    bind(loop);
+    sub(cnt_reg, cnt_reg, 2 * unroll);  // each iteration accounts for 2 * unroll words
+    for (i = 1; i < unroll; i++)  // unroll - 1 stores at offsets of 2 * i words from loop_base
+      stp(zr, zr, Address(loop_base, 2 * i * wordSize));
+    stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));  // last store of the iteration uses the pre(...) address form, i.e. the pre-increment the comment above refers to
+    cbnz(cnt_reg, loop);  // branch back while cnt_reg is non-zero
+  }
+}
+
 // base:   Address of a buffer to be filled, 8 bytes aligned.
 // cnt:    Count in 8-byte unit.
 // value:  Value to be filled with.
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Jul 16 07:18:49 2019 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Apr 12 11:53:44 2016 +0800
@@ -3583,6 +3583,7 @@
                           Register result, Register tmp1);
   void fill_words(Register base, Register cnt, Register value);
   void zero_words(Register base, Register cnt);
+  void zero_words(Register base, u_int64_t cnt);  // overload of zero_words taking the count as an immediate value instead of in a register
 
   // ISB may be needed because of a safepoint
   void maybe_isb() { isb(); }