Mercurial > hg > release > icedtea7-forest-2.6 > hotspot
changeset 6729:12eb373fbf8f
8153797, PR3741: aarch64: Add Arrays.fill stub code
Reviewed-by: aph
Contributed-by: long.chen@linaro.org
author | enevill |
---|---|
date | Tue, 16 Jul 2019 07:18:49 +0100 |
parents | c98fe53cf02b |
children | 6dcb006c9f20 |
files | src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/stubGenerator_aarch64.cpp |
diffstat | 4 files changed, 197 insertions(+), 50 deletions(-) [+] |
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad Tue Jul 16 06:37:47 2019 +0100 +++ b/src/cpu/aarch64/vm/aarch64.ad Tue Jul 16 07:18:49 2019 +0100 @@ -2479,55 +2479,6 @@ } %} - enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{ - MacroAssembler _masm(&cbuf); - Register cnt_reg = as_Register($cnt$$reg); - Register base_reg = as_Register($base$$reg); - // base is word aligned - // cnt is count of words - - Label loop; - Label entry; - -// Algorithm: -// -// scratch1 = cnt & 7; -// cnt -= scratch1; -// p += scratch1; -// switch (scratch1) { -// do { -// cnt -= 8; -// p[-8] = 0; -// case 7: -// p[-7] = 0; -// case 6: -// p[-6] = 0; -// // ... -// case 1: -// p[-1] = 0; -// case 0: -// p += 8; -// } while (cnt); -// } - - const int unroll = 8; // Number of str(zr) instructions we'll unroll - - __ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll - __ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= unroll - // base_reg always points to the end of the region we're about to zero - __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize)); - __ adr(rscratch2, entry); - __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); - __ br(rscratch2); - __ bind(loop); - __ sub(cnt_reg, cnt_reg, unroll); - for (int i = -unroll; i < 0; i++) - __ str(zr, Address(base_reg, i * wordSize)); - __ bind(entry); - __ add(base_reg, base_reg, unroll * wordSize); - __ cbnz(cnt_reg, loop); - %} - /// mov envcodings enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{ @@ -10408,7 +10359,9 @@ ins_cost(4 * INSN_COST); format %{ "ClearArray $cnt, $base" %} - ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base)); + ins_encode %{ + __ zero_words($base$$Register, $cnt$$Register); + %} ins_pipe(pipe_class_memory); %}
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Jul 16 06:37:47 2019 +0100 +++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Jul 16 07:18:49 2019 +0100 @@ -5445,6 +5445,61 @@ BLOCK_COMMENT("} string_compare"); } +// base: Address of a buffer to be zeroed, 8-byte aligned. +// cnt: Count in 8-byte units. +void MacroAssembler::zero_words(Register base, Register cnt) +{ + fill_words(base, cnt, zr); +} + +// base: Address of a buffer to be filled, 8-byte aligned. +// cnt: Count in 8-byte units. +// value: Value to be filled with. +// On exit base points to the end of the buffer and cnt is zero; rscratch1 and rscratch2 are used as temporaries. +void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ +// Algorithm: +// +// scratch1 = cnt & 7; +// cnt -= scratch1; +// p += scratch1; +// switch (scratch1) { +// do { +// p += 8; cnt -= 8; +// p[-8] = v; +// case 7: +// p[-7] = v; +// case 6: +// p[-6] = v; +// // ... +// case 1: +// p[-1] = v; +// case 0: +// ; +// } while (cnt); +// } + + assert_different_registers(base, cnt, value, rscratch1, rscratch2); + + Label entry, loop; + const int unroll = 8; // Number of str instructions we'll unroll + + andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll + cbz(rscratch1, entry); // no partial group: go straight to the loop test + sub(cnt, cnt, rscratch1); // cnt -= tmp1 + // base always points to the end of the region we're about to fill + add(base, base, rscratch1, Assembler::LSL, 3); + adr(rscratch2, entry); + sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); // rscratch2 = entry - 4 * tmp1 + br(rscratch2); // enter the unrolled stores part-way through + bind(loop); + add(base, base, unroll * 8); + sub(cnt, cnt, unroll); + for (int i = -unroll; i < 0; i++) + str(value, Address(base, i * 8)); + bind(entry); + cbnz(cnt, loop); +} void MacroAssembler::string_equals(Register str1, Register str2, Register cnt, Register result,
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 16 06:37:47 2019 +0100 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 16 07:18:49 2019 +0100 @@ -3581,6 +3581,9 @@ Register tmp1); void char_arrays_equals(Register ary1, Register ary2, Register result, Register tmp1); + void fill_words(Register base, Register cnt, Register value); + void zero_words(Register base, Register cnt); + // ISB may be needed because of a safepoint void maybe_isb() { isb(); } };
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jul 16 06:37:47 2019 +0100 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jul 16 07:18:49 2019 +0100 @@ -1594,6 +1594,136 @@ } + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. + // + // Arguments for generated stub: + // to: c_rarg0 + // value: c_rarg1 + // count: c_rarg2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + + const Register to = c_rarg0; // destination array address + const Register value = c_rarg1; // value + const Register count = c_rarg2; // elements count + const Register cnt_words = c_rarg3; // temp: number of 8-byte words passed to fill_words + + __ enter(); + + Label L_fill_elements, L_exit1; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bfi(value, value, 8, 8); // 8 bit -> 16 bit + __ bfi(value, value, 16, 16); // 16 bit -> 32 bit + __ br(Assembler::LO, L_fill_elements); + break; + case T_SHORT: + shift = 1; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bfi(value, value, 16, 16); // 16 bit -> 32 bit + __ br(Assembler::LO, L_fill_elements); + break; + case T_INT: + shift = 2; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ br(Assembler::LO, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + // Align destination address to an 8-byte boundary. + Label L_skip_align1, L_skip_align2, L_skip_align4; + if (!aligned) { + switch (t) { + case T_BYTE: + // One byte misalignment happens only for byte arrays.
+ __ tbz(to, 0, L_skip_align1); + __ strb(value, Address(__ post(to, 1))); + __ subw(count, count, 1); + __ bind(L_skip_align1); + // Fallthrough + case T_SHORT: + // Two bytes misalignment happens only for byte and short (char) arrays. + __ tbz(to, 1, L_skip_align2); + __ strh(value, Address(__ post(to, 2))); + __ subw(count, count, 2 >> shift); + __ bind(L_skip_align2); + // Fallthrough + case T_INT: + // Align to 8 bytes, we know we are 4 byte aligned to start. + __ tbz(to, 2, L_skip_align4); + __ strw(value, Address(__ post(to, 4))); + __ subw(count, count, 4 >> shift); + __ bind(L_skip_align4); + break; + default: ShouldNotReachHere(); + } + } + + // + // Fill large chunks + // + __ lsrw(cnt_words, count, 3 - shift); // number of words + __ bfi(value, value, 32, 32); // 32 bit -> 64 bit + __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift); + __ fill_words(to, cnt_words, value); + + // Remaining count is less than 8 bytes. Fill it by a single store. + // Note that the total length is no less than 8 bytes. + if (t == T_BYTE || t == T_SHORT) { + Label L_exit1; + __ cbzw(count, L_exit1); + __ add(to, to, count, Assembler::LSL, shift); // points to the end + __ str(value, Address(to, -8)); // one 8-byte store ending at the new "to"; re-writes some already-filled elements + __ bind(L_exit1); + __ leave(); + __ ret(lr); + } + + // Handle fills of less than 8 bytes.
+ Label L_fill_2, L_fill_4, L_exit2; + __ bind(L_fill_elements); + switch (t) { + case T_BYTE: + __ tbz(count, 0, L_fill_2); + __ strb(value, Address(__ post(to, 1))); + __ bind(L_fill_2); + __ tbz(count, 1, L_fill_4); + __ strh(value, Address(__ post(to, 2))); + __ bind(L_fill_4); + __ tbz(count, 2, L_exit2); + __ strw(value, Address(to)); + break; + case T_SHORT: + __ tbz(count, 0, L_fill_4); + __ strh(value, Address(__ post(to, 2))); + __ bind(L_fill_4); + __ tbz(count, 1, L_exit2); + __ strw(value, Address(to)); + break; + case T_INT: + __ cbzw(count, L_exit2); + __ strw(value, Address(to)); + break; + default: ShouldNotReachHere(); + } + __ bind(L_exit2); + __ leave(); + __ ret(lr); + return start; + } + void generate_arraycopy_stubs() { address entry; address entry_jbyte_arraycopy; @@ -1682,6 +1812,12 @@ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); } // Arguments: