Mercurial > hg > release > icedtea7-forest-2.6 > hotspot
changeset 6733:6bd6d4c7940e
8157841, PR3741: aarch64: prefetch ignores cache line size
Summary: fix prefetch to take account of cache line size
Reviewed-by: aph
Contributed-by: stuart.monteith@linaro.org, edward.nevill@linaro.org
author | enevill |
---|---|
date | Tue, 16 Jul 2019 10:47:47 +0100 |
parents | cbb799cc6c7c |
children | 5e713e212064 |
files | src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp src/cpu/aarch64/vm/vm_version_aarch64.cpp src/cpu/aarch64/vm/vm_version_aarch64.hpp |
diffstat | 6 files changed, 69 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/aarch64.ad Tue Jul 16 10:47:47 2019 +0100 @@ -2905,7 +2905,8 @@ // membar_acquire_lock(). { Label retry_load; - __ prfm(Address(oop), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(oop), PSTL1STRM); __ bind(retry_load); __ ldxr(tmp, oop); __ cmp(tmp, disp_hdr); @@ -2956,7 +2957,8 @@ { Label retry_load, fail; - __ prfm(Address(tmp), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(tmp), PSTL1STRM); __ bind(retry_load); __ ldxr(rscratch1, tmp); __ cmp(disp_hdr, rscratch1); @@ -3046,7 +3048,8 @@ { Label retry_load; - __ prfm(Address(oop), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(oop), PSTL1STRM); __ bind(retry_load); __ ldxr(tmp, oop); __ cmp(box, tmp);
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Jul 16 10:47:47 2019 +0100 @@ -3146,7 +3146,8 @@ void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { Label retry_load; - prfm(Address(counter_addr), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(counter_addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location ldxrw(tmp, counter_addr); @@ -3572,7 +3573,8 @@ // addr identifies memory word to compare against/update // tmp returns 0/1 for success/failure Label retry_load, nope; - prfm(Address(addr), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location // and fail if it is not what we expect @@ -3600,7 +3602,8 @@ // addr identifies memory word to compare against/update // tmp returns 0/1 for success/failure Label retry_load, nope; - prfm(Address(addr), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location // and fail if it is not what we expect @@ -3635,7 +3638,8 @@ result = different(prev, incr, addr) ? prev : rscratch2; \ \ Label retry_load; \ - prfm(Address(addr), PSTL1STRM); \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) \ + prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ OP(rscratch1, result, incr); \ @@ -3658,7 +3662,8 @@ result = different(prev, newv, addr) ? prev : rscratch2; \ \ Label retry_load; \ - prfm(Address(addr), PSTL1STRM); \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) \ + prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ STXR(rscratch1, newv, addr); \
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 16 10:47:47 2019 +0100 @@ -2677,10 +2677,11 @@ umaddl(Rd, Rn, Rm, zr); } -#define WRAP(INSN) \ - void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \ - if (Ra != zr) nop(); \ - Assembler::INSN(Rd, Rn, Rm, Ra); \ +#define WRAP(INSN) \ + void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_A53MAC) && Ra != zr) \ + nop(); \ + Assembler::INSN(Rd, Rn, Rm, Ra); \ } WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw) @@ -2799,6 +2800,15 @@ mrs(0b011, 0b0000, 0b0000, 0b111, reg); } + // CTR_EL0: op1 == 011 + // CRn == 0000 + // CRm == 0000 + // op2 == 001 + inline void get_ctr_el0(Register reg) + { + mrs(0b011, 0b0000, 0b0000, 0b001, reg); + } + // idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register ra, Register rb, bool want_remainder, Register tmp = rscratch1);
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Tue Jul 16 10:47:47 2019 +0100 @@ -1512,7 +1512,8 @@ Label retry_load, nope; // flush and load exclusive from the memory location // and fail if it is not what we expect - __ prfm(Address(addr), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(addr), PSTL1STRM); __ bind(retry_load); __ ldaxrw(rscratch1, addr); __ cmpw(rscratch1, cmpval); @@ -1531,7 +1532,8 @@ Label retry_load, nope; // flush and load exclusive from the memory location // and fail if it is not what we expect - __ prfm(Address(addr), PSTL1STRM); + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(addr), PSTL1STRM); __ bind(retry_load); __ ldaxr(rscratch1, addr); __ cmp(rscratch1, cmpval);
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Tue Jul 16 10:47:47 2019 +0100 @@ -63,6 +63,7 @@ int VM_Version::_cpu; int VM_Version::_model; +int VM_Version::_model2; int VM_Version::_variant; int VM_Version::_revision; int VM_Version::_stepping; @@ -102,6 +103,9 @@ __ get_dczid_el0(rscratch1); __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset()))); + __ get_ctr_el0(rscratch1); + __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset()))); + __ leave(); __ ret(lr); @@ -121,17 +125,20 @@ getPsrInfo_stub(&_psr_info); + int dcache_line = VM_Version::dcache_line_size(); + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) - FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 3*dcache_line); if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) - FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); - FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256); - FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256); - FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256); + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line); + if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) + FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line); if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) - FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256); - if ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768)) { - warning("PrefetchCopyIntervalInBytes must be a multiple of 8 and < 32768"); + FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line); + + if (PrefetchCopyIntervalInBytes != -1 && + ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) { + warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768"); PrefetchCopyIntervalInBytes &= ~7; if (PrefetchCopyIntervalInBytes >= 32768) PrefetchCopyIntervalInBytes = 32760; @@ -150,6 +157,7 @@ _features_str = strdup(buf); 
_cpuFeatures = auxv; + int cpu_lines = 0; if (FILE *f = fopen("/proc/cpuinfo", "r")) { char buf[128], *p; while (fgets(buf, sizeof (buf), f) != NULL) { @@ -157,9 +165,11 @@ long v = strtol(p+1, NULL, 0); if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { _cpu = v; + cpu_lines++; } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { _variant = v; } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) { + if (_model != v) _model2 = _model; _model = v; } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { _revision = v; @@ -170,8 +180,13 @@ } // Enable vendor specific features - if (_cpu == CPU_CAVIUM) _cpuFeatures |= CPU_DMB_ATOMICS; - if (_cpu == CPU_ARM) _cpuFeatures |= CPU_A53MAC; + if (_cpu == CPU_CAVIUM && _variant == 0) _cpuFeatures |= CPU_DMB_ATOMICS; + if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _cpuFeatures |= CPU_A53MAC; + if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _cpuFeatures |= CPU_STXR_PREFETCH; + // If an old-style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07) + // we assume the worst and assume we could be on a big.LITTLE system and have + // undisclosed A53 cores which we could be swapped to at any stage + if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _cpuFeatures |= CPU_A53MAC; if (FLAG_IS_DEFAULT(UseCRC32)) { UseCRC32 = (auxv & HWCAP_CRC32) != 0;
--- a/src/cpu/aarch64/vm/vm_version_aarch64.hpp Mon Feb 08 14:14:35 2016 +0000 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp Tue Jul 16 10:47:47 2019 +0100 @@ -35,6 +35,7 @@ protected: static int _cpu; static int _model; + static int _model2; static int _variant; static int _revision; static int _stepping; @@ -44,6 +45,7 @@ struct PsrInfo { uint32_t dczid_el0; + uint32_t ctr_el0; }; static PsrInfo _psr_info; static void get_processor_features(); @@ -79,6 +81,7 @@ CPU_SHA1 = (1<<5), CPU_SHA2 = (1<<6), CPU_CRC32 = (1<<7), + CPU_STXR_PREFETCH= (1 << 29), CPU_A53MAC = (1 << 30), CPU_DMB_ATOMICS = (1 << 31), } cpuFeatureFlags; @@ -90,6 +93,7 @@ static int cpu_revision() { return _revision; } static int cpu_cpuFeatures() { return _cpuFeatures; } static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } + static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } static bool is_zva_enabled() { // Check the DZP bit (bit 4) of dczid_el0 is zero // and block size (bit 0~3) is not zero. @@ -100,6 +104,12 @@ assert(is_zva_enabled(), "ZVA not available"); return 4 << (_psr_info.dczid_el0 & 0xf); } + static int icache_line_size() { + return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4; + } + static int dcache_line_size() { + return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; + } }; #endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP