# HG changeset patch # User kvn # Date 1302561031 25200 # Node ID 15c9a0e162697752443632bcb059121705a04063 # Parent 328926869b15a73c231b76afb5e64b7c41214f0f 7035713: 3DNow Prefetch Instruction Support Summary: The upcoming processors from AMD are the first that support 3dnow prefetch without supporting the 3dnow instruction set. Reviewed-by: kvn Contributed-by: tom.deneau@amd.com diff -r 328926869b15 -r 15c9a0e16269 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Sat Apr 09 22:55:25 2011 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Mon Apr 11 15:30:31 2011 -0700 @@ -2317,7 +2317,7 @@ } void Assembler::prefetchr(Address src) { - NOT_LP64(assert(VM_Version::supports_3dnow(), "must support")); + NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); InstructionMark im(this); prefetch_prefix(src); emit_byte(0x0D); @@ -2349,7 +2349,7 @@ } void Assembler::prefetchw(Address src) { - NOT_LP64(assert(VM_Version::supports_3dnow(), "must support")); + NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); InstructionMark im(this); prefetch_prefix(src); emit_byte(0x0D); diff -r 328926869b15 -r 15c9a0e16269 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Sat Apr 09 22:55:25 2011 -0700 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Apr 11 15:30:31 2011 -0700 @@ -1401,7 +1401,7 @@ default: ShouldNotReachHere(); break; } - } else if (VM_Version::supports_3dnow()) { + } else if (VM_Version::supports_3dnow_prefetch()) { __ prefetchr(from_addr); } } @@ -1424,7 +1424,7 @@ default: ShouldNotReachHere(); break; } - } else if (VM_Version::supports_3dnow()) { + } else if (VM_Version::supports_3dnow_prefetch()) { __ prefetchw(from_addr); } } diff -r 328926869b15 -r 15c9a0e16269 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Sat Apr 09 22:55:25 2011 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Mon Apr 11 15:30:31 2011 -0700 @@ -348,7 +348,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -363,8 +363,7 @@ (supports_sse4_2() ? ", sse4.2" : ""), (supports_popcnt() ? ", popcnt" : ""), (supports_mmx_ext() ? ", mmxext" : ""), - (supports_3dnow() ? ", 3dnow" : ""), - (supports_3dnow2() ? ", 3dnowext" : ""), + (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), (supports_sse4a() ? ", sse4a": ""), (supports_ht() ? ", ht": "")); @@ -522,13 +521,13 @@ // set valid Prefetch instruction if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; - if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; + if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; + if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; - if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; + if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; + if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; // Allocation prefetch settings intx cache_line_size = L1_data_cache_line_size(); @@ -576,10 +575,10 @@ logical_processors_per_package()); tty->print_cr("UseSSE=%d",UseSSE); tty->print("Allocation: "); - if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { + if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { tty->print_cr("no prefetching"); } else { - if (UseSSE == 0 && supports_3dnow()) { + if (UseSSE == 0 && supports_3dnow_prefetch()) { tty->print("PREFETCHW"); } else if (UseSSE >= 1) { if (AllocatePrefetchInstr == 0) { diff -r 328926869b15 -r 15c9a0e16269 src/cpu/x86/vm/vm_version_x86.hpp --- a/src/cpu/x86/vm/vm_version_x86.hpp Sat Apr 09 22:55:25 2011 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Mon Apr 11 15:30:31 2011 -0700 @@ -188,7 +188,8 @@ CPU_FXSR = (1 << 2), CPU_HT = (1 << 3), CPU_MMX = (1 << 4), - CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) + CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions + // may not necessarily support other 3dnow instructions CPU_SSE = (1 << 6), CPU_SSE2 = (1 << 7), CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) @@ -328,8 +329,9 @@ // AMD features. if (is_amd()) { - if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) - result |= CPU_3DNOW; + if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || + (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) + result |= CPU_3DNOW_PREFETCH; if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) result |= CPU_LZCNT; if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) @@ -446,9 +448,8 @@ // // AMD features // - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } + static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } diff -r 328926869b15 -r 15c9a0e16269 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Sat Apr 09 22:55:25 2011 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Mon Apr 11 15:30:31 2011 -0700 @@ -3423,7 +3423,7 @@ masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) { + if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { // prefetchw [eax + Offset(_owner)-2] masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); } @@ -3467,7 +3467,7 @@ masm.movptr(boxReg, tmpReg) ; // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) { + if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { // prefetchw [eax + Offset(_owner)-2] masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); } @@ -3614,7 +3614,7 @@ // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. masm.get_thread (boxReg) ; - if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) { + if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { // prefetchw [ebx + Offset(_owner)-2] masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2)); } @@ -7333,7 +7333,7 @@ // Must be safe to execute with invalid address (cannot fault). instruct prefetchr0( memory mem ) %{ - predicate(UseSSE==0 && !VM_Version::supports_3dnow()); + predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); match(PrefetchRead mem); ins_cost(0); size(0); @@ -7343,7 +7343,7 @@ %} instruct prefetchr( memory mem ) %{ - predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3); + predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3); match(PrefetchRead mem); ins_cost(100); @@ -7387,7 +7387,7 @@ %} instruct prefetchw0( memory mem ) %{ - predicate(UseSSE==0 && !VM_Version::supports_3dnow()); + predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); match(PrefetchWrite mem); ins_cost(0); size(0); @@ -7397,7 +7397,7 @@ %} instruct prefetchw( memory mem ) %{ - predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3); + predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3); match( PrefetchWrite mem ); ins_cost(100);