# HG changeset patch
# User never
# Date 1282254707 25200
# Node ID f55c4f82ab9dbb8dd197438ade98a00d5bda2d46
# Parent 13b87063b4d8dea40ba97a283ad5bd3272e876f3
6978249: spill between cpu and fpu registers when those moves are fast
Reviewed-by: kvn

diff -r 13b87063b4d8 -r f55c4f82ab9d src/cpu/sparc/vm/vm_version_sparc.cpp
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -112,6 +112,11 @@
     }
   }
 
+#ifdef COMPILER2
+  // Currently not supported anywhere.
+  FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+#endif
+
   char buf[512];
   jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
diff -r 13b87063b4d8 -r f55c4f82ab9d src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -482,6 +482,15 @@
     }
   }
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+#endif
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
@@ -520,6 +529,11 @@
       if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
         AllocatePrefetchDistance = 192;
         AllocatePrefetchLines = 4;
+#ifdef COMPILER2
+        if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+        }
+#endif
       }
     }
     assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
diff -r 13b87063b4d8 -r f55c4f82ab9d src/cpu/x86/vm/x86_32.ad
--- a/src/cpu/x86/vm/x86_32.ad Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/x86_32.ad Thu Aug 19 14:51:47 2010 -0700
@@ -852,6 +852,39 @@
   }
 }
 
+static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                                 int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit
+  if (cbuf) {
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x6E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+#ifndef PRODUCT
+  } else if (!do_size) {
+    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;
+}
+
+
+static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                                 int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit
+  if (cbuf) {
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x7E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+#ifndef PRODUCT
+  } else if (!do_size) {
+    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;
+}
+
 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
   if( cbuf ) {
     emit_opcode(*cbuf, 0x8B );
@@ -947,6 +980,12 @@
   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 
+  // Check for integer reg-xmm reg copy
+  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit integer-float reg moves" );
+    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
   // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
@@ -1018,6 +1057,13 @@
     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
 
+  // Check for xmm reg-integer reg copy
+  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit float-integer reg moves" );
+    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
+
   // Check for xmm store
   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
diff -r 13b87063b4d8 -r f55c4f82ab9d src/cpu/x86/vm/x86_64.ad
--- a/src/cpu/x86/vm/x86_64.ad Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/x86_64.ad Thu Aug 19 14:51:47 2010 -0700
@@ -1607,8 +1607,8 @@
         emit_opcode(*cbuf, 0x0F);
         emit_opcode(*cbuf, 0x7E);
         emit_rm(*cbuf, 0x3,
-                Matcher::_regEncode[dst_first] & 7,
-                Matcher::_regEncode[src_first] & 7);
+                Matcher::_regEncode[src_first] & 7,
+                Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
       } else if (!do_size) {
         st->print("movdq %s, %s\t# spill",
@@ -1637,8 +1637,8 @@
         emit_opcode(*cbuf, 0x0F);
         emit_opcode(*cbuf, 0x7E);
         emit_rm(*cbuf, 0x3,
-                Matcher::_regEncode[dst_first] & 7,
-                Matcher::_regEncode[src_first] & 7);
+                Matcher::_regEncode[src_first] & 7,
+                Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
       } else if (!do_size) {
         st->print("movdl %s, %s\t# spill",
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/opto/c2_globals.hpp
--- a/src/share/vm/opto/c2_globals.hpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/c2_globals.hpp Thu Aug 19 14:51:47 2010 -0700
@@ -178,6 +178,9 @@
   product(bool, ReduceBulkZeroing, true,                                    \
           "When bulk-initializing, try to avoid needless zeroing")          \
                                                                             \
+  product(bool, UseFPUForSpilling, false,                                   \
+          "Spill integer registers to FPU instead of stack when possible")  \
+                                                                            \
   develop_pd(intx, RegisterCostAreaRatio,                                   \
           "Spill selection in reg allocator: scale area by (X/64K) before " \
           "adding cost")                                                    \
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/opto/coalesce.cpp
--- a/src/share/vm/opto/coalesce.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/coalesce.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -780,6 +780,14 @@
   // Number of bits free
   uint rm_size = rm.Size();
 
+  if (UseFPUForSpilling && rm.is_AllStack() ) {
+    // Don't coalesce when frequency difference is large
+    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
+    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
+    if (src_def_b->_freq > 10*dst_b->_freq )
+      return false;
+  }
+
   // If we can use any stack slot, then effective size is infinite
   if( rm.is_AllStack() ) rm_size += 1000000;
   // Incompatible masks, no way to coalesce
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/opto/matcher.cpp
--- a/src/share/vm/opto/matcher.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/matcher.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -456,6 +456,23 @@
   *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
 
+  if (UseFPUForSpilling) {
+    // This mask logic assumes that the spill operations are
+    // symmetric and that the registers involved are the same size.
+    // On sparc, for instance, we may have to use 64 bit moves that will
+    // kill 2 registers when used with F0-F31.
+    idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+    idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+#ifdef _LP64
+    idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+    idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+    idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+#else
+    idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+#endif
+  }
+
   // Make up debug masks.  Any spill slot plus callee-save registers.
   // Caller-save registers are assumed to be trashable by the various
   // inline-cache fixup routines.
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/opto/reg_split.cpp
--- a/src/share/vm/opto/reg_split.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/reg_split.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -975,6 +975,19 @@
             insidx++;  // Reset iterator to skip USE side split
             continue;
           }
+
+          if (UseFPUForSpilling && n->is_Call() && !uup && !dup ) {
+            // The use at the call can force the def down so insert
+            // a split before the use to allow the def more freedom.
+            maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+            // If it wasn't split, bail.
+            if (!maxlrg) {
+              return 0;
+            }
+            insidx++;  // Reset iterator to skip USE side split
+            continue;
+          }
+
           // Here is the logic chart which describes USE Splitting:
           // 0 = false or DOWN, 1 = true or UP
           //
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -3003,10 +3003,6 @@
     CommandLineFlags::printSetFlags();
   }
 
-  if (PrintFlagsFinal) {
-    CommandLineFlags::printFlags();
-  }
-
   // Apply CPU specific policy for the BiasedLocking
   if (UseBiasedLocking) {
     if (!VM_Version::use_biased_locking() &&
diff -r 13b87063b4d8 -r f55c4f82ab9d src/share/vm/runtime/init.cpp
--- a/src/share/vm/runtime/init.cpp Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/runtime/init.cpp Thu Aug 19 14:51:47 2010 -0700
@@ -128,6 +128,12 @@
     Universe::verify();   // make sure we're starting with a clean slate
   }
 
+  // All the flags that get adjusted by VM_Version_init and os::init_2
+  // have been set, so dump the flags now.
+  if (PrintFlagsFinal) {
+    CommandLineFlags::printFlags();
+  }
+
   return JNI_OK;
 }
 
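A quick way to exercise the new flag, assuming a JDK build that includes this changeset, is to force it on and read it back from the PrintFlagsFinal dump, which init.cpp now emits only after VM_Version_init and os::init_2 have adjusted the defaults:

    java -XX:+UseFPUForSpilling -XX:+PrintFlagsFinal -version | grep UseFPUForSpilling

Per the vm_version changes above, x86 clears the flag again when UseSSE < 2, -XX:+AggressiveOpts turns it on by default on Nehalem-class (SSE4.2 + HT) CPUs, and SPARC currently forces it to false unconditionally.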