changeset 1649:f55c4f82ab9d

6978249: spill between cpu and fpu registers when those moves are fast Reviewed-by: kvn
author never
date Thu, 19 Aug 2010 14:51:47 -0700
parents 13b87063b4d8
children ee5cc9e78493
files src/cpu/sparc/vm/vm_version_sparc.cpp src/cpu/x86/vm/vm_version_x86.cpp src/cpu/x86/vm/x86_32.ad src/cpu/x86/vm/x86_64.ad src/share/vm/opto/c2_globals.hpp src/share/vm/opto/coalesce.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/reg_split.cpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/init.cpp
diffstat 10 files changed, 116 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -112,6 +112,11 @@
     }
   }
 
+#ifdef COMPILER2
+  // Currently not supported anywhere.
+  FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+#endif
+
   char buf[512];
   jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -482,6 +482,15 @@
     }
   }
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+#endif
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
@@ -520,6 +529,11 @@
     if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
       AllocatePrefetchDistance = 192;
       AllocatePrefetchLines = 4;
+#ifdef COMPILER2
+      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
     }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
--- a/src/cpu/x86/vm/x86_32.ad	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Thu Aug 19 14:51:47 2010 -0700
@@ -852,6 +852,39 @@
   }
 }
 
+static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                            int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit integer-register -> XMM-register spill copy; src_hi, dst_hi and size are not read here.
+  if (cbuf) {  // emitting: write the bytes 0x66 0x0F 0x6E followed by a ModRM byte
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x6E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);  // presumably mod=3 (register-direct), reg=dst, rm=src -- confirm against emit_rm
+#ifndef PRODUCT
+  } else if (!do_size) {  // non-product builds only: no code buffer and not sizing, so print the instruction text to st
+    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;  // constant on every path; presumably the encoded length in bytes (three opcode bytes plus ModRM)
+}
+
+
+static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                                 int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit XMM-register -> integer-register spill copy; src_hi, dst_hi and size are not read here.
+  if (cbuf) {  // emitting: write the bytes 0x66 0x0F 0x7E followed by a ModRM byte
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x7E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);  // operand order is src then dst here (the reverse of impl_movgpr2x_helper); presumably reg=src, rm=dst -- confirm against emit_rm
+#ifndef PRODUCT
+  } else if (!do_size) {  // non-product builds only: no code buffer and not sizing, so print the instruction text to st
+    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;  // constant on every path; presumably the encoded length in bytes (three opcode bytes plus ModRM)
+}
+
 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
   if( cbuf ) {
     emit_opcode(*cbuf, 0x8B );
@@ -947,6 +980,12 @@
   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 
+  // Check for integer reg-xmm reg copy
+  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit integer-float reg moves" );
+    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
   // --------------------------------------
   // Check for float reg-reg copy
   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
@@ -1018,6 +1057,13 @@
     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
 
+  // Check for xmm reg-integer reg copy
+  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit float-integer reg moves" );
+    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
+
   // Check for xmm store
   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
--- a/src/cpu/x86/vm/x86_64.ad	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Aug 19 14:51:47 2010 -0700
@@ -1607,8 +1607,8 @@
           emit_opcode(*cbuf, 0x0F);
           emit_opcode(*cbuf, 0x7E);
           emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+                  Matcher::_regEncode[src_first] & 7,
+                  Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1637,8 +1637,8 @@
           emit_opcode(*cbuf, 0x0F);
           emit_opcode(*cbuf, 0x7E);
           emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+                  Matcher::_regEncode[src_first] & 7,
+                  Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
--- a/src/share/vm/opto/c2_globals.hpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Thu Aug 19 14:51:47 2010 -0700
@@ -178,6 +178,9 @@
   product(bool, ReduceBulkZeroing, true,                                    \
           "When bulk-initializing, try to avoid needless zeroing")          \
                                                                             \
+  product(bool, UseFPUForSpilling, false,                                   \
+          "Spill integer registers to FPU instead of stack when possible")  \
+                                                                            \
   develop_pd(intx, RegisterCostAreaRatio,                                   \
           "Spill selection in reg allocator: scale area by (X/64K) before " \
           "adding cost")                                                    \
--- a/src/share/vm/opto/coalesce.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/coalesce.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -780,6 +780,14 @@
   // Number of bits free
   uint rm_size = rm.Size();
 
+  if (UseFPUForSpilling && rm.is_AllStack() ) {
+    // Don't coalesce when frequency difference is large
+    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
+    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
+    if (src_def_b->_freq > 10*dst_b->_freq )
+      return false;
+  }
+
   // If we can use any stack slot, then effective size is infinite
   if( rm.is_AllStack() ) rm_size += 1000000;
   // Incompatible masks, no way to coalesce
--- a/src/share/vm/opto/matcher.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/matcher.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -456,6 +456,23 @@
   *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
    idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
 
+   if (UseFPUForSpilling) {
+     // This mask logic assumes that the spill operations are
+     // symmetric and that the registers involved are the same size.
+     // On sparc for instance we may have to use 64 bit moves which will
+     // kill 2 registers when used with F0-F31.
+     idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+     idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+#ifdef _LP64
+     idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+#else
+     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+#endif
+   }
+
   // Make up debug masks.  Any spill slot plus callee-save registers.
   // Caller-save registers are assumed to be trashable by the various
   // inline-cache fixup routines.
--- a/src/share/vm/opto/reg_split.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/opto/reg_split.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -975,6 +975,19 @@
               insidx++;  // Reset iterator to skip USE side split
               continue;
             }
+
+            if (UseFPUForSpilling && n->is_Call() && !uup && !dup ) {
+              // The use at the call can force the def down so insert
+              // a split before the use to allow the def more freedom.
+              maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+              // If it wasn't split, bail out
+              if (!maxlrg) {
+                return 0;
+              }
+              insidx++;  // Reset iterator to skip USE side split
+              continue;
+            }
+
             // Here is the logic chart which describes USE Splitting:
             // 0 = false or DOWN, 1 = true or UP
             //
--- a/src/share/vm/runtime/arguments.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -3003,10 +3003,6 @@
     CommandLineFlags::printSetFlags();
   }
 
-  if (PrintFlagsFinal) {
-    CommandLineFlags::printFlags();
-  }
-
   // Apply CPU specific policy for the BiasedLocking
   if (UseBiasedLocking) {
     if (!VM_Version::use_biased_locking() &&
--- a/src/share/vm/runtime/init.cpp	Wed Aug 18 01:22:16 2010 -0700
+++ b/src/share/vm/runtime/init.cpp	Thu Aug 19 14:51:47 2010 -0700
@@ -128,6 +128,12 @@
     Universe::verify();   // make sure we're starting with a clean slate
   }
 
+  // All the flags that get adjusted by VM_Version_init and os::init_2
+  // have been set so dump the flags now.
+  if (PrintFlagsFinal) {
+    CommandLineFlags::printFlags();
+  }
+
   return JNI_OK;
 }