changeset 1960:7737fa7ec2b5

7006044: materialize cheap non-oop pointers on 64-bit SPARC Summary: After 6961690 we load non-oop pointers for the constant table which could easily be materialized in a few instructions. Reviewed-by: never, kvn
author twisti
date Tue, 14 Dec 2010 12:44:30 -0800
parents 361783318e7e
children 781072b12368
files src/cpu/sparc/vm/assembler_sparc.cpp src/cpu/sparc/vm/assembler_sparc.hpp src/cpu/sparc/vm/sparc.ad
diffstat 3 files changed, 82 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Dec 13 22:41:03 2010 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Tue Dec 14 12:44:30 2010 -0800
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "asm/assembler.hpp"
 #include "assembler_sparc.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
@@ -1327,37 +1328,38 @@
 }
 
 
-int MacroAssembler::size_of_sethi(address a, bool worst_case) {
+int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
 #ifdef _LP64
-  if (worst_case) return 7;
-  intptr_t iaddr = (intptr_t)a;
-  int hi32 = (int)(iaddr >> 32);
-  int lo32 = (int)(iaddr);
-  int inst_count;
-  if (hi32 == 0 && lo32 >= 0)
-    inst_count = 1;
-  else if (hi32 == -1)
-    inst_count = 2;
+  if (worst_case)  return 7;
+  intptr_t iaddr = (intptr_t) a;
+  int msb32 = (int) (iaddr >> 32);
+  int lsb32 = (int) (iaddr);
+  int count;
+  if (msb32 == 0 && lsb32 >= 0)
+    count = 1;
+  else if (msb32 == -1)
+    count = 2;
   else {
-    inst_count = 2;
-    if ( hi32 & 0x3ff )
-      inst_count++;
-    if ( lo32 & 0xFFFFFC00 ) {
-      if( (lo32 >> 20) & 0xfff ) inst_count += 2;
-      if( (lo32 >> 10) & 0x3ff ) inst_count += 2;
+    count = 2;
+    if (msb32 & 0x3ff)
+      count++;
+    if (lsb32 & 0xFFFFFC00 ) {
+      if ((lsb32 >> 20) & 0xfff)  count += 2;
+      if ((lsb32 >> 10) & 0x3ff)  count += 2;
     }
   }
-  return BytesPerInstWord * inst_count;
+  return count;
 #else
-  return BytesPerInstWord;
+  return 1;
 #endif
 }
 
-int MacroAssembler::worst_case_size_of_set() {
-  return size_of_sethi(NULL, true) + 1;
+int MacroAssembler::worst_case_insts_for_set() {
+  return insts_for_sethi(NULL, true) + 1;
 }
 
 
+// Keep in sync with MacroAssembler::insts_for_internal_set
 void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
   intptr_t value = addrlit.value();
 
@@ -1379,6 +1381,23 @@
   }
 }
 
+// Keep in sync with MacroAssembler::internal_set
+int MacroAssembler::insts_for_internal_set(intptr_t value) {
+  // can optimize
+  if (-4096 <= value && value <= 4095) {
+    return 1;
+  }
+  if (inv_hi22(hi22(value)) == value) {
+    return insts_for_sethi((address) value);
+  }
+  int count = insts_for_sethi((address) value);
+  AddressLiteral al(value);
+  if (al.low10() != 0) {
+    count++;
+  }
+  return count;
+}
+
 void MacroAssembler::set(const AddressLiteral& al, Register d) {
   internal_set(al, d, false);
 }
@@ -1443,11 +1462,11 @@
   }
 }
 
-int MacroAssembler::size_of_set64(jlong value) {
+int MacroAssembler::insts_for_set64(jlong value) {
   v9_dep();
 
-  int hi = (int)(value >> 32);
-  int lo = (int)(value & ~0);
+  int hi = (int) (value >> 32);
+  int lo = (int) (value & ~0);
   int count = 0;
 
   // (Matcher::isSimpleConstant64 knows about the following optimizations.)
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Dec 13 22:41:03 2010 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Tue Dec 14 12:44:30 2010 -0800
@@ -1884,23 +1884,24 @@
   void sethi(const AddressLiteral& addrlit, Register d);
   void patchable_sethi(const AddressLiteral& addrlit, Register d);
 
-  // compute the size of a sethi/set
-  static int  size_of_sethi( address a, bool worst_case = false );
-  static int  worst_case_size_of_set();
+  // compute the number of instructions for a sethi/set
+  static int  insts_for_sethi( address a, bool worst_case = false );
+  static int  worst_case_insts_for_set();
 
   // set may be either setsw or setuw (high 32 bits may be zero or sign)
 private:
   void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable);
+  static int insts_for_internal_set(intptr_t value);
 public:
   void set(const AddressLiteral& addrlit, Register d);
   void set(intptr_t value, Register d);
   void set(address addr, Register d, RelocationHolder const& rspec);
+  static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); }
+
   void patchable_set(const AddressLiteral& addrlit, Register d);
   void patchable_set(intptr_t value, Register d);
   void set64(jlong value, Register d, Register tmp);
-
-  // Compute size of set64.
-  static int size_of_set64(jlong value);
+  static int insts_for_set64(jlong value);
 
   // sign-extend 32 to 64
   inline void signx( Register s, Register d ) { sra( s, G0, d); }
--- a/src/cpu/sparc/vm/sparc.ad	Mon Dec 13 22:41:03 2010 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Tue Dec 14 12:44:30 2010 -0800
@@ -1086,9 +1086,9 @@
 uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
   if (UseRDPCForConstantTableBase) {
     // This is really the worst case but generally it's only 1 instruction.
-    return 4 /*rdpc*/ + 4 /*sub*/ + MacroAssembler::worst_case_size_of_set();
+    return (1 /*rdpc*/ + 1 /*sub*/ + MacroAssembler::worst_case_insts_for_set()) * BytesPerInstWord;
   } else {
-    return MacroAssembler::worst_case_size_of_set();
+    return MacroAssembler::worst_case_insts_for_set() * BytesPerInstWord;
   }
 }
 
@@ -1240,7 +1240,7 @@
 
 int MachEpilogNode::safepoint_offset() const {
   assert( do_polling(), "no return for this epilog node");
-  return MacroAssembler::size_of_sethi(os::get_polling_page());
+  return MacroAssembler::insts_for_sethi(os::get_polling_page()) * BytesPerInstWord;
 }
 
 //=============================================================================
@@ -3553,7 +3553,8 @@
   interface(CONST_INTER);
 %}
 
-// Pointer Immediate: 32 or 64-bit
+#ifdef _LP64
+// Pointer Immediate: 64-bit
 operand immP_set() %{
   predicate(!VM_Version::is_niagara1_plus());
   match(ConP);
@@ -3564,10 +3565,10 @@
   interface(CONST_INTER);
 %}
 
-// Pointer Immediate: 32 or 64-bit
+// Pointer Immediate: 64-bit
 // From Niagara2 processors on a load should be better than materializing.
 operand immP_load() %{
-  predicate(VM_Version::is_niagara1_plus());
+  predicate(VM_Version::is_niagara1_plus() && (n->bottom_type()->isa_oop_ptr() || (MacroAssembler::insts_for_set(n->get_ptr()) > 3)));
   match(ConP);
 
   op_cost(5);
@@ -3576,6 +3577,18 @@
   interface(CONST_INTER);
 %}
 
+// Pointer Immediate: 64-bit
+operand immP_no_oop_cheap() %{
+  predicate(VM_Version::is_niagara1_plus() && !n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set(n->get_ptr()) <= 3));
+  match(ConP);
+
+  op_cost(5);
+  // formats are generated automatically for constants and base registers
+  format %{ %}
+  interface(CONST_INTER);
+%}
+#endif
+
 operand immP13() %{
   predicate((-4096 < n->get_ptr()) && (n->get_ptr() <= 4095));
   match(ConP);
@@ -3673,7 +3686,7 @@
 
 // Long Immediate: cheap (materialize in <= 3 instructions)
 operand immL_cheap() %{
-  predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::size_of_set64(n->get_long()) <= 3);
+  predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::insts_for_set64(n->get_long()) <= 3);
   match(ConL);
   op_cost(0);
 
@@ -3683,7 +3696,7 @@
 
 // Long Immediate: expensive (materialize in > 3 instructions)
 operand immL_expensive() %{
-  predicate(VM_Version::is_niagara1_plus() && MacroAssembler::size_of_set64(n->get_long()) > 3);
+  predicate(VM_Version::is_niagara1_plus() && MacroAssembler::insts_for_set64(n->get_long()) > 3);
   match(ConL);
   op_cost(0);
 
@@ -6094,8 +6107,18 @@
   ins_cost(MEMORY_REF_COST);
   format %{ "LD     [$constanttablebase + $constantoffset],$dst\t! load from constant table: ptr=$con" %}
   ins_encode %{
-      RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
-     __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
+    __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
+  %}
+  ins_pipe(loadConP);
+%}
+
+instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
+  match(Set dst con);
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "SET    $con,$dst\t! non-oop ptr" %}
+  ins_encode %{
+    __ set($con$$constant, $dst$$Register);
   %}
   ins_pipe(loadConP);
 %}