changeset 1362:9e321dcfa5b7

6940726: Use BIS instruction for allocation prefetch on Sparc
Summary: Use BIS instruction for allocation prefetch on Sparc
Reviewed-by: twisti
author kvn
date Wed, 07 Apr 2010 12:39:27 -0700
parents b9d85fcdf743
children 93767e6a2dfd
files src/cpu/sparc/vm/sparc.ad src/cpu/sparc/vm/vm_version_sparc.cpp src/share/vm/memory/threadLocalAllocBuffer.hpp src/share/vm/opto/macro.cpp src/share/vm/opto/memnode.hpp src/share/vm/runtime/globals.hpp
diffstat 6 files changed, 93 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/sparc.ad	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Apr 07 12:39:27 2010 -0700
@@ -471,6 +471,9 @@
 source %{
 #define __ _masm.
 
+// Block initializing store
+#define ASI_BLK_INIT_QUAD_LDD_P    0xE2
+
 // tertiary op of a LoadP or StoreP encoding
 #define REGP_OP true
 
@@ -6147,6 +6150,7 @@
 %}
 
 instruct prefetchw( memory mem ) %{
+  predicate(AllocatePrefetchStyle != 3 );
   match( PrefetchWrite mem );
   ins_cost(MEMORY_REF_COST);
 
@@ -6156,6 +6160,23 @@
   ins_pipe(iload_mem);
 %}
 
+// Allocation prefetch using a BIS (block initializing store); selected only when AllocatePrefetchStyle == 3.
+instruct prefetchw_bis( memory mem ) %{
+  predicate(AllocatePrefetchStyle == 3);
+  match( PrefetchWrite mem );
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STXA   G0,$mem\t! // Block initializing store" %}
+  ins_encode %{
+     Register base = as_Register($mem$$base);
+     int disp = $mem$$disp;
+     if (disp != 0) {  // NOTE(review): disp is only tested here, its value is never added
+       __ add(base, AllocatePrefetchStepSize, base);  // overwrites base, advancing it by one step; presumably relies on successive prefetches being one step apart - confirm
+     }
+     __ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);  // STXA of G0 through the block-initializing-store ASI defined above
+  %}
+  ins_pipe(istore_mem_reg);
+%}
 
 //----------Store Instructions-------------------------------------------------
 // Store Byte
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Apr 07 12:39:27 2010 -0700
@@ -86,9 +86,19 @@
     if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
       FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
     }
-    if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-      // Use smaller prefetch distance on N2
-      FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+    if (is_niagara1_plus()) {
+      if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+        // Use BIS instruction for allocation prefetch.
+        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
+        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+          // Use smaller prefetch distance on N2 with BIS
+          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
+        }
+      }
+      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        // Use different prefetch distance without BIS
+        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+      }
     }
 #endif
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -111,7 +111,22 @@
 
   // Allocate size HeapWords. The memory is NOT initialized to zero.
   inline HeapWord* allocate(size_t size);
-  static size_t alignment_reserve()              { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
+
+  // Size of the space reserved at the end of a TLAB (alignment_reserve_in_bytes() scales it by HeapWordSize).
+  static size_t end_reserve() {
+    int reserve_size = typeArrayOopDesc::header_size(T_INT);
+    if (AllocatePrefetchStyle == 3) {
+      // BIS is used to prefetch - we need space for it.
+      // Two extra lines: +1 for rounding up to the next cache line, +1 to be safe.
+      int lines = AllocatePrefetchLines + 2;
+      int step_size = AllocatePrefetchStepSize;
+      int distance = AllocatePrefetchDistance;
+      int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;  // byte extent divided down by HeapWordSize
+      reserve_size = MAX2(reserve_size, prefetch_end);  // never go below the default reserve
+    }
+    return reserve_size;
+  }
+  static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
 
   // Return tlab size or remaining space in eden such that the
--- a/src/share/vm/opto/macro.cpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/opto/macro.cpp	Wed Apr 07 12:39:27 2010 -0700
@@ -1487,11 +1487,11 @@
                                         Node*& contended_phi_rawmem,
                                         Node* old_eden_top, Node* new_eden_top,
                                         Node* length) {
+   enum { fall_in_path = 1, pf_path = 2 };
    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
       // Generate prefetch allocation with watermark check.
       // As an allocation hits the watermark, we will prefetch starting
       // at a "distance" away from watermark.
-      enum { fall_in_path = 1, pf_path = 2 };
 
       Node *pf_region = new (C, 3) RegionNode(3);
       Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
@@ -1570,6 +1570,45 @@
       needgc_false = pf_region;
       contended_phi_rawmem = pf_phi_rawmem;
       i_o = pf_phi_abio;
+   } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
+      // Insert a prefetch for each allocation only on the fast-path
+      Node *pf_region = new (C, 3) RegionNode(3);
+      Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
+                                                TypeRawPtr::BOTTOM );
+
+      // Generate several prefetch instructions only for arrays.
+      uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
+      uint step_size = AllocatePrefetchStepSize;
+      uint distance = AllocatePrefetchDistance;
+
+      // Next cache address.
+      Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
+                                            _igvn.MakeConX(distance));
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
+      transform_later(cache_adr);
+      Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
+      cache_adr = new (C, 3) AndXNode(cache_adr, mask);
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastX2PNode(cache_adr);
+      transform_later(cache_adr);
+
+      // Prefetch
+      Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
+      prefetch->set_req(0, needgc_false);
+      transform_later(prefetch);
+      contended_phi_rawmem = prefetch;
+      Node *prefetch_adr;
+      distance = step_size;
+      for ( uint i = 1; i < lines; i++ ) {
+        prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
+                                            _igvn.MakeConX(distance) );
+        transform_later(prefetch_adr);
+        prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
+        transform_later(prefetch);
+        distance += step_size;
+        contended_phi_rawmem = prefetch;
+      }
    } else if( AllocatePrefetchStyle > 0 ) {
       // Insert a prefetch for each allocation only on the fast-path
       Node *prefetch_adr;
--- a/src/share/vm/opto/memnode.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/opto/memnode.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -1244,5 +1244,5 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return NotAMachineReg; }
   virtual uint match_edge(uint idx) const { return idx==2; }
-  virtual const Type *bottom_type() const { return Type::ABIO; }
+  virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; } // NOTE(review): presumably MEMORY because style 3 threads the prefetch into the raw memory chain (macro.cpp) - confirm
 };
--- a/src/share/vm/runtime/globals.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -2708,7 +2708,8 @@
   product(intx,  AllocatePrefetchStyle, 1,                                  \
           "0 = no prefetch, "                                               \
           "1 = prefetch instructions for each allocation, "                 \
-          "2 = use TLAB watermark to gate allocation prefetch")             \
+          "2 = use TLAB watermark to gate allocation prefetch, "            \
+          "3 = use BIS instruction on Sparc for allocation prefetch")       \
                                                                             \
   product(intx,  AllocatePrefetchDistance, -1,                              \
           "Distance to prefetch ahead of allocation pointer")               \