changeset 849:fc4be448891f

6851742: (EA) allocation elimination doesn't work with UseG1GC Summary: Fix eliminate_card_mark() to eliminate G1 pre/post barriers. Reviewed-by: never
author kvn
date Thu, 16 Jul 2009 14:10:42 -0700
parents fd50a67f97d1
children 84770322b304 64219d2a6493
files src/share/vm/opto/escape.cpp src/share/vm/opto/graphKit.cpp src/share/vm/opto/graphKit.hpp src/share/vm/opto/idealKit.cpp src/share/vm/opto/idealKit.hpp src/share/vm/opto/ifnode.cpp src/share/vm/opto/library_call.cpp src/share/vm/opto/machnode.cpp src/share/vm/opto/macro.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/phaseX.hpp src/share/vm/opto/type.hpp
diffstat 12 files changed, 319 insertions(+), 236 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/opto/escape.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/escape.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -578,11 +578,24 @@
   if (phi_alias_idx == alias_idx) {
     return orig_phi;
   }
-  // have we already created a Phi for this alias index?
+  // Have we recently created a Phi for this alias index?
   PhiNode *result = get_map_phi(orig_phi->_idx);
   if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
     return result;
   }
+  // Previous check may fail when the same wide memory Phi was split into Phis
+  // for different memory slices. Search all Phis for this region.
+  if (result != NULL) {
+    Node* region = orig_phi->in(0);
+    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+      Node* phi = region->fast_out(i);
+      if (phi->is_Phi() &&
+          C->get_alias_index(phi->as_Phi()->adr_type()) == alias_idx) {
+        assert(phi->_idx >= nodes_size(), "only new Phi per instance memory slice");
+        return phi->as_Phi();
+      }
+    }
+  }
   if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) {
     if (C->do_escape_analysis() == true && !C->failing()) {
       // Retry compilation without escape analysis.
@@ -595,6 +608,7 @@
   orig_phi_worklist.append_if_missing(orig_phi);
   const TypePtr *atype = C->get_adr_type(alias_idx);
   result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
+  C->copy_node_notes_to(result, orig_phi);
   set_map_phi(orig_phi->_idx, result);
   igvn->set_type(result, result->bottom_type());
   record_for_optimizer(result);
--- a/src/share/vm/opto/graphKit.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -1373,11 +1373,12 @@
   return st;
 }
 
+
 void GraphKit::pre_barrier(Node* ctl,
                            Node* obj,
                            Node* adr,
-                           uint adr_idx,
-                           Node *val,
+                           uint  adr_idx,
+                           Node* val,
                            const TypeOopPtr* val_type,
                            BasicType bt) {
   BarrierSet* bs = Universe::heap()->barrier_set();
@@ -1385,7 +1386,7 @@
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1404,8 +1405,8 @@
                             Node* store,
                             Node* obj,
                             Node* adr,
-                            uint adr_idx,
-                            Node *val,
+                            uint  adr_idx,
+                            Node* val,
                             BasicType bt,
                             bool use_precise) {
   BarrierSet* bs = Universe::heap()->barrier_set();
@@ -1413,7 +1414,7 @@
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
+      g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1431,42 +1432,36 @@
   }
 }
 
-Node* GraphKit::store_oop_to_object(Node* ctl,
-                                    Node* obj,
-                                    Node* adr,
-                                    const TypePtr* adr_type,
-                                    Node *val,
-                                    const TypeOopPtr* val_type,
-                                    BasicType bt) {
+Node* GraphKit::store_oop(Node* ctl,
+                          Node* obj,
+                          Node* adr,
+                          const TypePtr* adr_type,
+                          Node* val,
+                          const TypeOopPtr* val_type,
+                          BasicType bt,
+                          bool use_precise) {
+
+  set_control(ctl);
+  if (stopped()) return top(); // Dead path ?
+
+  assert(bt == T_OBJECT, "sanity");
+  assert(val != NULL, "not dead path");
   uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, false);
+  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+
+  pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
+  post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
 
-Node* GraphKit::store_oop_to_array(Node* ctl,
-                                   Node* obj,
-                                   Node* adr,
-                                   const TypePtr* adr_type,
-                                   Node *val,
-                                   const TypeOopPtr* val_type,
-                                   BasicType bt) {
-  uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
-  return store;
-}
-
+// Could be an array or object we don't know at compile time (unsafe ref.)
 Node* GraphKit::store_oop_to_unknown(Node* ctl,
-                                     Node* obj,
-                                     Node* adr,
-                                     const TypePtr* adr_type,
-                                     Node *val,
-                                     BasicType bt) {
+                             Node* obj,   // containing obj
+                             Node* adr,  // actual adress to store val at
+                             const TypePtr* adr_type,
+                             Node* val,
+                             BasicType bt) {
   Compile::AliasType* at = C->alias_type(adr_type);
   const TypeOopPtr* val_type = NULL;
   if (adr_type->isa_instptr()) {
@@ -1485,12 +1480,7 @@
   if (val_type == NULL) {
     val_type = TypeInstPtr::BOTTOM;
   }
-
-  uint adr_idx = at->index();
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
-  return store;
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
 }
 
 
@@ -1804,93 +1794,6 @@
 }
 
 
-//------------------------------store_barrier----------------------------------
-// Insert a write-barrier store.  This is to let generational GC work; we have
-// to flag all oop-stores before the next GC point.
-void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr,
-                                  Node* val, bool use_precise) {
-  // No store check needed if we're storing a NULL or an old object
-  // (latter case is probably a string constant). The concurrent
-  // mark sweep garbage collector, however, needs to have all nonNull
-  // oop updates flagged via card-marks.
-  if (val != NULL && val->is_Con()) {
-    // must be either an oop or NULL
-    const Type* t = val->bottom_type();
-    if (t == TypePtr::NULL_PTR || t == Type::TOP)
-      // stores of null never (?) need barriers
-      return;
-    ciObject* con = t->is_oopptr()->const_oop();
-    if (con != NULL
-        && con->is_perm()
-        && Universe::heap()->can_elide_permanent_oop_store_barriers())
-      // no store barrier needed, because no old-to-new ref created
-      return;
-  }
-
-  if (use_ReduceInitialCardMarks()
-      && obj == just_allocated_object(control())) {
-    // We can skip marks on a freshly-allocated object.
-    // Keep this code in sync with do_eager_card_mark in runtime.cpp.
-    // That routine eagerly marks the occasional object which is produced
-    // by the slow path, so that we don't have to do it here.
-    return;
-  }
-
-  if (!use_precise) {
-    // All card marks for a (non-array) instance are in one place:
-    adr = obj;
-  }
-  // (Else it's an array (or unknown), and we want more precise card marks.)
-  assert(adr != NULL, "");
-
-  // Get the alias_index for raw card-mark memory
-  int adr_type = Compile::AliasIdxRaw;
-  // Convert the pointer to an int prior to doing math on it
-  Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr));
-  // Divide by card size
-  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
-         "Only one we handle so far.");
-  CardTableModRefBS* ct =
-    (CardTableModRefBS*)(Universe::heap()->barrier_set());
-  Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
-  // We store into a byte array, so do not bother to left-shift by zero
-  Node *c = byte_map_base_node();
-  // Combine
-  Node *sb_ctl = control();
-  Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
-  Node *sb_val = _gvn.intcon(0);
-  // Smash zero into card
-  if( !UseConcMarkSweepGC ) {
-    BasicType bt = T_BYTE;
-    store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type);
-  } else {
-    // Specialized path for CM store barrier
-    cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store);
-  }
-}
-
-// Specialized path for CMS store barrier
-void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) {
-  BasicType bt = T_BYTE;
-  int adr_idx = Compile::AliasIdxRaw;
-  Node* mem = memory(adr_idx);
-
-  // The type input is NULL in PRODUCT builds
-  const TypePtr* type = NULL;
-  debug_only(type = C->get_adr_type(adr_idx));
-
-  // Add required edge to oop_store, optimizer does not support precedence edges.
-  // Convert required edge to precedence edge before allocation.
-  Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) );
-  set_memory(store, adr_idx);
-
-  // For CMS, back-to-back card-marks can only remove the first one
-  // and this requires DU info.  Push on worklist for optimizer.
-  if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
-    record_for_igvn(store);
-}
-
-
 void GraphKit::round_double_arguments(ciMethod* dest_method) {
   // (Note:  TypeFunc::make has a cache that makes this fast.)
   const TypeFunc* tf    = TypeFunc::make(dest_method);
@@ -3215,6 +3118,79 @@
   return NULL;
 }
 
+//----------------------------- store barriers ----------------------------
+#define __ ideal.
+
+void GraphKit::sync_kit(IdealKit& ideal) {
+  // Final sync IdealKit and graphKit.
+  __ drain_delay_transform();
+  set_all_memory(__ merged_memory());
+  set_control(__ ctrl());
+}
+
+// vanilla/CMS post barrier
+// Insert a write-barrier store.  This is to let generational GC work; we have
+// to flag all oop-stores before the next GC point.
+void GraphKit::write_barrier_post(Node* oop_store,
+                                  Node* obj,
+                                  Node* adr,
+                                  Node* val,
+                                  bool use_precise) {
+  // No store check needed if we're storing a NULL or an old object
+  // (latter case is probably a string constant). The concurrent
+  // mark sweep garbage collector, however, needs to have all nonNull
+  // oop updates flagged via card-marks.
+  if (val != NULL && val->is_Con()) {
+    // must be either an oop or NULL
+    const Type* t = val->bottom_type();
+    if (t == TypePtr::NULL_PTR || t == Type::TOP)
+      // stores of null never (?) need barriers
+      return;
+    ciObject* con = t->is_oopptr()->const_oop();
+    if (con != NULL
+        && con->is_perm()
+        && Universe::heap()->can_elide_permanent_oop_store_barriers())
+      // no store barrier needed, because no old-to-new ref created
+      return;
+  }
+
+  if (!use_precise) {
+    // All card marks for a (non-array) instance are in one place:
+    adr = obj;
+  }
+  // (Else it's an array (or unknown), and we want more precise card marks.)
+  assert(adr != NULL, "");
+
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+
+  // Convert the pointer to an int prior to doing math on it
+  Node* cast = __ CastPX(__ ctrl(), adr);
+
+  // Divide by card size
+  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
+         "Only one we handle so far.");
+  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
+
+  // Combine card table base and card offset
+  Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
+
+  // Get the alias_index for raw card-mark memory
+  int adr_type = Compile::AliasIdxRaw;
+  // Smash zero into card
+  Node*   zero = __ ConI(0);
+  BasicType bt = T_BYTE;
+  if( !UseConcMarkSweepGC ) {
+    __ store(__ ctrl(), card_adr, zero, bt, adr_type);
+  } else {
+    // Specialized path for CM store barrier
+    __ storeCM(__ ctrl(), card_adr, zero, oop_store, bt, adr_type);
+  }
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
+
+// G1 pre/post barriers
 void GraphKit::g1_write_barrier_pre(Node* obj,
                                     Node* adr,
                                     uint alias_idx,
@@ -3222,10 +3198,8 @@
                                     const TypeOopPtr* val_type,
                                     BasicType bt) {
   IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();
-
-  Node* thread = __ thread();
+
+  Node* tls = __ thread(); // ThreadLocalStorage
 
   Node* no_ctrl = NULL;
   Node* no_base = __ top();
@@ -3248,9 +3222,9 @@
 
   // set_control( ctl);
 
-  Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset));
-  Node* buffer_adr  = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr   = __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
+  Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some of the values
 
@@ -3278,55 +3252,52 @@
         Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
         Node* next_indexX = next_index;
 #ifdef _LP64
-          // We could refine the type for what it's worth
-          // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
-          next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
-#endif // _LP64
+        // We could refine the type for what it's worth
+        // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+        next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif
 
         // Now get the buffer location we will log the original value into and store it
-
         Node *log_addr = __ AddP(no_base, buffer, next_indexX);
-        // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM));
         __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
 
-
         // update the index
-        // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
-        // This is a hack to force this store to occur before the oop store that is coming up
-        __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM));
+        __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
 
       } __ else_(); {
 
         // logging buffer is full, call the runtime
         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
-        // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread);
-        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread);
-      } __ end_if();
-    } __ end_if();
-  } __ end_if();
-
-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
-
-#undef __
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls);
+      } __ end_if();  // (!index)
+    } __ end_if();  // (orig != NULL)
+  } __ end_if();  // (!marking)
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
 }
 
 //
 // Update the card table and add card address to the queue
 //
-void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) {
-#define __ ideal->
+void GraphKit::g1_mark_card(IdealKit& ideal,
+                            Node* card_adr,
+                            Node* oop_store,
+                            Node* index,
+                            Node* index_adr,
+                            Node* buffer,
+                            const TypeFunc* tf) {
+
   Node* zero = __ ConI(0);
   Node* no_base = __ top();
   BasicType card_bt = T_BYTE;
   // Smash zero into card. MUST BE ORDERED WRT TO STORE
-  __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw);
+  __ storeCM(__ ctrl(), card_adr, zero, oop_store, card_bt, Compile::AliasIdxRaw);
 
   //  Now do the queue work
   __ if_then(index, BoolTest::ne, zero); {
 
-    Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+    Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t)));
     Node* next_indexX = next_index;
 #ifdef _LP64
     // We could refine the type for what it's worth
@@ -3341,10 +3312,10 @@
   } __ else_(); {
     __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
   } __ end_if();
-#undef __
+
 }
 
-void GraphKit::g1_write_barrier_post(Node* store,
+void GraphKit::g1_write_barrier_post(Node* oop_store,
                                      Node* obj,
                                      Node* adr,
                                      uint alias_idx,
@@ -3369,10 +3340,8 @@
   assert(adr != NULL, "");
 
   IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();
-
-  Node* thread = __ thread();
+
+  Node* tls = __ thread(); // ThreadLocalStorage
 
   Node* no_ctrl = NULL;
   Node* no_base = __ top();
@@ -3394,8 +3363,8 @@
 
   // Pointers into the thread
 
-  Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr =  __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr =  __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some values
 
@@ -3404,18 +3373,14 @@
 
 
   // Convert the store obj pointer to an int prior to doing math on it
-  // Use addr not obj gets accurate card marks
-
-  // Node* cast = __ CastPX(no_ctrl, adr /* obj */);
-
   // Must use ctrl to prevent "integerized oop" existing across safepoint
-  Node* cast =  __ CastPX(__ ctrl(), ( use_precise ? adr : obj ));
+  Node* cast =  __ CastPX(__ ctrl(), adr);
 
   // Divide pointer by card size
   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
 
   // Combine card table base and card offset
-  Node *card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
+  Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
 
   // If we know the value being stored does it cross regions?
 
@@ -3439,18 +3404,17 @@
         Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
 
         __ if_then(card_val, BoolTest::ne, zero); {
-          g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+          g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
         } __ end_if();
       } __ end_if();
     } __ end_if();
   } else {
-    g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+    // Object.clone() instrinsic uses this path.
+    g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
   }
 
-
-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
 #undef __
 
-}
--- a/src/share/vm/opto/graphKit.hpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Thu Jul 16 14:10:42 2009 -0700
@@ -449,13 +449,24 @@
   //
   // If val==NULL, it is taken to be a completely unknown value. QQQ
 
+  Node* store_oop(Node* ctl,
+                  Node* obj,   // containing obj
+                  Node* adr,  // actual adress to store val at
+                  const TypePtr* adr_type,
+                  Node* val,
+                  const TypeOopPtr* val_type,
+                  BasicType bt,
+                  bool use_precise);
+
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
                             Node* adr,  // actual adress to store val at
                             const TypePtr* adr_type,
                             Node* val,
                             const TypeOopPtr* val_type,
-                            BasicType bt);
+                            BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false);
+  }
 
   Node* store_oop_to_array(Node* ctl,
                            Node* obj,   // containing obj
@@ -463,7 +474,9 @@
                            const TypePtr* adr_type,
                            Node* val,
                            const TypeOopPtr* val_type,
-                           BasicType bt);
+                           BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+  }
 
   // Could be an array or object we don't know at compile time (unsafe ref.)
   Node* store_oop_to_unknown(Node* ctl,
@@ -488,9 +501,6 @@
   // Return a load of array element at idx.
   Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
 
-  // CMS card-marks have an input from the corresponding oop_store
-  void  cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store);
-
   //---------------- Dtrace support --------------------
   void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry);
   void make_dtrace_method_entry(ciMethod* method) {
@@ -582,9 +592,6 @@
     return C->too_many_recompiles(method(), bci(), reason);
   }
 
-  // vanilla/CMS post barrier
-  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
-
   // Returns the object (if any) which was created the moment before.
   Node* just_allocated_object(Node* current_control);
 
@@ -593,6 +600,11 @@
             && Universe::heap()->can_elide_tlab_store_barriers());
   }
 
+  void sync_kit(IdealKit& ideal);
+
+  // vanilla/CMS post barrier
+  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
+
   // G1 pre/post barriers
   void g1_write_barrier_pre(Node* obj,
                             Node* adr,
@@ -610,7 +622,7 @@
                              bool use_precise);
   // Helper function for g1
   private:
-  void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
+  void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
                     Node* buffer, const TypeFunc* tf);
 
   public:
--- a/src/share/vm/opto/idealKit.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/idealKit.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -34,7 +34,7 @@
 const uint IdealKit::first_var = TypeFunc::Parms + 1;
 
 //----------------------------IdealKit-----------------------------------------
-IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) :
+IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms, bool has_declarations) :
   _gvn(gvn), C(gvn.C) {
   _initial_ctrl = control;
   _initial_memory = mem;
@@ -47,6 +47,9 @@
   _pending_cvstates = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
   _delay_transform  = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
   DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray<int>(C->node_arena(), init_size, 0, 0));
+  if (!has_declarations) {
+     declarations_done();
+  }
 }
 
 //-------------------------------if_then-------------------------------------
@@ -97,7 +100,7 @@
 //-------------------------------end_if-------------------------------------
 // Merge the "then" and "else" cvstates.
 //
-// The if_then() pushed the current state for later use
+// The if_then() pushed a copy of the current state for later use
 // as the initial state for a future "else" clause.  The
 // current state then became the initial state for the
 // then clause.  If an "else" clause was encountered, it will
@@ -258,8 +261,8 @@
   return delay_transform(PhiNode::make(reg, n, ct));
 }
 
-//-----------------------------declares_done-----------------------------------
-void IdealKit::declares_done() {
+//-----------------------------declarations_done-------------------------------
+void IdealKit::declarations_done() {
   _cvstate = new_cvstate();   // initialize current cvstate
   set_ctrl(_initial_ctrl);    // initialize control in current cvstate
   set_all_memory(_initial_memory);// initialize memory in current cvstate
@@ -277,7 +280,9 @@
 
 //-----------------------------delay_transform-----------------------------------
 Node* IdealKit::delay_transform(Node* n) {
-  gvn().set_type(n, n->bottom_type());
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    gvn().set_type(n, n->bottom_type());
+  }
   _delay_transform->push(n);
   return n;
 }
@@ -321,7 +326,9 @@
 Node* IdealKit::memory(uint alias_idx) {
   MergeMemNode* mem = merged_memory();
   Node* p = mem->memory_at(alias_idx);
-  _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  }
   return p;
 }
 
@@ -462,9 +469,6 @@
   const TypePtr* adr_type = TypeRawPtr::BOTTOM;
   uint adr_idx = C->get_alias_index(adr_type);
 
-  // Clone initial memory
-  MergeMemNode* cloned_mem =  MergeMemNode::make(C, merged_memory());
-
   // Slow-path leaf call
   int size = slow_call_type->domain()->cnt();
   CallNode *call =  (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type);
@@ -489,9 +493,6 @@
 
   set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
 
-  // Set the incoming clone of memory as current memory
-  set_all_memory(cloned_mem);
-
   // Make memory for the call
   Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
 
--- a/src/share/vm/opto/idealKit.hpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/idealKit.hpp	Thu Jul 16 14:10:42 2009 -0700
@@ -49,7 +49,7 @@
 // Example:
 //    Node* limit = ??
 //    IdealVariable i(kit), j(kit);
-//    declares_done();
+//    declarations_done();
 //    Node* exit = make_label(1); // 1 goto
 //    set(j, ConI(0));
 //    loop(i, ConI(0), BoolTest::lt, limit); {
@@ -101,10 +101,7 @@
   Node* new_cvstate();                     // Create a new cvstate
   Node* cvstate() { return _cvstate; }     // current cvstate
   Node* copy_cvstate();                    // copy current cvstate
-  void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); }
 
-  // Should this assert this is a MergeMem???
-  void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); }
   void set_memory(Node* mem, uint alias_idx );
   void do_memory_merge(Node* merging, Node* join);
   void clear(Node* m);                     // clear a cvstate
@@ -132,15 +129,17 @@
   Node* memory(uint alias_idx);
 
  public:
-  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false);
+  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false, bool has_declarations = false);
   ~IdealKit() {
     stop();
     drain_delay_transform();
   }
   // Control
   Node* ctrl()                          { return _cvstate->in(TypeFunc::Control); }
+  void set_ctrl(Node* ctrl)             { _cvstate->set_req(TypeFunc::Control, ctrl); }
   Node* top()                           { return C->top(); }
   MergeMemNode* merged_memory()         { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); }
+  void set_all_memory(Node* mem)        { _cvstate->set_req(TypeFunc::Memory, mem); }
   void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); }
   Node* value(IdealVariable& v)         { return _cvstate->in(first_var + v.id()); }
   void dead(IdealVariable& v)           { set(v, (Node*)NULL); }
@@ -155,7 +154,7 @@
   Node* make_label(int goto_ct);
   void bind(Node* lab);
   void goto_(Node* lab, bool bind = false);
-  void declares_done();
+  void declarations_done();
   void drain_delay_transform();
 
   Node* IfTrue(IfNode* iff)  { return transform(new (C,1) IfTrueNode(iff)); }
--- a/src/share/vm/opto/ifnode.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/ifnode.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -378,7 +378,18 @@
 
   // Force the original merge dead
   igvn->hash_delete(r);
-  r->set_req_X(0,NULL,igvn);
+  // First, remove region's dead users.
+  for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) {
+    Node* u = r->last_out(l);
+    if( u == r ) {
+      r->set_req(0, NULL);
+    } else {
+      assert(u->outcnt() == 0, "only dead users");
+      igvn->remove_dead_node(u);
+    }
+    l -= 1;
+  }
+  igvn->remove_dead_node(r);
 
   // Now remove the bogus extra edges used to keep things alive
   igvn->remove_dead_node( hook );
--- a/src/share/vm/opto/library_call.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/library_call.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -1030,7 +1030,7 @@
   const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
   const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
 
-  IdealKit kit(gvn(), control(), merged_memory());
+  IdealKit kit(gvn(), control(), merged_memory(), false, true);
 #define __ kit.
   Node* zero             = __ ConI(0);
   Node* one              = __ ConI(1);
@@ -1042,7 +1042,7 @@
   Node* targetOffset     = __ ConI(targetOffset_i);
   Node* sourceEnd        = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
 
-  IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
+  IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done();
   Node* outer_loop = __ make_label(2 /* goto */);
   Node* return_    = __ make_label(1);
 
@@ -1079,9 +1079,9 @@
        __ bind(outer_loop);
   }__ end_loop(); __ dead(i);
   __ bind(return_);
-  __ drain_delay_transform();
-
-  set_control(__ ctrl());
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(kit);
   Node* result = __ value(rtn);
 #undef __
   C->set_has_loops(true);
@@ -2183,14 +2183,23 @@
         // of it. So we need to emit code to conditionally do the proper type of
         // store.
 
-        IdealKit kit(gvn(), control(),  merged_memory());
-        kit.declares_done();
+        IdealKit ideal(gvn(), control(),  merged_memory());
+#define __ ideal.
         // QQQ who knows what probability is here??
-        kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
-          (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
-        } kit.else_(); {
-          (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
-        } kit.end_if();
+        __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
+          // Sync IdealKit and graphKit.
+          set_all_memory( __ merged_memory());
+          set_control(__ ctrl());
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+          // Update IdealKit memory.
+          __ set_all_memory(merged_memory());
+          __ set_ctrl(control());
+        } __ else_(); {
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile);
+        } __ end_if();
+        // Final sync IdealKit and GraphKit.
+        sync_kit(ideal);
+#undef __
       }
     }
   }
--- a/src/share/vm/opto/machnode.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/machnode.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -300,6 +300,12 @@
           }
         }
         adr_type = t_disp->add_offset(offset);
+      } else if( base == NULL && offset != 0 && offset != Type::OffsetBot ) {
+        // Use ideal type if it is oop ptr.
+        const TypePtr *tp = oper->type()->isa_ptr();
+        if( tp != NULL) {
+          adr_type = tp;
+        }
       }
     }
 
--- a/src/share/vm/opto/macro.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/macro.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -198,14 +198,79 @@
 }
 
 // Eliminate a card mark sequence.  p2x is a ConvP2XNode
-void PhaseMacroExpand::eliminate_card_mark(Node *p2x) {
+void PhaseMacroExpand::eliminate_card_mark(Node* p2x) {
   assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required");
-  Node *shift = p2x->unique_out();
-  Node *addp = shift->unique_out();
-  for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
-    Node *st = addp->last_out(j);
-    assert(st->is_Store(), "store required");
-    _igvn.replace_node(st, st->in(MemNode::Memory));
+  if (!UseG1GC) {
+    // vanilla/CMS post barrier
+    Node *shift = p2x->unique_out();
+    Node *addp = shift->unique_out();
+    for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
+      Node *st = addp->last_out(j);
+      assert(st->is_Store(), "store required");
+      _igvn.replace_node(st, st->in(MemNode::Memory));
+    }
+  } else {
+    // G1 pre/post barriers
+    assert(p2x->outcnt() == 2, "expects 2 users: Xor and URShift nodes");
+    // It could be only one user, URShift node, in Object.clone() instrinsic
+    // but the new allocation is passed to arraycopy stub and it could not
+    // be scalar replaced. So we don't check the case.
+
+    // Remove G1 post barrier.
+
+    // Search for CastP2X->Xor->URShift->Cmp path which
+    // checks if the store done to a different from the value's region.
+    // And replace Cmp with #0 (false) to collapse G1 post barrier.
+    Node* xorx = NULL;
+    for (DUIterator_Fast imax, i = p2x->fast_outs(imax); i < imax; i++) {
+      Node* u = p2x->fast_out(i);
+      if (u->Opcode() == Op_XorX) {
+        xorx = u;
+        break;
+      }
+    }
+    assert(xorx != NULL, "missing G1 post barrier");
+    Node* shift = xorx->unique_out();
+    Node* cmpx = shift->unique_out();
+    assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
+    cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
+    "missing region check in G1 post barrier");
+    _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+
+    // Remove G1 pre barrier.
+
+    // Search "if (marking != 0)" check and set it to "false".
+    Node* this_region = p2x->in(0);
+    assert(this_region != NULL, "");
+    // There is no G1 pre barrier if previous stored value is NULL
+    // (for example, after initialization).
+    if (this_region->is_Region() && this_region->req() == 3) {
+      int ind = 1;
+      if (!this_region->in(ind)->is_IfFalse()) {
+        ind = 2;
+      }
+      if (this_region->in(ind)->is_IfFalse()) {
+        Node* bol = this_region->in(ind)->in(0)->in(1);
+        assert(bol->is_Bool(), "");
+        cmpx = bol->in(1);
+        if (bol->as_Bool()->_test._test == BoolTest::ne &&
+            cmpx->is_Cmp() && cmpx->in(2) == intcon(0) &&
+            cmpx->in(1)->is_Load()) {
+          Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
+          const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
+                                              PtrQueue::byte_offset_of_active());
+          if (adr->is_AddP() && adr->in(AddPNode::Base) == top() &&
+              adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
+              adr->in(AddPNode::Offset) == MakeConX(marking_offset)) {
+            _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+          }
+        }
+      }
+    }
+    // Now CastP2X can be removed since it is used only on dead path
+    // which currently still alive until igvn optimize it.
+    assert(p2x->unique_out()->Opcode() == Op_URShiftX, "");
+    _igvn.replace_node(p2x, top());
   }
 }
 
@@ -760,14 +825,11 @@
           if (n->is_Store()) {
             _igvn.replace_node(n, n->in(MemNode::Memory));
           } else {
-            assert( n->Opcode() == Op_CastP2X, "CastP2X required");
             eliminate_card_mark(n);
           }
           k -= (oc2 - use->outcnt());
         }
       } else {
-        assert( !use->is_SafePoint(), "safepoint uses must have been already elimiated");
-        assert( use->Opcode() == Op_CastP2X, "CastP2X required");
         eliminate_card_mark(use);
       }
       j -= (oc1 - res->outcnt());
--- a/src/share/vm/opto/matcher.cpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/matcher.cpp	Thu Jul 16 14:10:42 2009 -0700
@@ -1489,8 +1489,7 @@
 #ifdef ASSERT
     // Verify adr type after matching memory operation
     const MachOper* oper = mach->memory_operand();
-    if (oper != NULL && oper != (MachOper*)-1 &&
-        mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode
+    if (oper != NULL && oper != (MachOper*)-1) {
       // It has a unique memory operand.  Find corresponding ideal mem node.
       Node* m = NULL;
       if (leaf->is_Mem()) {
--- a/src/share/vm/opto/phaseX.hpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/phaseX.hpp	Thu Jul 16 14:10:42 2009 -0700
@@ -450,6 +450,8 @@
     subsume_node(old, nn);
   }
 
+  bool delay_transform() const { return _delay_transform; }
+
   void set_delay_transform(bool delay) {
     _delay_transform = delay;
   }
--- a/src/share/vm/opto/type.hpp	Wed Jul 15 13:37:35 2009 -0700
+++ b/src/share/vm/opto/type.hpp	Thu Jul 16 14:10:42 2009 -0700
@@ -1216,6 +1216,8 @@
 #define Op_AndX      Op_AndL
 #define Op_AddX      Op_AddL
 #define Op_SubX      Op_SubL
+#define Op_XorX      Op_XorL
+#define Op_URShiftX  Op_URShiftL
 // conversions
 #define ConvI2X(x)   ConvI2L(x)
 #define ConvL2X(x)   (x)
@@ -1258,6 +1260,8 @@
 #define Op_AndX      Op_AndI
 #define Op_AddX      Op_AddI
 #define Op_SubX      Op_SubI
+#define Op_XorX      Op_XorI
+#define Op_URShiftX  Op_URShiftI
 // conversions
 #define ConvI2X(x)   (x)
 #define ConvL2X(x)   ConvL2I(x)