changeset 525:b36996d65c38

6817608: G1: backports of G1 CRs from HS15 to HS14
Summary: Backports of the following G1 CRs from HS15 to HS14: 6820321, 6815683, 6816154, 6817419, 6604422, 6728271, 6760309, 6814467, 6812428, 6810698, 6720309, 6720334, 6804746, 6700941, 6802413, 6484959, 6797754, 6793828, 6484956
Reviewed-by: apetrusenko, iveresov, jcoomes, jmasa, jrose, kvn
author tonyp
date Tue, 24 Mar 2009 13:09:52 -0400
parents c9ede57210d9
children 3d382cdd4c38
files src/share/vm/gc_implementation/g1/collectionSetChooser.hpp src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/concurrentMark.hpp src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp src/share/vm/gc_implementation/g1/g1MMUTracker.hpp src/share/vm/gc_implementation/g1/g1OopClosures.hpp src/share/vm/gc_implementation/g1/g1RemSet.cpp src/share/vm/gc_implementation/g1/g1RemSet.hpp src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp src/share/vm/gc_implementation/g1/g1_globals.hpp src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp src/share/vm/gc_implementation/g1/heapRegion.hpp src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp src/share/vm/gc_implementation/g1/ptrQueue.cpp src/share/vm/gc_implementation/g1/ptrQueue.hpp src/share/vm/gc_implementation/g1/sparsePRT.cpp src/share/vm/gc_implementation/g1/sparsePRT.hpp src/share/vm/gc_implementation/g1/survRateGroup.cpp src/share/vm/gc_implementation/g1/survRateGroup.hpp src/share/vm/gc_implementation/includeDB_gc_g1 src/share/vm/gc_implementation/includeDB_gc_shared src/share/vm/gc_implementation/shared/ageTable.cpp src/share/vm/gc_implementation/shared/ageTable.hpp src/share/vm/memory/cardTableModRefBS.cpp src/share/vm/memory/cardTableModRefBS.hpp src/share/vm/opto/graphKit.cpp src/share/vm/utilities/workgroup.hpp
diffstat 38 files changed, 1285 insertions(+), 450 deletions(-)
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -24,7 +24,7 @@
 
 // We need to sort heap regions by collection desirability.
 
-class CSetChooserCache {
+class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
     CacheLength = 16
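
The VALUE_OBJ_CLASS_SPEC annotation added here (and to the concurrent-mark classes further down) ties into HotSpot's allocation discipline: value-object classes may only be embedded in other objects or live on the stack, while CHeapObj subclasses are malloc'ed on the C heap. A minimal sketch of the pattern, simplified from the real definitions in src/share/vm/memory/allocation.hpp:

    // Embedded-only classes: heap-allocating one is a bug, so the
    // operators trap (simplified; not the exact allocation.hpp text).
    class _ValueObj {
     public:
      void* operator new(size_t size) { ShouldNotCallThis(); return NULL; }
      void  operator delete(void* p)  { ShouldNotCallThis(); }
    };
    #define VALUE_OBJ_CLASS_SPEC : public _ValueObj

    // C-heap classes: allocated outside the Java heap.
    class CHeapObj {
     public:
      void* operator new(size_t size) { return (void*) AllocateHeap(size, "CHeapObj"); }
      void  operator delete(void* p)  { FreeHeap(p); }
    };
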
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -145,14 +145,9 @@
   if (G1RSBarrierUseQueue) {
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
     dcqs.abandon_logs();
-    if (_cg1rThread->do_traversal()) {
-      _pya = PYA_restart;
-    } else {
-      _cg1rThread->set_do_traversal(true);
-      // Reset the post-yield actions.
-      _pya = PYA_continue;
-      _last_pya = PYA_continue;
-    }
+    // Reset the post-yield actions.
+    _pya = PYA_continue;
+    _last_pya = PYA_continue;
   } else {
     _pya = PYA_restart;
   }
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -33,7 +33,7 @@
   PYA_cancel     // It's been completed by somebody else: cancel.
 };
 
-class ConcurrentG1Refine {
+class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread* _cg1rThread;
 
   volatile jint _pya;
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -133,14 +133,12 @@
       _co_tracker.update(false);
 
       if (G1SmoothConcRefine) {
-        start_vtime_sec = os::elapsedVTime();
         prev_buffer_num = curr_buffer_num;
-
         _sts.leave();
         os::sleep(Thread::current(), (jlong) _interval_ms, false);
         _sts.join();
+        start_vtime_sec = os::elapsedVTime();
       }
-
       n_logs++;
     }
     // Make sure we harvest the PYA, if any.
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -420,6 +420,10 @@
 
   _has_overflown(false),
   _concurrent(false),
+  _has_aborted(false),
+  _restart_for_overflow(false),
+  _concurrent_marking_in_progress(false),
+  _should_gray_objects(false),
 
   // _verbose_level set below
 
@@ -1228,7 +1232,16 @@
     if (!_final && _regions_done == 0)
       _start_vtime_sec = os::elapsedVTime();
 
-    if (hr->continuesHumongous()) return false;
+    if (hr->continuesHumongous()) {
+      HeapRegion* hum_start = hr->humongous_start_region();
+      // If the head region of the humongous object has been determined
+      // to be alive, then all the tail regions should be marked
+      // as well.
+      if (_region_bm->at(hum_start->hrs_index())) {
+        _region_bm->par_at_put(hr->hrs_index(), 1);
+      }
+      return false;
+    }
 
     HeapWord* nextTop = hr->next_top_at_mark_start();
     HeapWord* start   = hr->top_at_conc_mark_count();
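
The continuesHumongous() hunk above fixes the liveness count for humongous tails: marking only ever sets the region bitmap bit of the "starts humongous" head, so the counting closure now propagates that bit. An illustration with hypothetical region numbers:

    // A humongous object spanning regions 17-19:
    //
    //   hrs_index:  17               18           19
    //   type:       startsHumongous  continuesH   continuesH
    //
    // Tracing marks only bit 17 in _region_bm. Without the fix, cleanup
    // could see bits 18 and 19 clear and treat the tail regions as free
    // while the object is still live; with the fix, visiting a tail
    // copies the head's bit: if bit 17 is set, set bits 18 and 19 too.
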
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -30,7 +30,7 @@
 // A generic CM bit map.  This is essentially a wrapper around the BitMap
 // class, with one bit per (1<<_shifter) HeapWords.
 
-class CMBitMapRO {
+class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
  protected:
   HeapWord* _bmStartWord;      // base address of range covered by map
   size_t    _bmWordSize;       // map size (in #HeapWords covered)
@@ -139,7 +139,7 @@
 
 // Represents a marking stack used by the CM collector.
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
-class CMMarkStack {
+class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
   oop*   _base;      // bottom of stack
   jint   _index;     // one more than last occupied index
@@ -237,7 +237,7 @@
   void oops_do(OopClosure* f);
 };
 
-class CMRegionStack {
+class CMRegionStack VALUE_OBJ_CLASS_SPEC {
   MemRegion* _base;
   jint _capacity;
   jint _index;
@@ -312,7 +312,7 @@
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark {
+class ConcurrentMark: public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -107,7 +107,7 @@
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
-        tty->print_cr("[GC concurrent-mark-start]");
+        gclog_or_tty->print_cr("[GC concurrent-mark-start]");
       }
 
       if (!g1_policy->in_young_gc_mode()) {
@@ -320,8 +320,6 @@
   set_in_progress();
   clear_started();
   if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
-
-  return;
 }
 
 // Note: this method, although exported by the ConcurrentMarkSweepThread,
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -78,8 +78,8 @@
 
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                    int max_completed_queue,
-                                   Mutex* lock) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+                                   Mutex* lock, PtrQueueSet* fl_owner) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
   set_buffer_size(DCQBarrierQueueBufferSize);
   set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
 
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -88,7 +88,7 @@
 
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
                   int max_completed_queue = 0,
-                  Mutex* lock = NULL);
+                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);
 
   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
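
The new fl_owner parameter lets one PtrQueueSet delegate buffer allocation to another, so the pause-local dirty card queue set introduced for G1DeferredRSUpdate (see the g1CollectedHeap.cpp hunks below) can share a single buffer free list with JavaThread::dirty_card_queue_set(). A sketch of the delegation, assuming the free list stays behind the owner's _fl_lock as in ptrQueue.cpp:

    // Sketch only; the committed logic lives in ptrQueue.cpp.
    void** PtrQueueSet::allocate_buffer() {
      // Go through the free-list owner, so buffers released by either
      // queue set can be reused by the other.
      MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
      if (_fl_owner->_buf_free_list != NULL) {
        void** res = _fl_owner->_buf_free_list;
        _fl_owner->_buf_free_list = (void**) res[0];  // pop the list head
        _fl_owner->_buf_free_list_sz--;
        return res;
      }
      return NEW_C_HEAP_ARRAY(void*, _sz);  // free list empty: allocate fresh
    }
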
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -136,12 +136,20 @@
   int calls() { return _calls; }
 };
 
+class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
+public:
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    return true;
+  }
+};
+
 YoungList::YoungList(G1CollectedHeap* g1h)
   : _g1h(g1h), _head(NULL),
     _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
     _length(0), _scan_only_length(0),
     _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
 {
   guarantee( check_list_empty(false), "just making sure..." );
 }
@@ -159,16 +167,15 @@
 }
 
 void YoungList::add_survivor_region(HeapRegion* hr) {
-  assert(!hr->is_survivor(), "should not already be for survived");
+  assert(hr->is_survivor(), "should be flagged as survivor region");
   assert(hr->get_next_young_region() == NULL, "cause it should!");
 
   hr->set_next_young_region(_survivor_head);
   if (_survivor_head == NULL) {
-    _survivors_tail = hr;
+    _survivor_tail = hr;
   }
   _survivor_head = hr;
 
-  hr->set_survivor();
   ++_survivor_length;
 }
 
@@ -239,7 +246,7 @@
 
   empty_list(_survivor_head);
   _survivor_head = NULL;
-  _survivors_tail = NULL;
+  _survivor_tail = NULL;
   _survivor_length = 0;
 
   _last_sampled_rs_lengths = 0;
@@ -391,6 +398,7 @@
 
   // Add survivor regions to SurvRateGroup.
   _g1h->g1_policy()->note_start_adding_survivor_regions();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
   for (HeapRegion* curr = _survivor_head;
        curr != NULL;
        curr = curr->get_next_young_region()) {
@@ -401,7 +409,7 @@
   if (_survivor_head != NULL) {
     _head           = _survivor_head;
     _length         = _survivor_length + _scan_only_length;
-    _survivors_tail->set_next_young_region(_scan_only_head);
+    _survivor_tail->set_next_young_region(_scan_only_head);
   } else {
     _head           = _scan_only_head;
     _length         = _scan_only_length;
@@ -418,9 +426,9 @@
   _curr_scan_only   = NULL;
 
   _survivor_head    = NULL;
-  _survivors_tail   = NULL;
+  _survivor_tail    = NULL;
   _survivor_length  = 0;
-  _g1h->g1_policy()->finished_recalculating_age_indexes();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(false /* is_survivors */);
 
   assert(check_list_well_formed(), "young list should be well formed");
 }
@@ -553,7 +561,7 @@
   if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
     alloc_region = newAllocRegion_work(word_size, true, zero_filled);
     if (purpose == GCAllocForSurvived && alloc_region != NULL) {
-      _young_list->add_survivor_region(alloc_region);
+      alloc_region->set_survivor();
     }
     ++_gc_alloc_region_counts[purpose];
   } else {
@@ -778,6 +786,12 @@
   }
 }
 
+void G1CollectedHeap::abandon_gc_alloc_regions() {
+  // first, make sure that the GC alloc region list is empty (as it should be!)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+  release_gc_alloc_regions(true /* totally */);
+}
+
 class PostMCRemSetClearClosure: public HeapRegionClosure {
   ModRefBarrierSet* _mr_bs;
 public:
@@ -812,6 +826,40 @@
   }
 };
 
+class RebuildRSOutOfRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap*   _g1h;
+  UpdateRSOopClosure _cl;
+  int                _worker_i;
+public:
+  RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
+    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
+    _worker_i(worker_i),
+    _g1h(g1)
+  { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _cl.set_from(r);
+      r->oop_iterate(&_cl);
+    }
+    return false;
+  }
+};
+
+class ParRebuildRSTask: public AbstractGangTask {
+  G1CollectedHeap* _g1;
+public:
+  ParRebuildRSTask(G1CollectedHeap* g1)
+    : AbstractGangTask("ParRebuildRSTask"),
+      _g1(g1)
+  { }
+
+  void work(int i) {
+    RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
+    _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+                                         HeapRegion::RebuildRSClaimValue);
+  }
+};
+
 void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
                                     size_t word_size) {
   ResourceMark rm;
@@ -872,6 +920,7 @@
 
     // Make sure we'll choose a new allocation region afterwards.
     abandon_cur_alloc_region();
+    abandon_gc_alloc_regions();
     assert(_cur_alloc_region == NULL, "Invariant.");
     g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
     tear_down_region_lists();
@@ -912,30 +961,42 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }
     NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
 
     reset_gc_time_stamp();
     // Since everything potentially moved, we will clear all remembered
-    // sets, and clear all cards.  Later we will also cards in the used
-    // portion of the heap after the resizing (which could be a shrinking.)
-    // We will also reset the GC time stamps of the regions.
+    // sets, and clear all cards.  Later we will rebuild remembered
+    // sets. We will also reset the GC time stamps of the regions.
     PostMCRemSetClearClosure rs_clear(mr_bs());
     heap_region_iterate(&rs_clear);
 
     // Resize the heap if necessary.
     resize_if_necessary_after_full_collection(full ? 0 : word_size);
 
-    // Since everything potentially moved, we will clear all remembered
-    // sets, but also dirty all cards corresponding to used regions.
-    PostMCRemSetInvalidateClosure rs_invalidate(mr_bs());
-    heap_region_iterate(&rs_invalidate);
     if (_cg1r->use_cache()) {
       _cg1r->clear_and_record_card_counts();
       _cg1r->clear_hot_cache();
     }
 
+    // Rebuild remembered sets of all regions.
+    if (ParallelGCThreads > 0) {
+      ParRebuildRSTask rebuild_rs_task(this);
+      assert(check_heap_region_claim_values(
+             HeapRegion::InitialClaimValue), "sanity check");
+      set_par_threads(workers()->total_workers());
+      workers()->run_task(&rebuild_rs_task);
+      set_par_threads(0);
+      assert(check_heap_region_claim_values(
+             HeapRegion::RebuildRSClaimValue), "sanity check");
+      reset_heap_region_claim_values();
+    } else {
+      RebuildRSOutOfRegionClosure rebuild_rs(this);
+      heap_region_iterate(&rebuild_rs);
+    }
+
     if (PrintGC) {
       print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity());
     }
@@ -957,7 +1018,8 @@
     // dirtied, so this should abandon those logs, and set "do_traversal"
     // to true.
     concurrent_g1_refine()->set_pya_restart();
-
+    assert(!G1DeferredRSUpdate ||
+           dirty_card_queue_set().completed_buffers_num() == 0, "Should not be any");
     assert(regions_accounted_for(), "Region leakage!");
   }
 
@@ -1248,7 +1310,7 @@
 }
 
 void G1CollectedHeap::shrink(size_t shrink_bytes) {
-  release_gc_alloc_regions();
+  release_gc_alloc_regions(true /* totally */);
   tear_down_region_lists();  // We will rebuild them in a moment.
   shrink_helper(shrink_bytes);
   rebuild_region_lists();
@@ -1285,8 +1347,9 @@
   _unclean_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
-  _surviving_young_words(NULL)
-{
+  _surviving_young_words(NULL),
+  _in_cset_fast_test(NULL),
+  _in_cset_fast_test_base(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1311,9 +1374,19 @@
   }
 
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-    _gc_alloc_regions[ap]       = NULL;
-    _gc_alloc_region_counts[ap] = 0;
-  }
+    _gc_alloc_regions[ap]          = NULL;
+    _gc_alloc_region_counts[ap]    = 0;
+    _retained_gc_alloc_regions[ap] = NULL;
+    // by default, we do not retain a GC alloc region for each ap;
+    // we'll override this, when appropriate, below
+    _retain_gc_alloc_region[ap]    = false;
+  }
+
+  // We will try to remember the last half-full tenured region we
+  // allocated to at the end of a collection so that we can re-use it
+  // during the next collection.
+  _retain_gc_alloc_region[GCAllocForTenured]  = true;
+
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 }
 
@@ -1460,6 +1533,13 @@
                                                   G1DirtyCardQueueMax,
                                                   Shared_DirtyCardQ_lock);
   }
+  if (G1DeferredRSUpdate) {
+    dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                      DirtyCardQ_FL_lock,
+                                      0,
+                                      Shared_DirtyCardQ_lock,
+                                      &JavaThread::dirty_card_queue_set());
+  }
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
@@ -2052,15 +2132,7 @@
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
-    if (r->isHumongous()) {
-      if (r->startsHumongous()) {
-        // Verify the single H object.
-        oop(r->bottom())->verify();
-        size_t word_sz = oop(r->bottom())->size();
-        guarantee(r->top() == r->bottom() + word_sz,
-                  "Only one object in a humongous region");
-      }
-    } else {
+    if (!r->continuesHumongous()) {
       VerifyObjsInRegionClosure not_dead_yet_cl(r);
       r->verify(_allow_dirty);
       r->object_iterate(&not_dead_yet_cl);
@@ -2112,6 +2184,7 @@
     _g1h(g1h), _allow_dirty(allow_dirty) { }
 
   void work(int worker_i) {
+    HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                           HeapRegion::ParVerifyClaimValue);
@@ -2310,7 +2383,6 @@
 void
 G1CollectedHeap::checkConcurrentMark() {
     VerifyMarkedObjsClosure verifycl(this);
-    doConcurrentMark();
     //    MutexLockerEx x(getMarkBitMapLock(),
     //              Mutex::_no_safepoint_check_flag);
     object_iterate(&verifycl);
@@ -2485,6 +2557,19 @@
     g1_policy()->record_collection_pause_start(start_time_sec,
                                                start_used_bytes);
 
+    guarantee(_in_cset_fast_test == NULL, "invariant");
+    guarantee(_in_cset_fast_test_base == NULL, "invariant");
+    _in_cset_fast_test_length = max_regions();
+    _in_cset_fast_test_base =
+                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+    memset(_in_cset_fast_test_base, false,
+                                     _in_cset_fast_test_length * sizeof(bool));
+    // We're biasing _in_cset_fast_test to avoid subtracting the
+    // beginning of the heap every time we want to index; basically
+    // it's the same as what we do with the card table.
+    _in_cset_fast_test = _in_cset_fast_test_base -
+              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
+
 #if SCAN_ONLY_VERBOSE
     _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
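
The bias computed above means a collection-set membership test needs no subtraction of the heap base on the fast path. Worked through with hypothetical numbers (1 MB regions, so HeapRegion::LogOfHRGrainBytes == 20, and a heap reserved at 0x40000000):

    // bias = (size_t) _g1_reserved.start() >> LogOfHRGrainBytes
    //      = 0x40000000 >> 20 = 0x400
    // _in_cset_fast_test = _in_cset_fast_test_base - 0x400;
    //
    // Testing an oop at 0x40300000:
    //   index = 0x40300000 >> 20 = 0x403
    //   _in_cset_fast_test[0x403] == _in_cset_fast_test_base[0x3]
    // i.e. the entry for region 3, with no "addr - heap_bottom" on the
    // fast path -- the same trick the card table plays.
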
@@ -2553,13 +2638,19 @@
       free_collection_set(g1_policy()->collection_set());
       g1_policy()->clear_collection_set();
 
+      FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+      // this is more for peace of mind; we're nulling them here and
+      // we're expecting them to be null at the beginning of the next GC
+      _in_cset_fast_test = NULL;
+      _in_cset_fast_test_base = NULL;
+
       if (popular_region != NULL) {
         // We have to wait until now, because we don't want the region to
         // be rescheduled for pop-evac during RS update.
         popular_region->set_popular_pending(false);
       }
 
-      release_gc_alloc_regions();
+      release_gc_alloc_regions(false /* totally */);
 
       cleanup_surviving_young_words();
 
@@ -2572,6 +2663,9 @@
         _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
 
+        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+                                             _young_list->first_survivor_region(),
+                                             _young_list->last_survivor_region());
         _young_list->reset_auxilary_lists();
       }
     } else {
@@ -2598,7 +2692,8 @@
 #endif // SCAN_ONLY_VERBOSE
 
     double end_time_sec = os::elapsedTime();
-    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
+    g1_policy()->record_pause_time_ms(pause_time_ms);
     GCOverheadReporter::recordSTWEnd(end_time_sec);
     g1_policy()->record_collection_pause_end(popular_region != NULL,
                                              abandoned);
@@ -2608,6 +2703,7 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }
 
@@ -2641,6 +2737,10 @@
 
 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
   assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  // make sure we don't call set_gc_alloc_region() multiple times on
+  // the same region
+  assert(r == NULL || !r->is_gc_alloc_region(),
+         "shouldn't already be a GC alloc region");
   HeapWord* original_top = NULL;
   if (r != NULL)
     original_top = r->top();
@@ -2730,9 +2830,22 @@
   while (_gc_alloc_region_list != NULL) {
     HeapRegion* r = _gc_alloc_region_list;
     assert(r->is_gc_alloc_region(), "Invariant.");
+    // We need HeapRegion::oops_on_card_seq_iterate_careful() to work on
+    // newly allocated data in order to be able to apply deferred updates
+    // before the GC is done, for verification purposes (i.e., to allow
+    // G1HRRSFlushLogBuffersOnVerify). It's a safe thing to do after the
+    // collection.
+    r->ContiguousSpace::set_saved_mark();
     _gc_alloc_region_list = r->next_gc_alloc_region();
     r->set_next_gc_alloc_region(NULL);
     r->set_is_gc_alloc_region(false);
+    if (r->is_survivor()) {
+      if (r->is_empty()) {
+        r->set_not_young();
+      } else {
+        _young_list->add_survivor_region(r);
+      }
+    }
     if (r->is_empty()) {
       ++_free_regions;
     }
@@ -2750,23 +2863,55 @@
 }
 
 void G1CollectedHeap::get_gc_alloc_regions() {
+  // First, let's check that the GC alloc region list is empty (as it should be)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    assert(_gc_alloc_regions[ap] == NULL, "invariant");
+
     // Create new GC alloc regions.
-    HeapRegion* alloc_region = _gc_alloc_regions[ap];
-    // Clear this alloc region, so that in case it turns out to be
-    // unacceptable, we end up with no allocation region, rather than a bad
-    // one.
-    _gc_alloc_regions[ap] = NULL;
-    if (alloc_region == NULL || alloc_region->in_collection_set()) {
-      // Can't re-use old one.  Allocate a new one.
+    HeapRegion* alloc_region = _retained_gc_alloc_regions[ap];
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (alloc_region != NULL) {
+      assert(_retain_gc_alloc_region[ap], "only way to retain a GC region");
+
+      // let's make sure that the GC alloc region is not tagged as such
+      // outside a GC operation
+      assert(!alloc_region->is_gc_alloc_region(), "sanity");
+
+      if (alloc_region->in_collection_set() ||
+          alloc_region->top() == alloc_region->end() ||
+          alloc_region->top() == alloc_region->bottom()) {
+        // we will discard the current GC alloc region if it's in the
+        // collection set (it can happen!), if it's already full (no
+        // point in using it), or if it's empty (this means that it
+        // was emptied during a cleanup and it should be on the free
+        // list now).
+
+        alloc_region = NULL;
+      }
+    }
+
+    if (alloc_region == NULL) {
+      // we will get a new GC alloc region
       alloc_region = newAllocRegionWithExpansion(ap, 0);
     }
+
     if (alloc_region != NULL) {
+      assert(_gc_alloc_regions[ap] == NULL, "pre-condition");
       set_gc_alloc_region(ap, alloc_region);
     }
+
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap]->is_gc_alloc_region(),
+           "the GC alloc region should be tagged as such");
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap] == _gc_alloc_region_list,
+           "the GC alloc region should be the same as the GC alloc list head");
   }
   // Set alternative regions for allocation purposes that have reached
-  // thier limit.
+  // their limit.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
     if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
@@ -2776,27 +2921,55 @@
   assert(check_gc_alloc_regions(), "alloc regions messed up");
 }
 
-void G1CollectedHeap::release_gc_alloc_regions() {
+void G1CollectedHeap::release_gc_alloc_regions(bool totally) {
   // We keep a separate list of all regions that have been alloc regions in
-  // the current collection pause.  Forget that now.
+  // the current collection pause. Forget that now. This method will
+  // untag the GC alloc regions and tear down the GC alloc region
+  // list. It's desirable that no regions are tagged as GC alloc
+  // outside GCs.
   forget_alloc_region_list();
 
   // The current alloc regions contain objs that have survived
   // collection. Make them no longer GC alloc regions.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
-    if (r != NULL && r->is_empty()) {
-      {
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (r != NULL) {
+      // we retain nothing on _gc_alloc_regions between GCs
+      set_gc_alloc_region(ap, NULL);
+      _gc_alloc_region_counts[ap] = 0;
+
+      if (r->is_empty()) {
+        // we didn't actually allocate anything in it; let's just put
+        // it on the free list
         MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
         r->set_zero_fill_complete();
         put_free_region_on_list_locked(r);
+      } else if (_retain_gc_alloc_region[ap] && !totally) {
+        // retain it so that we can use it at the beginning of the next GC
+        _retained_gc_alloc_regions[ap] = r;
       }
     }
-    // set_gc_alloc_region will also NULLify all aliases to the region
-    set_gc_alloc_region(ap, NULL);
-    _gc_alloc_region_counts[ap] = 0;
-  }
-}
+  }
+}
+
+#ifndef PRODUCT
+// Useful for debugging
+
+void G1CollectedHeap::print_gc_alloc_regions() {
+  gclog_or_tty->print_cr("GC alloc regions");
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r == NULL) {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT, ap, NULL);
+    } else {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT" "SIZE_FORMAT,
+                             ap, r->bottom(), r->used());
+    }
+  }
+}
+#endif // PRODUCT
 
 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
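
Taken together, the get/release hunks above give the tenured GC alloc region a lifecycle that spans pauses. A compressed view of the state machine, using only calls shown in this patch:

    // pause N ends:  release_gc_alloc_regions(false /* totally */)
    //   region empty                  -> back on the free list
    //   region non-empty and
    //   _retain_gc_alloc_region[ap]   -> _retained_gc_alloc_regions[ap] = r
    //
    // pause N+1 begins:  get_gc_alloc_regions()
    //   the retained region is reused, unless it is now in the
    //   collection set, completely full, or was emptied by a cleanup --
    //   in which case it is dropped and a fresh region is taken.
    //
    // full GC / heap shrink:  release_gc_alloc_regions(true /* totally */)
    //   (via abandon_gc_alloc_regions() or shrink()); nothing is retained.
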
@@ -2877,27 +3050,51 @@
   }
 };
 
-class RecreateRSetEntriesClosure: public OopClosure {
+class UpdateRSetImmediate : public OopsInHeapRegionClosure {
 private:
   G1CollectedHeap* _g1;
   G1RemSet* _g1_rem_set;
-  HeapRegion* _from;
 public:
-  RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) :
-    _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from)
-  {}
+  UpdateRSetImmediate(G1CollectedHeap* g1) :
+    _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}
 
   void do_oop(narrowOop* p) {
     guarantee(false, "NYI");
   }
   void do_oop(oop* p) {
     assert(_from->is_in_reserved(p), "paranoia");
-    if (*p != NULL) {
-      _g1_rem_set->write_ref(_from, p);
+    if (*p != NULL && !_from->is_survivor()) {
+      _g1_rem_set->par_write_ref(_from, p, 0);
     }
   }
 };
 
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(*p) && !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+
+
 class RemoveSelfPointerClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
@@ -2905,11 +3102,11 @@
   HeapRegion* _hr;
   size_t _prev_marked_bytes;
   size_t _next_marked_bytes;
+  OopsInHeapRegionClosure *_cl;
 public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) :
-    _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr),
-    _prev_marked_bytes(0), _next_marked_bytes(0)
-  {}
+  RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) :
+    _g1(g1), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
+    _next_marked_bytes(0), _cl(cl) {}
 
   size_t prev_marked_bytes() { return _prev_marked_bytes; }
   size_t next_marked_bytes() { return _next_marked_bytes; }
@@ -2947,8 +3144,7 @@
       // that, if evacuation fails, we might have remembered set
       // entries missing given that we skipped cards on the
       // collection set. So, we'll recreate such entries now.
-      RecreateRSetEntriesClosure cl(_g1, _hr);
-      obj->oop_iterate(&cl);
+      obj->oop_iterate(_cl);
       assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
       // The object has been either evacuated or is dead. Fill it with a
@@ -2961,14 +3157,23 @@
 };
 
 void G1CollectedHeap::remove_self_forwarding_pointers() {
+  UpdateRSetImmediate immediate_update(_g1h);
+  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+  UpdateRSetDeferred deferred_update(_g1h, &dcq);
+  OopsInHeapRegionClosure *cl;
+  if (G1DeferredRSUpdate) {
+    cl = &deferred_update;
+  } else {
+    cl = &immediate_update;
+  }
   HeapRegion* cur = g1_policy()->collection_set();
-
   while (cur != NULL) {
     assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
+    RemoveSelfPointerClosure rspc(_g1h, cl);
     if (cur->evacuation_failed()) {
-      RemoveSelfPointerClosure rspc(_g1h, cur);
       assert(cur->in_collection_set(), "bad CS");
+      cl->set_region(cur);
       cur->object_iterate(&rspc);
 
       // A number of manipulations to make the TAMS be the current top,
@@ -3129,6 +3334,20 @@
   return block;
 }
 
+void G1CollectedHeap::retire_alloc_region(HeapRegion* alloc_region,
+                                            bool par) {
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+}
+
 HeapWord*
 G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
                                          HeapRegion*    alloc_region,
@@ -3146,16 +3365,7 @@
     // Otherwise, continue; this new region is empty, too.
   }
   assert(alloc_region != NULL, "We better have an allocation region");
-  // Another thread might have obtained alloc_region for the given
-  // purpose, and might be attempting to allocate in it, and might
-  // succeed.  Therefore, we can't do the "finalization" stuff on the
-  // region below until we're sure the last allocation has happened.
-  // We ensure this by allocating the remaining space with a garbage
-  // object.
-  if (par) par_allocate_remaining_space(alloc_region);
-  // Now we can do the post-GC stuff on the region.
-  alloc_region->note_end_of_copying();
-  g1_policy()->record_after_bytes(alloc_region->used());
+  retire_alloc_region(alloc_region, par);
 
   if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
     // Cannot allocate more regions for the given purpose.
@@ -3164,7 +3374,7 @@
     if (purpose != alt_purpose) {
       HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
       // Has not the alternative region been aliased?
-      if (alloc_region != alt_region) {
+      if (alloc_region != alt_region && alt_region != NULL) {
         // Try to allocate in the alternative region.
         if (par) {
           block = alt_region->par_allocate(word_size);
@@ -3173,9 +3383,10 @@
         }
         // Make an alias.
         _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
-      }
-      if (block != NULL) {
-        return block;
+        if (block != NULL) {
+          return block;
+        }
+        retire_alloc_region(alt_region, par);
       }
       // Both the allocation region and the alternative one are full
       // and aliased, replace them with a new allocation region.
@@ -3471,11 +3682,15 @@
 protected:
   G1CollectedHeap* _g1h;
   RefToScanQueue*  _refs;
+  DirtyCardQueue   _dcq;
+  CardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;
 
   typedef GrowableArray<oop*> OverflowQueue;
   OverflowQueue* _overflowed_refs;
 
   G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable           _age_table;
 
   size_t           _alloc_buffer_waste;
   size_t           _undo_waste;
@@ -3511,12 +3726,37 @@
 
   void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
 
+  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
+  CardTableModRefBS* ctbs()                      { return _ct_bs; }
+
+  void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
+    if (!from->is_survivor()) {
+      _g1_rem->par_write_ref(from, p, tid);
+    }
+  }
+
+  void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
+    // If the new value of the field points to the same region or
+    // is the to-space, we don't need to include it in the Rset updates.
+    if (!from->is_in_reserved(*p) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
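+  // Whether a pause uses the immediate or the deferred path is chosen
+  // by G1DeferredRSUpdate in update_rs() below. Deferring keeps the
+  // remembered-set work out of the copy loop: mark_card_deferred()
+  // claims each card at most once, the queued cards are redirtied and
+  // merged into the mutator queue set at the end of the pause (see
+  // evacuate_collection_set()), and they are then processed like
+  // ordinary mutator logs.
+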
 public:
   G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
     : _g1h(g1h),
       _refs(g1h->task_queue(queue_num)),
+      _dcq(&g1h->dirty_card_queue_set()),
+      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
+      _g1_rem(g1h->g1_rem_set()),
       _hash_seed(17), _queue_num(queue_num),
       _term_attempts(0),
+      _age_table(false),
 #if G1_DETAILED_STATS
       _pushes(0), _pops(0), _steals(0),
       _steal_attempts(0),  _overflow_pushes(0),
@@ -3551,8 +3791,9 @@
 
   RefToScanQueue*   refs()            { return _refs;             }
   OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
-
-  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return &_alloc_buffers[purpose];
   }
 
@@ -3560,6 +3801,9 @@
   size_t undo_waste()                            { return _undo_waste; }
 
   void push_on_queue(oop* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref), "invariant");
+
     if (!refs()->push(ref)) {
       overflowed_refs()->push(ref);
       IF_G1_DETAILED_STATS(note_overflow_push());
@@ -3572,6 +3816,10 @@
     if (!refs()->pop_local(ref)) {
       ref = NULL;
     } else {
+      assert(ref != NULL, "invariant");
+      assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref),
+             "invariant");
+
       IF_G1_DETAILED_STATS(note_pop());
     }
   }
@@ -3583,6 +3831,14 @@
   int refs_to_scan()                             { return refs()->size();                 }
   int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
 
+  void update_rs(HeapRegion* from, oop* p, int tid) {
+    if (G1DeferredRSUpdate) {
+      deferred_rs_update(from, p, tid);
+    } else {
+      immediate_rs_update(from, p, tid);
+    }
+  }
+
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
 
     HeapWord* obj = NULL;
@@ -3601,8 +3857,7 @@
 
       obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, "buffer was definitely big enough...");
-    }
-    else {
+    } else {
       obj = _g1h->par_allocate_during_gc(purpose, word_sz);
     }
     return obj;
@@ -3695,31 +3950,63 @@
     }
   }
 
+private:
+  void deal_with_reference(oop* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
   void trim_queue() {
+    // I've duplicated the loop: first to drain the overflow
+    // queue, then to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
     while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
-      oop *ref_to_scan = NULL;
-      if (overflowed_refs_to_scan() == 0) {
-        pop_from_queue(ref_to_scan);
-      } else {
+      while (overflowed_refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
         pop_from_overflow_queue(ref_to_scan);
+        assert(ref_to_scan != NULL, "invariant");
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+        deal_with_reference(ref_to_scan);
       }
-      if (ref_to_scan != NULL) {
-        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
-          _partial_scan_cl->do_oop_nv(ref_to_scan);
-        } else {
-          // Note: we can use "raw" versions of "region_containing" because
-          // "obj_to_scan" is definitely in the heap, and is not in a
-          // humongous region.
-          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-          _evac_cl->set_region(r);
-          _evac_cl->do_oop_nv(ref_to_scan);
+
+      while (refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
+        pop_from_queue(ref_to_scan);
+
+        if (ref_to_scan != NULL) {
+          // We shouldn't have pushed it on the queue if it was not
+          // pointing into the CSet.
+          assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+          deal_with_reference(ref_to_scan);
         }
       }
     }
   }
 };
 
-
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
@@ -3728,16 +4015,25 @@
 // Should probably be made inline and moved in g1OopClosures.inline.hpp.
 void G1ParScanClosure::do_oop_nv(oop* p) {
   oop obj = *p;
+
   if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        _par_scan_state->push_on_queue(p);
-        return;
-      }
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not even going to bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when we pop it from the queue.
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == *p, "the value of *p should not have changed");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
-    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
   }
 }
 
@@ -3765,7 +4061,9 @@
           (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+                                           : m->age();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
   HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
   oop       obj     = oop(obj_ptr);
@@ -3777,13 +4075,39 @@
     return _g1->handle_evacuation_failure_par(cl, old);
   }
 
+  // We're going to allocate linearly, so might as well prefetch ahead.
+  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
+
   oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
-    obj->set_mark(m);
     if (g1p->track_object_age(alloc_purpose)) {
-      obj->incr_age();
+      // We could simply do obj->incr_age(). However, this causes a
+      // performance issue. obj->incr_age() will first check whether
+      // the object has a displaced mark by checking its mark word;
+      // getting the mark word from the new location of the object
+      // stalls. So, given that we already have the mark word and we
+      // are about to install it anyway, it's better to increase the
+      // age on the mark word, when the object does not have a
+      // displaced mark word. We're not expecting many objects to have
+      // a displaced mark word, so that case is not optimized
+      // further (it could be...) and we simply call obj->incr_age().
+
+      if (m->has_displaced_mark_helper()) {
+        // in this case, we have to install the mark word first,
+        // otherwise obj looks to be forwarded (the old mark word,
+        // which contains the forward pointer, was copied)
+        obj->set_mark(m);
+        obj->incr_age();
+      } else {
+        m = m->incr_age();
+        obj->set_mark(m);
+      }
+      _par_scan_state->age_table()->add(obj, word_sz);
+    } else {
+      obj->set_mark(m);
     }
+
     // preserve "next" mark bit
     if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
       if (!use_local_bitmaps ||
@@ -3805,9 +4129,11 @@
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       arrayOop(old)->set_length(0);
-      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+      _par_scan_state->push_on_queue(set_partial_array_mask(old));
     } else {
-      _scanner->set_region(_g1->heap_region_containing(obj));
+      // No point in using the slower heap_region_containing() method,
+      // given that we know obj is in the heap.
+      _scanner->set_region(_g1->heap_region_containing_raw(obj));
       obj->oop_iterate_backwards(_scanner);
     }
   } else {
@@ -3817,47 +4143,55 @@
   return obj;
 }
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
-void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
+void G1ParCopyClosure<do_gen_barrier, barrier,
+                      do_mark_forwardee, skip_cset_test>::do_oop_work(oop* p) {
   oop obj = *p;
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");
 
-  if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
+  // The only time we skip the cset test is when we're scanning
+  // references popped from the queue. And we only push on the queue
+  // references that we know point into the cset, so no point in
+  // checking again. But we'll leave an assert here for peace of mind.
+  assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
+
+  // here the NULL check is implicit in the in_cset_fast_test() call
+  if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
 #if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
-                             p, (void*) obj);
+    gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
+                           "into CS.", p, (void*) obj);
 #endif
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        *p = copy_to_survivor_space(obj);
-      }
-      // When scanning the RS, we only care about objs in CS.
-      if (barrier == G1BarrierRS) {
-        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-      }
+    if (obj->is_forwarded()) {
+      *p = obj->forwardee();
+    } else {
+      *p = copy_to_survivor_space(obj);
+    }
+    // When scanning the RS, we only care about objs in CS.
+    if (barrier == G1BarrierRS) {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
-    // When scanning moved objs, must look at all oops.
-    if (barrier == G1BarrierEvac) {
-      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-    }
-
-    if (do_gen_barrier) {
-      par_do_barrier(p);
-    }
-  }
-}
-
-template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
-
-template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  }
+
+  // When scanning moved objs, must look at all oops.
+  if (barrier == G1BarrierEvac && obj != NULL) {
+    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+  }
+
+  if (do_gen_barrier && obj != NULL) {
+    par_do_barrier(p);
+  }
+}
+
+template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
+
+template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
   oop obj, int start, int end) {
   // process our set of indices (include header in first chunk)
   assert(start < end, "invariant");
   T* const base      = (T*)objArrayOop(obj)->base();
-  T* const start_addr = base + start;
+  T* const start_addr = (start == 0) ? (T*) obj : base + start;
   T* const end_addr   = base + end;
   MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
   _scanner.set_region(_g1->heap_region_containing(obj));
@@ -3866,7 +4200,8 @@
 
 void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
   assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
-  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(has_partial_array_mask(p), "invariant");
+  oop old = clear_partial_array_mask(p);
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
@@ -3884,7 +4219,7 @@
     end = start + ParGCArrayScanChunk;
     arrayOop(old)->set_length(end);
     // Push remainder.
-    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+    _par_scan_state->push_on_queue(set_partial_array_mask(old));
   } else {
     // Restore length so that the heap remains parsable in
     // case of evacuation failure.
@@ -3893,11 +4228,6 @@
 
   // process our set of indices (include header in first chunk)
   process_array_chunk<oop>(obj, start, end);
-  oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr<oop>(start);
-  oop* end_addr   = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length
-  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
-  _scanner.set_region(_g1->heap_region_containing(obj));
-  obj->oop_iterate(&_scanner, mr);
 }
 
 int G1ScanAndBalanceClosure::_nq = 0;
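
The raw "(intptr_t) old | G1_PARTIAL_ARRAY_MASK" arithmetic used before is now hidden behind the set/clear/has_partial_array_mask() helpers. A plausible shape for them (the committed definitions live in the G1 headers, not in the quoted hunks; the tag bit value is illustrative):

    #define G1_PARTIAL_ARRAY_MASK 0x2

    inline bool has_partial_array_mask(oop* ref) {
      return ((uintptr_t) ref & G1_PARTIAL_ARRAY_MASK) != 0;
    }
    // Tag a word-aligned objArray oop (its low bits are free) so queue
    // consumers know the entry means "scan the rest of this array".
    inline oop* set_partial_array_mask(oop obj) {
      return (oop*) ((uintptr_t) obj | G1_PARTIAL_ARRAY_MASK);
    }
    inline oop clear_partial_array_mask(oop* ref) {
      return oop((uintptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
    }
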
@@ -3931,6 +4261,13 @@
                           pss->hash_seed(),
                           ref_to_scan)) {
         IF_G1_DETAILED_STATS(pss->note_steal());
+
+        // slightly paranoid tests; I'm trying to catch potential
+        // problems before we go into push_on_queue to know where the
+        // problem is coming from
+        assert(ref_to_scan != NULL, "invariant");
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                   _g1h->obj_in_cs(*ref_to_scan), "invariant");
         pss->push_on_queue(ref_to_scan);
         continue;
       }
@@ -3976,10 +4313,10 @@
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState pss(_g1h, i);
-    G1ParScanHeapEvacClosure     scan_evac_cl(_g1h, &pss);
-    G1ParScanHeapEvacClosure     evac_failure_cl(_g1h, &pss);
-    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss);
 
     pss.set_evac_closure(&scan_evac_cl);
     pss.set_evac_failure_closure(&evac_failure_cl);
@@ -3988,6 +4325,7 @@
     G1ParScanExtRootClosure         only_scan_root_cl(_g1h, &pss);
     G1ParScanPermClosure            only_scan_perm_cl(_g1h, &pss);
     G1ParScanHeapRSClosure          only_scan_heap_rs_cl(_g1h, &pss);
+
     G1ParScanAndMarkExtRootClosure  scan_mark_root_cl(_g1h, &pss);
     G1ParScanAndMarkPermClosure     scan_mark_perm_cl(_g1h, &pss);
     G1ParScanAndMarkHeapRSClosure   scan_mark_heap_rs_cl(_g1h, &pss);
@@ -4024,6 +4362,9 @@
       _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
       _g1h->g1_policy()->record_termination_time(i, term_ms);
     }
+    if (G1UseSurvivorSpace) {
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+    }
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
     // Clean up any par-expanded rem sets.
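
record_thread_age_table() is the consumer of the new per-worker _age_table: each worker histograms surviving bytes by age locally, and the policy merges the histograms to size the survivor space and pick a tenuring threshold, the way the other young collectors use ageTable. An illustrative sketch of the policy side (only the call site appears in the quoted hunks):

    void G1CollectorPolicy::record_thread_age_table(ageTable* age_table) {
      // Fold the worker-local histogram into the pause-global one.
      _survivors_age_table.merge_par(age_table);
    }

    // Later, when sizing survivors for the next pause, something like:
    //   _tenuring_threshold =
    //     _survivors_age_table.compute_tenuring_threshold(survivor_capacity);
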
@@ -4240,7 +4581,6 @@
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-
   set_par_threads(n_workers);
   G1ParTask g1_par_task(this, n_workers, _task_queues);
 
@@ -4248,8 +4588,9 @@
 
   change_strong_roots_parity();  // In preparation for parallel strong roots.
   rem_set()->prepare_for_younger_refs_iterate(true);
+
+  assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
   double start_par = os::elapsedTime();
-
   if (ParallelGCThreads > 0) {
     // The individual threads will set their evac-failure closures.
     workers()->run_task(&g1_par_task);
@@ -4263,14 +4604,14 @@
   // Is this the right thing to do here?  We don't save marks
   // on individual heap regions when we allocate from
   // them in parallel, so this seems like the correct place for this.
-  all_alloc_regions_note_end_of_copying();
+  retire_all_alloc_regions();
   {
     G1IsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
     JNIHandles::weak_oops_do(&is_alive, &keep_alive);
   }
-
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+
   concurrent_g1_refine()->set_use_cache(true);
 
   finalize_for_evac_failure();
@@ -4281,7 +4622,6 @@
 
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
-
     if (PrintGCDetails) {
       gclog_or_tty->print(" (evacuation failed)");
     } else if (PrintGC) {
@@ -4289,6 +4629,14 @@
     }
   }
 
+  if (G1DeferredRSUpdate) {
+    RedirtyLoggedCardTableEntryFastClosure redirty;
+    dirty_card_queue_set().set_closure(&redirty);
+    dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
+    assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+  }
+
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
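
The G1DeferredRSUpdate epilogue above runs in a fixed order. A compressed view of the sequence and why each step is there:

    // 1. RedirtyLoggedCardTableEntryFastClosure (defined near the top of
    //    this file) sets every logged card back to dirty_card_val(),
    //    undoing the mark_card_deferred() marking that filtered
    //    duplicate enqueues during the pause.
    // 2. merge_bufferlists() donates the pause-local completed buffers
    //    to JavaThread::dirty_card_queue_set(), so they now look like
    //    ordinary mutator logs.
    // 3. The pause-local set must end up empty (the assert); the
    //    deferred updates are applied later by the usual refinement
    //    machinery.
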
 
@@ -4903,7 +5251,7 @@
   return no_allocs;
 }
 
-void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+void G1CollectedHeap::retire_all_alloc_regions() {
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
     if (r != NULL) {
@@ -4916,8 +5264,7 @@
         }
       }
       if (!has_processed_alias) {
-        r->note_end_of_copying();
-        g1_policy()->record_after_bytes(r->used());
+        retire_alloc_region(r, false /* par */);
       }
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -90,7 +90,7 @@
   HeapRegion* _curr_scan_only;
 
   HeapRegion* _survivor_head;
-  HeapRegion* _survivors_tail;
+  HeapRegion* _survivor_tail;
   size_t      _survivor_length;
 
   void          empty_list(HeapRegion* list);
@@ -105,6 +105,7 @@
   bool          is_empty() { return _length == 0; }
   size_t        length() { return _length; }
   size_t        scan_only_length() { return _scan_only_length; }
+  size_t        survivor_length() { return _survivor_length; }
 
   void rs_length_sampling_init();
   bool rs_length_sampling_more();
@@ -120,6 +121,7 @@
   HeapRegion* first_region() { return _head; }
   HeapRegion* first_scan_only_region() { return _scan_only_head; }
   HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* last_survivor_region() { return _survivor_tail; }
   HeapRegion* par_get_next_scan_only_region() {
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
     HeapRegion* ret = _curr_scan_only;
@@ -170,7 +172,6 @@
     NumAPIs = HeapRegion::MaxAge
   };
 
-
   // The one and only G1CollectedHeap, so static functions can find it.
   static G1CollectedHeap* _g1h;
 
@@ -215,11 +216,20 @@
 
   // Postcondition: cur_alloc_region == NULL.
   void abandon_cur_alloc_region();
+  void abandon_gc_alloc_regions();
 
   // The to-space memory regions into which objects are being copied during
   // a GC.
   HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
-  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+  size_t _gc_alloc_region_counts[GCAllocPurposeCount];
+  // These are the regions, one per GCAllocPurpose, that are half-full
+  // at the end of a collection and that we want to reuse during the
+  // next collection.
+  HeapRegion* _retained_gc_alloc_regions[GCAllocPurposeCount];
+  // This specifies whether we will keep the last half-full region at
+  // the end of a collection so that it can be reused during the next
+  // collection (this is specified per GCAllocPurpose)
+  bool _retain_gc_alloc_region[GCAllocPurposeCount];
 
   // A list of the regions that have been set to be alloc regions in the
   // current collection.
@@ -247,6 +257,27 @@
   NumberSeq _pop_obj_rc_at_copy;
   void print_popularity_summary_info() const;
 
+  // This is used for a quick test on whether a reference points into
+  // the collection set or not. Basically, we have an array, with one
+  // byte per region, and that byte denotes whether the corresponding
+  // region is in the collection set or not. The entry corresponding
+  // the bottom of the heap, i.e., region 0, is pointed to by
+  // _in_cset_fast_test_base.  The _in_cset_fast_test field has been
+  // biased so that it actually points to address 0 of the address
+  // space, to make the test as fast as possible (we can simply shift
+  // the address to address into it, instead of having to subtract the
+  // bottom of the heap from the address before shifting it; basically
+  // it works in the same way the card table works).
+  bool* _in_cset_fast_test;
+
+  // The allocated array used for the fast test on whether a reference
+  // points into the collection set or not. This field is also used to
+  // free the array.
+  bool* _in_cset_fast_test_base;
+
+  // The length of the _in_cset_fast_test_base array.
+  size_t _in_cset_fast_test_length;
+
   volatile unsigned _gc_time_stamp;
 
   size_t* _surviving_young_words;
@@ -260,8 +291,8 @@
   // Returns "true" iff none of the gc alloc regions have any allocations
   // since the last call to "save_marks".
   bool all_alloc_regions_no_allocs_since_save_marks();
-  // Calls "note_end_of_copying on all gc alloc_regions.
-  void all_alloc_regions_note_end_of_copying();
+  // Perform finalization work on all allocation regions.
+  void retire_all_alloc_regions();
 
   // The number of regions allocated to hold humongous objects.
   int         _num_humongous_regions;
@@ -330,6 +361,10 @@
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
 
+  // Retires an allocation region when it is full or at the end of a
+  // GC pause.
+  void retire_alloc_region(HeapRegion* alloc_region, bool par);
+
   // Helper function for two callbacks below.
   // "full", if true, indicates that the GC is for a System.gc() request,
   // and should collect the entire heap.  If "clear_all_soft_refs" is true,
@@ -368,6 +403,38 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  // We register a region with the fast "in collection set" test. We
+  // simply set to true the array slot corresponding to this region.
+  void register_region_with_in_cset_fast_test(HeapRegion* r) {
+    assert(_in_cset_fast_test_base != NULL, "sanity");
+    assert(r->in_collection_set(), "invariant");
+    int index = r->hrs_index();
+    assert(0 <= index && (size_t) index < _in_cset_fast_test_length,
+           "invariant");
+    assert(!_in_cset_fast_test_base[index], "invariant");
+    _in_cset_fast_test_base[index] = true;
+  }
+
+  // This is a fast test on whether a reference points into the
+  // collection set or not. It does not assume that the reference
+  // points into the heap; if it doesn't, it will return false.
+  bool in_cset_fast_test(oop obj) {
+    assert(_in_cset_fast_test != NULL, "sanity");
+    if (_g1_committed.contains((HeapWord*) obj)) {
+      // no need to subtract the bottom of the heap from obj,
+      // _in_cset_fast_test is biased
+      size_t index = ((size_t) obj) >> HeapRegion::LogOfHRGrainBytes;
+      bool ret = _in_cset_fast_test[index];
+      // let's make sure the result is consistent with what the slower
+      // test returns
+      assert( ret || !obj_in_cs(obj), "sanity");
+      assert(!ret ||  obj_in_cs(obj), "sanity");
+      return ret;
+    } else {
+      return false;
+    }
+  }
+
 protected:
 
   // Shrink the garbage-first heap by at most the given size (in bytes!).
@@ -398,6 +465,10 @@
   // And it's mod ref barrier set, used to track updates for the above.
   ModRefBarrierSet* _mr_bs;
 
+  // A set of cards that cover the objects for which the RSets should be updated
+  // concurrently after the collection.
+  DirtyCardQueueSet _dirty_card_queue_set;
+
   // The Heap Region Rem Set Iterator.
   HeapRegionRemSetIterator** _rem_set_iterator;
 
@@ -526,8 +597,21 @@
 
   // Ensure that the relevant gc_alloc regions are set.
   void get_gc_alloc_regions();
-  // We're done with GC alloc regions; release them, as appropriate.
-  void release_gc_alloc_regions();
+  // We're done with GC alloc regions. We are going to tear down the
+  // gc alloc list and remove the gc alloc tag from all the regions on
+  // that list. However, we will also retain the last (i.e., the one
+  // that is half-full) GC alloc region, per GCAllocPurpose, for
+  // possible reuse during the next collection, provided
+  // _retain_gc_alloc_region[] indicates that it should be the
+  // case. Said regions are kept in the _retained_gc_alloc_regions[]
+  // array. If the parameter totally is set, we will not retain any
+  // regions, irrespective of what _retain_gc_alloc_region[]
+  // indicates.
+  void release_gc_alloc_regions(bool totally);
+#ifndef PRODUCT
+  // Useful for debugging.
+  void print_gc_alloc_regions();
+#endif // !PRODUCT
 
   // ("Weak") Reference processing support
   ReferenceProcessor* _ref_processor;
@@ -607,6 +691,9 @@
 
   RefToScanQueue *task_queue(int i);
 
+  // A set of cards where updates happened during the GC
+  DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
+
   // Create a G1CollectedHeap with the specified policy.
   // Must call the initialize method afterwards.
   // May not return if something goes wrong.
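
The _in_cset_fast_test comment above describes a byte-per-region array whose pointer is biased by the heap bottom so the test is a single shift and load. A self-contained sketch of that biasing trick follows; the region size, reserved base, and region count are assumed values (the real code derives them from HeapRegion and the committed heap), and it assumes a 64-bit uintptr_t:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned  kLogRegionBytes = 20;                 // 1 MB regions (assumed)
      const uintptr_t heap_bottom     = uintptr_t(1) << 32; // assumed reserved base

      static bool in_cset_base[1024] = { false };           // one entry per region
      // Bias the pointer so that indexing with (addr >> shift) needs no
      // subtraction of heap_bottom. (Formally out-of-range pointer
      // arithmetic, just like the real field; fine on flat address spaces.)
      bool* in_cset = in_cset_base - (heap_bottom >> kLogRegionBytes);

      in_cset_base[5] = true;                               // region 5 joins the cset
      uintptr_t addr = heap_bottom + (uintptr_t(5) << kLogRegionBytes) + 128;
      assert(in_cset[addr >> kLogRegionBytes]);             // the entire fast test
      return 0;
    }

The same shift-by-LogOfHRGrainBytes indexing appears in heap_region_containing_raw in the next file, just without the bias.
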
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -36,8 +36,11 @@
 
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
-  HeapRegion* res = _hrs->addr_to_region(addr);
-  assert(res != NULL, "addr outside of heap?");
+  assert(_g1_reserved.contains(addr), "invariant");
+  size_t index = ((intptr_t) addr - (intptr_t) _g1_reserved.start())
+                                              >> HeapRegion::LogOfHRGrainBytes;
+  HeapRegion* res = _hrs->at(index);
+  assert(res == _hrs->addr_to_region(addr), "sanity");
   return res;
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -196,8 +196,13 @@
   _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
                                                  G1YoungSurvRateNumRegionsSummary)),
   _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
-                                              G1YoungSurvRateNumRegionsSummary))
+                                              G1YoungSurvRateNumRegionsSummary)),
   // add here any more surv rate groups
+  _recorded_survivor_regions(0),
+  _recorded_survivor_head(NULL),
+  _recorded_survivor_tail(NULL),
+  _survivors_age_table(true)
+
 {
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
@@ -272,6 +277,15 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
   _tenuring_threshold = MaxTenuringThreshold;
 
+  if (G1UseSurvivorSpace) {
+    // If G1FixedSurvivorSpaceSize is 0, the size is not fixed and
+    // _max_survivor_regions will be calculated by
+    // calculate_young_list_target_config during initialization.
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  } else {
+    _max_survivor_regions = 0;
+  }
+
   initialize_all();
 }
 
@@ -283,6 +297,9 @@
 void G1CollectorPolicy::initialize_flags() {
   set_min_alignment(HeapRegion::GrainBytes);
   set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  if (SurvivorRatio < 1) {
+    vm_exit_during_initialization("Invalid survivor ratio specified");
+  }
   CollectorPolicy::initialize_flags();
 }
 
@@ -301,6 +318,8 @@
                                   "-XX:+UseConcMarkSweepGC.");
   }
 
+  initialize_gc_policy_counters();
+
   if (G1Gen) {
     _in_young_gc_mode = true;
 
@@ -322,6 +341,12 @@
   }
 }
 
+// Create the jstat counters for the policy.
+void G1CollectorPolicy::initialize_gc_policy_counters()
+{
+  _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen);
+}
+
 void G1CollectorPolicy::calculate_young_list_min_length() {
   _young_list_min_length = 0;
 
@@ -352,6 +377,7 @@
     guarantee( so_length < _young_list_target_length, "invariant" );
     _young_list_so_prefix_length = so_length;
   }
+  calculate_survivors_policy();
 }
 
 // This method calculates the optimal scan-only set for a fixed young
@@ -448,6 +474,9 @@
   if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
     // we are in fully-young mode and there are free regions in the heap
 
+    double survivor_regions_evac_time =
+        predict_survivor_regions_evac_time();
+
     size_t min_so_length = 0;
     size_t max_so_length = 0;
 
@@ -497,9 +526,8 @@
       scanned_cards = predict_non_young_card_num(adj_rs_lengths);
     // calculate this once, so that we don't have to recalculate it in
     // the innermost loop
-    double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
-                                                       scanned_cards);
-
+    double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards)
+                          + survivor_regions_evac_time;
     // the result
     size_t final_young_length = 0;
     size_t final_so_length = 0;
@@ -548,14 +576,14 @@
     bool done = false;
     // this is the outermost loop
     while (!done) {
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
                              ", incr " SIZE_FORMAT ", pass %s",
                              from_so_length, to_so_length, so_length_incr,
                              (pass == pass_type_coarse) ? "coarse" :
                              (pass == pass_type_fine) ? "fine" : "final");
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       size_t so_length = from_so_length;
       size_t init_free_regions =
@@ -651,11 +679,11 @@
           guarantee( so_length_incr == so_coarse_increments, "invariant" );
           guarantee( final_so_length >= min_so_length, "invariant" );
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
           // leave this in for debugging, just in case
           gclog_or_tty->print_cr("  coarse pass: SO length " SIZE_FORMAT,
                                  final_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
           from_so_length =
             (final_so_length - min_so_length > so_coarse_increments) ?
@@ -687,12 +715,12 @@
             // of the optimal
             size_t new_so_length = 950 * final_so_length / 1000;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
             // leave this in for debugging, just in case
             gclog_or_tty->print_cr("  fine pass: SO length " SIZE_FORMAT
                                    ", setting it to " SIZE_FORMAT,
                                     final_so_length, new_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
             from_so_length = new_so_length;
             to_so_length = new_so_length;
@@ -719,7 +747,8 @@
     }
 
     // we should have at least one region in the target young length
-    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    _young_list_target_length =
+        MAX2((size_t) 1, final_young_length + _recorded_survivor_regions);
     if (final_so_length >= final_young_length)
       // and we need to ensure that the S-O length is not greater than
       // the target young length (this is being a bit careful)
@@ -734,7 +763,7 @@
     double end_time_sec = os::elapsedTime();
     double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
                            ", SO = " SIZE_FORMAT ", "
@@ -747,9 +776,9 @@
                            calculations,
                            full_young_gcs() ? "full" : "partial",
                            should_initiate_conc_mark() ? " i-m" : "",
-                           in_marking_window(),
-                           in_marking_window_im());
-#endif // 0
+                           _in_marking_window,
+                           _in_marking_window_im);
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     if (_young_list_target_length < _young_list_min_length) {
       // bummer; this means that, if we do a pause when the optimal
@@ -768,14 +797,14 @@
         // S-O length
         so_length = calculate_optimal_so_length(_young_list_min_length);
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("adjusted target length from "
                              SIZE_FORMAT " to " SIZE_FORMAT
                              ", SO " SIZE_FORMAT,
                              _young_list_target_length, _young_list_min_length,
                              so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       _young_list_target_length =
         MAX2(_young_list_min_length, (size_t)1);
@@ -785,12 +814,12 @@
     // we are in a partially-young mode or we've run out of regions (due
     // to evacuation failure)
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
                            ", SO " SIZE_FORMAT,
                            _young_list_min_length, 0);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     // we'll do the pause as soon as possible and with no S-O prefix
     // (see above for the reasons behind the latter)
@@ -884,6 +913,16 @@
   return true;
 }
 
+double G1CollectorPolicy::predict_survivor_regions_evac_time() {
+  double survivor_regions_evac_time = 0.0;
+  for (HeapRegion * r = _recorded_survivor_head;
+       r != NULL && r != _recorded_survivor_tail->get_next_young_region();
+       r = r->get_next_young_region()) {
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+  }
+  return survivor_regions_evac_time;
+}
+
 void G1CollectorPolicy::check_prediction_validity() {
   guarantee( adaptive_young_list_length(), "should not call this otherwise" );
 
@@ -975,7 +1014,7 @@
 
   _all_full_gc_times_ms->add(full_gc_time_ms);
 
-  update_recent_gc_times(end_sec, full_gc_time_sec);
+  update_recent_gc_times(end_sec, full_gc_time_ms);
 
   _g1->clear_full_collection();
 
@@ -995,11 +1034,15 @@
   _short_lived_surv_rate_group->start_adding_regions();
   // also call this on any additional surv rate groups
 
+  record_survivor_regions(0, NULL, NULL);
+
   _prev_region_num_young   = _region_num_young;
   _prev_region_num_tenured = _region_num_tenured;
 
   _free_regions_at_end_of_collection = _g1->free_regions();
   _scan_only_regions_at_end_of_collection = 0;
+  // Reset survivors SurvRateGroup.
+  _survivor_surv_rate_group->reset();
   calculate_young_list_min_length();
   calculate_young_list_target_config();
  }
@@ -1044,6 +1087,7 @@
 
   assert(_g1->used_regions() == _g1->recalculate_used_regions(),
          "sanity");
+  assert(_g1->used() == _g1->recalculate_used(), "sanity");
 
   double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
   _all_stop_world_times_ms->add(s_w_t_ms);
@@ -1104,6 +1148,10 @@
   _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length);
   tag_scan_only(short_lived_so_length);
 
+  if (G1UseSurvivorSpace) {
+    _survivors_age_table.clear();
+  }
+
   assert( verify_young_ages(), "region age verification" );
 }
 
@@ -1428,6 +1476,7 @@
   size_t cur_used_bytes = _g1->used();
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
+  bool update_stats = !abandoned && !_g1->evacuation_failed();
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1488,7 +1537,7 @@
 
   _n_pauses++;
 
-  if (!abandoned) {
+  if (update_stats) {
     _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
     _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
     _recent_evac_times_ms->add(evac_ms);
@@ -1588,8 +1637,10 @@
   double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
   double termination_time = avg_value(_par_last_termination_times_ms);
 
-  double parallel_other_time;
-  if (!abandoned) {
+  double parallel_other_time = _cur_collection_par_time_ms -
+    (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
+     scan_only_time + scan_rs_time + obj_copy_time + termination_time);
+  if (update_stats) {
     MainBodySummary* body_summary = summary->main_body_summary();
     guarantee(body_summary != NULL, "should not be null!");
 
@@ -1607,9 +1658,6 @@
       body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
       body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
       body_summary->record_termination_time_ms(termination_time);
-      parallel_other_time = _cur_collection_par_time_ms -
-        (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
-         scan_only_time + scan_rs_time + obj_copy_time + termination_time);
       body_summary->record_parallel_other_time_ms(parallel_other_time);
     }
     body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
@@ -1754,8 +1802,10 @@
     gclog_or_tty->print_cr("]");
 
   _all_pause_times_ms->add(elapsed_ms);
-  summary->record_total_time_ms(elapsed_ms);
-  summary->record_other_time_ms(other_time_ms);
+  if (update_stats) {
+    summary->record_total_time_ms(elapsed_ms);
+    summary->record_other_time_ms(other_time_ms);
+  }
   for (int i = 0; i < _aux_num; ++i)
     if (_cur_aux_times_set[i])
       _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
@@ -1805,7 +1855,7 @@
 
   // <NEW PREDICTION>
 
-  if (!popular && !abandoned) {
+  if (!popular && update_stats) {
     double pause_time_ms = elapsed_ms;
 
     size_t diff = 0;
@@ -1965,9 +2015,6 @@
   // </NEW PREDICTION>
 
   _target_pause_time_ms = -1.0;
-
-  // TODO: calculate tenuring threshold
-  _tenuring_threshold = MaxTenuringThreshold;
 }
 
 // <NEW PREDICTION>
@@ -2058,7 +2105,7 @@
     guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
                "invariant" );
     int age = hr->age_in_surv_rate_group();
-    double yg_surv_rate = predict_yg_surv_rate(age);
+    double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
 
@@ -2091,7 +2138,7 @@
   }
 #if PREDICTIONS_VERBOSE
   if (young) {
-    _recorded_young_bytes += hr->asSpace()->used();
+    _recorded_young_bytes += hr->used();
   } else {
     _recorded_marked_bytes += hr->max_live_bytes();
   }
@@ -2119,11 +2166,6 @@
       predict_non_young_card_num(_predicted_rs_lengths);
   _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
 
-  _predicted_young_survival_ratio = 0.0;
-  for (int i = 0; i < _recorded_young_regions; ++i)
-    _predicted_young_survival_ratio += predict_yg_surv_rate(i);
-  _predicted_young_survival_ratio /= (double) _recorded_young_regions;
-
   _predicted_scan_only_scan_time_ms =
     predict_scan_only_time_ms(_recorded_scan_only_regions);
   _predicted_rs_update_time_ms =
@@ -2673,8 +2715,11 @@
   assert(in_young_gc_mode(), "should be in young GC mode");
   bool ret;
   size_t young_list_length = _g1->young_list_length();
-
-  if (young_list_length < _young_list_target_length) {
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  if (young_list_length < young_list_max_length) {
     ret = true;
     ++_region_num_young;
   } else {
@@ -2710,17 +2755,39 @@
 }
 
 
-uint G1CollectorPolicy::max_regions(int purpose) {
+size_t G1CollectorPolicy::max_regions(int purpose) {
   switch (purpose) {
     case GCAllocForSurvived:
-      return G1MaxSurvivorRegions;
+      return _max_survivor_regions;
     case GCAllocForTenured:
-      return UINT_MAX;
+      return REGIONS_UNLIMITED;
     default:
-      return UINT_MAX;
+      ShouldNotReachHere();
+      return REGIONS_UNLIMITED;
   };
 }
 
+// Calculates survivor space parameters.
+void G1CollectorPolicy::calculate_survivors_policy()
+{
+  if (!G1UseSurvivorSpace) {
+    return;
+  }
+  if (G1FixedSurvivorSpaceSize == 0) {
+    _max_survivor_regions = _young_list_target_length / SurvivorRatio;
+  } else {
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  }
+
+  if (G1FixedTenuringThreshold) {
+    _tenuring_threshold = MaxTenuringThreshold;
+  } else {
+    _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
+        HeapRegion::GrainWords * _max_survivor_regions);
+  }
+}
+
+
 void
 G1CollectorPolicy_BestRegionsFirst::
 set_single_region_collection_set(HeapRegion* hr) {
@@ -2743,7 +2810,11 @@
   double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
 
   size_t young_list_length = _g1->young_list_length();
-  bool reached_target_length = young_list_length >= _young_list_target_length;
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  bool reached_target_length = young_list_length >= young_list_max_length;
 
   if (in_young_gc_mode()) {
     if (reached_target_length) {
@@ -2985,6 +3056,7 @@
   _collection_set = hr;
   _collection_set_size++;
   _collection_set_bytes_used_before += hr->used();
+  _g1->register_region_with_in_cset_fast_test(hr);
 }
 
 void
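
calculate_survivors_policy() above picks _max_survivor_regions either from SurvivorRatio or from G1FixedSurvivorSpaceSize, and derives the tenuring threshold from the merged per-thread age tables. Here is a hedged sketch of both decisions; kGrainBytes, kMaxAge and the threshold function are simplified stand-ins, not ageTable's real logic:

    #include <cstddef>

    const size_t kGrainBytes = 1024 * 1024;   // assumed region size
    const int    kMaxAge     = 16;            // assumed age-table depth

    // Simplified stand-in for ageTable::compute_tenuring_threshold: return
    // the first age at which the cumulative copied volume overflows the
    // survivor budget (GrainWords * max_survivor_regions in the real code).
    int compute_tenuring_threshold(const size_t sizes[], size_t budget) {
      size_t total = 0;
      for (int age = 0; age < kMaxAge; age++) {
        total += sizes[age];
        if (total > budget) return age;
      }
      return kMaxAge;
    }

    // The two sizing modes chosen by calculate_survivors_policy():
    size_t max_survivor_regions(size_t young_target_len,
                                size_t fixed_survivor_bytes,  // 0 == not fixed
                                size_t survivor_ratio) {
      return fixed_survivor_bytes == 0
          ? young_target_len / survivor_ratio   // ratio-driven sizing
          : fixed_survivor_bytes / kGrainBytes; // fixed-size mode
    }

    int main() {
      const size_t per_age[kMaxAge] = { 4096, 2048, 1024 };   // words per age
      int thr = compute_tenuring_threshold(per_age, 5000);
      (void)thr;                                // 4096+2048 > 5000 => threshold 1
      size_t n = max_survivor_regions(100, 0, 8);  // ratio mode: 12 regions
      (void)n;
      return 0;
    }
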
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -49,7 +49,7 @@
 class MainBodySummary;
 class PopPreambleSummary;
 
-class PauseSummary {
+class PauseSummary: public CHeapObj {
   define_num_seq(total)
     define_num_seq(other)
 
@@ -58,7 +58,7 @@
   virtual PopPreambleSummary* pop_preamble_summary() { return NULL; }
 };
 
-class MainBodySummary {
+class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
@@ -75,7 +75,7 @@
   define_num_seq(clear_ct)  // parallel only
 };
 
-class PopPreambleSummary {
+class PopPreambleSummary: public CHeapObj {
   define_num_seq(pop_preamble)
     define_num_seq(pop_update_rs)
     define_num_seq(pop_scan_rs)
@@ -557,6 +557,8 @@
     return get_new_neg_prediction(_young_gc_eff_seq);
   }
 
+  double predict_survivor_regions_evac_time();
+
   // </NEW PREDICTION>
 
 public:
@@ -599,8 +601,8 @@
 
   // Returns an estimate of the survival rate of the region at yg-age
   // "yg_age".
-  double predict_yg_surv_rate(int age) {
-    TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age);
+  double predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) {
+    TruncatedSeq* seq = surv_rate_group->get_seq(age);
     if (seq->num() == 0)
       gclog_or_tty->print("BARF! age is %d", age);
     guarantee( seq->num() > 0, "invariant" );
@@ -610,6 +612,10 @@
     return pred;
   }
 
+  double predict_yg_surv_rate(int age) {
+    return predict_yg_surv_rate(age, _short_lived_surv_rate_group);
+  }
+
   double accum_yg_surv_rate_pred(int age) {
     return _short_lived_surv_rate_group->accum_surv_rate_pred(age);
   }
@@ -822,6 +828,9 @@
 
   virtual void init();
 
+  // Create jstat counters for the policy.
+  virtual void initialize_gc_policy_counters();
+
   virtual HeapWord* mem_allocate_work(size_t size,
                                       bool is_tlab,
                                       bool* gc_overhead_limit_was_exceeded);
@@ -957,7 +966,7 @@
     record_termination_time(0, ms);
   }
 
-  void record_pause_time(double ms) {
+  void record_pause_time_ms(double ms) {
     _last_pause_time_ms = ms;
   }
 
@@ -1047,8 +1056,12 @@
   // Print stats on young survival ratio
   void print_yg_surv_rate_info() const;
 
-  void finished_recalculating_age_indexes() {
-    _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+  void finished_recalculating_age_indexes(bool is_survivors) {
+    if (is_survivors) {
+      _survivor_surv_rate_group->finished_recalculating_age_indexes();
+    } else {
+      _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+    }
     // do that for any other surv rate groups
   }
 
@@ -1097,6 +1110,17 @@
   // maximum number of survivor regions.
   int _tenuring_threshold;
 
+  // The limit on the number of regions allocated for survivors.
+  size_t _max_survivor_regions;
+
+  // The number of survivor regions after a collection.
+  size_t _recorded_survivor_regions;
+  // List of survivor regions.
+  HeapRegion* _recorded_survivor_head;
+  HeapRegion* _recorded_survivor_tail;
+
+  ageTable _survivors_age_table;
+
 public:
 
   inline GCAllocPurpose
@@ -1116,7 +1140,9 @@
     return GCAllocForTenured;
   }
 
-  uint max_regions(int purpose);
+  static const size_t REGIONS_UNLIMITED = ~(size_t)0;
+
+  size_t max_regions(int purpose);
 
   // The limit on regions for a particular purpose is reached.
   void note_alloc_region_limit_reached(int purpose) {
@@ -1132,6 +1158,23 @@
   void note_stop_adding_survivor_regions() {
     _survivor_surv_rate_group->stop_adding_regions();
   }
+
+  void record_survivor_regions(size_t      regions,
+                               HeapRegion* head,
+                               HeapRegion* tail) {
+    _recorded_survivor_regions = regions;
+    _recorded_survivor_head    = head;
+    _recorded_survivor_tail    = tail;
+  }
+
+  void record_thread_age_table(ageTable* age_table)
+  {
+    _survivors_age_table.merge_par(age_table);
+  }
+
+  // Calculates survivor space parameters.
+  void calculate_survivors_policy();
+
 };
 
 // This encapsulates a particular strategy for a g1 Collector.
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -28,7 +28,7 @@
 /***** ALL TIMES ARE IN SECS!!!!!!! *****/
 
 // this is the "interface"
-class G1MMUTracker {
+class G1MMUTracker: public CHeapObj {
 protected:
   double          _time_slice;
   double          _max_gc_time; // this is per time slice
@@ -67,7 +67,7 @@
   }
 };
 
-class G1MMUTrackerQueueElem {
+class G1MMUTrackerQueueElem VALUE_OBJ_CLASS_SPEC {
 private:
   double _start_time;
   double _end_time;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -77,6 +77,18 @@
 
 #define G1_PARTIAL_ARRAY_MASK 1
 
+inline bool has_partial_array_mask(oop* ref) {
+  return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK;
+}
+
+inline oop* set_partial_array_mask(oop obj) {
+  return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK);
+}
+
+inline oop clear_partial_array_mask(oop* ref) {
+  return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
+}
+
 class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
   G1ParScanClosure _scanner;
   template <class T> void process_array_chunk(oop obj, int start, int end);
@@ -101,7 +113,8 @@
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
   void do_oop_work(oop* p);
@@ -119,14 +132,22 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
-typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
+typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, false, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true,  false> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, true,  false> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   true,  false> G1ParScanAndMarkHeapRSClosure;
+// This is the only case when we set skip_cset_test. Basically, this
+// closure is (or should be?) only called directly while we're draining
+// the overflow and task queues. In that case we know that the
+// reference in question points into the collection set, otherwise we
+// would not have pushed it on the queue.
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+// We need a separate closure to handle references during evacuation
+// failure processing, as it cannot assume that the reference already
+// points to the collection set (like G1ParScanHeapEvacClosure does).
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public OopClosure {
   G1CollectedHeap* _g1;
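
The has/set/clear_partial_array_mask helpers earlier in this file tag task-queue entries by stealing the low bit of an aligned pointer. A standalone demonstration of the same low-bit tagging, with void* standing in for oop*:

    #include <cassert>
    #include <cstdint>

    const intptr_t kMask = 1;  // cf. G1_PARTIAL_ARRAY_MASK

    void* set_mask(void* p)   { return (void*)((intptr_t)p | kMask); }
    bool  has_mask(void* p)   { return ((intptr_t)p & kMask) != 0; }
    void* clear_mask(void* p) { return (void*)((intptr_t)p & ~kMask); }

    int main() {
      alignas(8) int obj = 0;          // heap objects are at least word-aligned,
      void* tagged = set_mask(&obj);   // so bit 0 is free to carry the tag
      assert(has_mask(tagged));
      assert(!has_mask(&obj));
      assert(clear_mask(tagged) == (void*)&obj);
      return 0;
    }

Because the mask lives in the pointer itself, a worker popping an entry can distinguish "partial array chunk" from "ordinary reference" without any extra storage per entry.
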
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -105,33 +105,6 @@
   _g1->heap_region_iterate(&rc);
 }
 
-class UpdateRSOopClosure: public OopClosure {
-  HeapRegion* _from;
-  HRInto_G1RemSet* _rs;
-  int _worker_i;
-public:
-  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
-    _from(NULL), _rs(rs), _worker_i(worker_i) {
-    guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
-  }
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    assert(_from != NULL, "from region must be non-NULL");
-    _rs->par_write_ref(_from, p, _worker_i);
-  }
-  // Override: this closure is idempotent.
-  //  bool idempotent() { return true; }
-  bool apply_to_weak_ref_discovered_field() { return true; }
-};
-
 class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
   G1CollectedHeap*    _g1h;
   ModRefBarrierSet*   _mr_bs;
@@ -177,11 +150,19 @@
     _cards_scanned(NULL), _total_cards_scanned(0)
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, ParallelGCThreads);
+  guarantee(n_workers() > 0, "There should be some workers");
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, n_workers());
+  for (uint i = 0; i < n_workers(); i++) {
+    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192, true);
+  }
 }
 
 HRInto_G1RemSet::~HRInto_G1RemSet() {
   delete _seq_task;
+  for (uint i = 0; i < n_workers(); i++) {
+    delete _new_refs[i];
+  }
+  FREE_C_HEAP_ARRAY(GrowableArray<oop*>*, _new_refs);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -281,8 +262,9 @@
         if (!_ct_bs->is_card_claimed(card_index) &&
             !_ct_bs->is_card_dirty(card_index)) {
           assert(_ct_bs->is_card_clean(card_index) ||
-                 _ct_bs->is_card_claimed(card_index),
-                 "Card is either dirty, clean, or claimed");
+                 _ct_bs->is_card_claimed(card_index) ||
+                 _ct_bs->is_card_deferred(card_index),
+                 "Card is either clean, claimed or deferred");
           if (_ct_bs->claim_card(card_index))
             scanCard(card_index, card_region);
         }
@@ -338,14 +320,12 @@
 
   _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-  if (ParallelGCThreads > 0) {
-    // In this case, we called scanNewRefsRS and recorded the corresponding
-    // time.
-    double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
-    if (scan_new_refs_time_ms > 0.0) {
-      closure_app_time_ms += scan_new_refs_time_ms;
-    }
+
+  double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
+  if (scan_new_refs_time_ms > 0.0) {
+    closure_app_time_ms += scan_new_refs_time_ms;
   }
+
   _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
 }
 
@@ -469,8 +449,8 @@
   double scan_new_refs_start_sec = os::elapsedTime();
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
-  while (_new_refs[worker_i]->is_nonempty()) {
-    oop* p = _new_refs[worker_i]->pop();
+  for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
+    oop* p = _new_refs[worker_i]->at(i);
     oop obj = *p;
     // *p was in the collection set when p was pushed on "_new_refs", but
     // another thread may have processed this location from an RS, so it
@@ -480,10 +460,6 @@
       HeapRegion* r = g1h->heap_region_containing(p);
 
       DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
-      assert(ParallelGCThreads > 1
-             || to->rem_set()->contains_reference(p),
-             "Invariant: pushed after being added."
-             "(Not reliable in parallel code.)");
       oc->set_region(r);
       // If "p" has already been processed concurrently, this is
       // idempotent.
@@ -526,20 +502,31 @@
   }
 
   if (ParallelGCThreads > 0) {
-    // This is a temporary change to serialize the update and scanning
-    // of remembered sets. There are some race conditions when this is
-    // done in parallel and they are causing failures. When we resolve
-    // said race conditions, we'll revert back to parallel remembered
-    // set updating and scanning. See CRs 6677707 and 6677708.
-    if (worker_i == 0) {
+    // The two flags below were introduced temporarily to serialize
+    // the updating and scanning of remembered sets. There are some
+    // race conditions when these two operations are done in parallel
+    // and they are causing failures. When we resolve said race
+    // conditions, we'll revert back to parallel remembered set
+    // updating and scanning. See CRs 6677707 and 6677708.
+    if (G1EnableParallelRSetUpdating || (worker_i == 0)) {
       updateRS(worker_i);
       scanNewRefsRS(oc, worker_i);
+    } else {
+      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
+      _g1p->record_update_rs_time(worker_i, 0.0);
+      _g1p->record_scan_new_refs_time(worker_i, 0.0);
+    }
+    if (G1EnableParallelRSetScanning || (worker_i == 0)) {
       scanRS(oc, worker_i);
+    } else {
+      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_scan_rs_time(worker_i, 0.0);
     }
   } else {
     assert(worker_i == 0, "invariant");
-
     updateRS(0);
+    scanNewRefsRS(oc, 0);
     scanRS(oc, 0);
   }
 }
@@ -559,11 +546,7 @@
   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   if (ParallelGCThreads > 0) {
     set_par_traversal(true);
-    int n_workers = _g1->workers()->total_workers();
-    _seq_task->set_par_threads(n_workers);
-    for (uint i = 0; i < ParallelGCThreads; i++)
-      _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
-
+    _seq_task->set_par_threads((int)n_workers());
     if (cg1r->do_traversal()) {
       updateRS(0);
       // Have to do this again after updaters
@@ -572,6 +555,9 @@
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  for (uint i = 0; i < n_workers(); ++i) {
+    _cards_scanned[i] = 0;
+  }
   _total_cards_scanned = 0;
 }
 
@@ -584,6 +570,53 @@
   }
 };
 
+class UpdateRSetOopsIntoCSImmediate : public OopClosure {
+  G1CollectedHeap* _g1;
+public:
+  UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    HeapRegion* to = _g1->heap_region_containing(*p);
+    if (to->in_collection_set()) {
+      if (to->rem_set()->add_reference(p, 0)) {
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+  }
+};
+
+class UpdateRSetOopsIntoCSDeferred : public OopClosure {
+  G1CollectedHeap* _g1;
+  CardTableModRefBS* _ct_bs;
+  DirtyCardQueue* _dcq;
+public:
+  UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
+  virtual void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  virtual void do_oop(oop* p) {
+    oop obj = *p;
+    if (_g1->obj_in_cs(obj)) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) {
+  for (size_t i = 0; i < n_workers(); i++) {
+    for (int j = 0; j < _new_refs[i]->length(); j++) {
+      oop* p = _new_refs[i]->at(j);
+      cl->do_oop(p);
+    }
+  }
+}
+
 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   guarantee( _cards_scanned != NULL, "invariant" );
   _total_cards_scanned = 0;
@@ -606,11 +639,25 @@
     if (cg1r->do_traversal()) {
       cg1r->cg1rThread()->set_do_traversal(false);
     }
-    for (uint i = 0; i < ParallelGCThreads; i++) {
-      delete _new_refs[i];
-    }
     set_par_traversal(false);
   }
+
+  if (_g1->evacuation_failed()) {
+    // Restore remembered sets for the regions pointing into
+    // the collection set.
+    if (G1DeferredRSUpdate) {
+      DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
+      UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
+      new_refs_iterate(&deferred_update);
+    } else {
+      UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
+      new_refs_iterate(&immediate_update);
+    }
+  }
+  for (uint i = 0; i < n_workers(); i++) {
+    _new_refs[i]->clear();
+  }
+
   assert(!_par_traversal_in_progress, "Invariant between iterations.");
 }
 
@@ -988,7 +1035,9 @@
   }
 }
 void HRInto_G1RemSet::prepare_for_verify() {
-  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
+  if (G1HRRSFlushLogBuffersOnVerify &&
+      (VerifyBeforeGC || VerifyAfterGC) &&
+      !_g1->full_collection()) {
     cleanupHRRS();
     _g1->set_refine_cte_cl_concurrency(false);
     if (SafepointSynchronize::is_at_safepoint()) {
@@ -999,5 +1048,7 @@
     _cg1r->set_use_cache(false);
     updateRS(0);
     _cg1r->set_use_cache(cg1r_use_cache);
+
+    assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   }
 }
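
UpdateRSetOopsIntoCSDeferred above marks the card covering a failed-evacuation reference as "deferred" and enqueues it exactly once. A toy sketch of that mark-once-then-enqueue step; the plain byte array and vector are stand-ins for the real card table and dirty-card queue, and the card size is an assumption:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    const unsigned kLogCardBytes = 9;  // 512-byte cards (assumed)

    int main() {
      static uint8_t cards[1 << 16] = { 0 };  // toy card table
      std::vector<uint8_t*> dcq;              // toy dirty-card queue

      uintptr_t field_offset = 12345;                // offset of some oop* p
      size_t index = field_offset >> kLogCardBytes;  // cf. ct_bs->index_for(p)

      // A successful "mark deferred" happens only for the first marker,
      // so each card is enqueued at most once:
      if (cards[index] == 0) {
        cards[index] = 1;              // the "deferred" state
        dcq.push_back(&cards[index]);  // cf. dcq->enqueue(byte_for_index(...))
      }
      return 0;
    }
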
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -30,7 +30,7 @@
 class HRInto_G1RemSet;
 class ConcurrentG1Refine;
 
-class G1RemSet {
+class G1RemSet: public CHeapObj {
 protected:
   G1CollectedHeap* _g1;
 
@@ -155,6 +155,7 @@
   bool _par_traversal_in_progress;
   void set_par_traversal(bool b);
   GrowableArray<oop*>** _new_refs;
+  void new_refs_iterate(OopClosure* cl);
 
 public:
   // This is called to reset dual hash tables after the gc pause
@@ -214,3 +215,27 @@
   int n() { return _n; };
   HeapWord* start_first() { return _start_first; }
 };
+
+class UpdateRSOopClosure: public OopClosure {
+  HeapRegion* _from;
+  HRInto_G1RemSet* _rs;
+  int _worker_i;
+public:
+  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
+    _from(NULL), _rs(rs), _worker_i(worker_i) {
+    guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
+  }
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  virtual void do_oop(narrowOop* p);
+  virtual void do_oop(oop* p);
+
+  // Override: this closure is idempotent.
+  //  bool idempotent() { return true; }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -31,24 +31,7 @@
 }
 
 inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
-  oop obj = *p;
-  assert(from != NULL && from->is_in_reserved(p),
-         "p is not in a from");
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (from != to && to != NULL) {
-    if (!to->popular() && !from->is_survivor()) {
-#if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
-                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
-                             p, obj,
-                             to->bottom(), to->end());
-#endif
-      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
-      if (to->rem_set()->add_reference(p)) {
-        _g1->schedule_popular_region_evac(to);
-      }
-    }
-  }
+  par_write_ref(from, p, 0);
 }
 
 inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
@@ -82,7 +65,22 @@
   HeapRegion* to = _g1->heap_region_containing(obj);
   // The test below could be optimized by applying a bit op to "to" and "from".
   if (to != NULL && from != NULL && from != to) {
-    if (!to->popular() && !from->is_survivor()) {
+    bool update_delayed = false;
+    // There is a tricky infinite loop if we keep pushing
+    // self forwarding pointers onto our _new_refs list.
+    // The _par_traversal_in_progress flag is true during the collection pause,
+    // false during the evacuation failure handing.
+    if (_par_traversal_in_progress &&
+        to->in_collection_set() && !self_forwarded(obj)) {
+      _new_refs[tid]->push(p);
+      // Deferred updates to the Cset are either discarded (in the normal case),
+      // or processed (if an evacuation failure occurs) at the end
+      // of the collection.
+      // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do().
+      update_delayed = true;
+    }
+
+    if (!to->popular() && !update_delayed) {
 #if G1_REM_SET_LOGGING
       gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
                              " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
@@ -94,11 +92,14 @@
         _g1->schedule_popular_region_evac(to);
       }
     }
-    // There is a tricky infinite loop if we keep pushing
-    // self forwarding pointers onto our _new_refs list.
-    if (_par_traversal_in_progress &&
-        to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push(p);
-    }
   }
 }
+
+inline void UpdateRSOopClosure::do_oop(narrowOop* p) {
+  guarantee(false, "NYI");
+}
+
+inline void UpdateRSOopClosure::do_oop(oop* p) {
+  assert(_from != NULL, "from region must be non-NULL");
+  _rs->par_write_ref(_from, p, _worker_i);
+}
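
The comments in par_write_ref above explain why, during the pause, references into the collection set are pushed onto _new_refs instead of being added to a remembered set immediately, and why self-forwarded objects must be excluded (they would be re-pushed forever). A control-flow sketch of that decision, with trivial stand-ins for the real predicates and actions:

    // trivial stand-ins for the real predicates and actions:
    static bool in_collection_set(void*) { return true;  }
    static bool self_forwarded(void*)    { return false; }
    static void push_new_ref(void**)     {}  // would record p on _new_refs[tid]
    static void add_to_rem_set(void**)   {}  // the immediate RS update

    // cf. the shape of HRInto_G1RemSet::par_write_ref above
    static void write_ref_sketch(void** p, bool pause_in_progress) {
      void* obj = *p;
      bool update_delayed = false;
      if (pause_in_progress && in_collection_set(obj) && !self_forwarded(obj)) {
        push_new_ref(p);        // processed, or discarded, after the pause
        update_delayed = true;  // also breaks the self-forwarding loop
      }
      if (!update_delayed) {
        add_to_rem_set(p);      // normal, immediate remembered-set update
      }
    }

    int main() {
      void* slot = 0;
      write_ref_sketch(&slot, true);   // during the pause: deferred
      write_ref_sketch(&slot, false);  // during failure handling: immediate
      return 0;
    }
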
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -28,7 +28,7 @@
 
 #define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
                                                                             \
-  product(intx, ParallelGCG1AllocBufferSize, 4*K,                           \
+  product(intx, ParallelGCG1AllocBufferSize, 8*K,                           \
           "Size of parallel G1 allocation buffers in to-space.")            \
                                                                             \
   product(intx, G1TimeSliceMS, 500,                                         \
@@ -172,6 +172,9 @@
   develop(bool, G1RSBarrierUseQueue, true,                                  \
           "If true, use queueing RS barrier")                               \
                                                                             \
+  develop(bool, G1DeferredRSUpdate, true,                                   \
+          "If true, use deferred RS updates")                               \
+                                                                            \
   develop(bool, G1RSLogCheckCardTable, false,                               \
           "If true, verify that no dirty cards remain after RS log "        \
           "processing.")                                                    \
@@ -281,7 +284,25 @@
   develop(bool, G1HRRSFlushLogBuffersOnVerify, false,                       \
           "Forces flushing of log buffers before verification.")            \
                                                                             \
-  product(intx, G1MaxSurvivorRegions, 0,                                    \
-          "The maximum number of survivor regions")
+  product(bool, G1UseSurvivorSpace, true,                                   \
+          "When true, use survivor space.")                                 \
+                                                                            \
+  product(bool, G1FixedTenuringThreshold, false,                            \
+          "When set, G1 will not adjust the tenuring threshold")            \
+                                                                            \
+  product(bool, G1FixedEdenSize, false,                                     \
+          "When set, G1 will not allocate unused survivor space regions")   \
+                                                                            \
+  product(uintx, G1FixedSurvivorSpaceSize, 0,                               \
+          "If non-0 is the size of the G1 survivor space, "                 \
+          "otherwise SurvivorRatio is used to determine the size")          \
+                                                                            \
+  experimental(bool, G1EnableParallelRSetUpdating, false,                   \
+          "Enables the parallelization of remembered set updating "         \
+          "during evacuation pauses")                                       \
+                                                                            \
+  experimental(bool, G1EnableParallelRSetScanning, false,                   \
+          "Enables the parallelization of remembered set scanning "         \
+          "during evacuation pauses")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -32,11 +32,13 @@
   G1BarrierNone, G1BarrierRS, G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure;
 class G1ParScanClosure;
 
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true>
+                                                      G1ParScanHeapEvacClosure;
 
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -318,7 +318,8 @@
     FinalCountClaimValue  = 1,
     NoteEndClaimValue     = 2,
     ScrubRemSetClaimValue = 3,
-    ParVerifyClaimValue   = 4
+    ParVerifyClaimValue   = 4,
+    RebuildRSClaimValue   = 5
   };
 
   // Concurrent refinement requires contiguous heap regions (in which TLABs
@@ -566,7 +567,11 @@
   void note_end_of_copying() {
     assert(top() >= _next_top_at_mark_start,
            "Increase only");
-    _next_top_at_mark_start = top();
+    // Survivor regions will be scanned at the start of concurrent
+    // marking.
+    if (!is_survivor()) {
+      _next_top_at_mark_start = top();
+    }
   }
 
   // Returns "false" iff no object in the region was allocated when the
@@ -829,7 +834,7 @@
 
 // A linked list of heap regions.  It leaves the "next" field
 // unspecified; that's up to subtypes.
-class RegionList {
+class RegionList VALUE_OBJ_CLASS_SPEC {
 protected:
   virtual HeapRegion* get_next(HeapRegion* chr) = 0;
   virtual void set_next(HeapRegion* chr,
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -65,9 +65,11 @@
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
+#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
+#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
@@ -1144,7 +1146,9 @@
   size_t i = _outgoing_region_map.get_next_one_offset(0);
   while (i < _outgoing_region_map.size()) {
     HeapRegion* to_region = g1h->region_at(i);
-    to_region->rem_set()->clear_incoming_entry(hr());
+    if (!to_region->in_collection_set()) {
+      to_region->rem_set()->clear_incoming_entry(hr());
+    }
     i = _outgoing_region_map.get_next_one_offset(i+1);
   }
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -58,7 +58,7 @@
 //      is represented.  If a deleted PRT is re-used, a thread adding a bit,
 //      thinking the PRT is for a different region, does no harm.
 
-class OtherRegionsTable: public CHeapObj {
+class OtherRegionsTable VALUE_OBJ_CLASS_SPEC {
   friend class HeapRegionRemSetIterator;
 
   G1CollectedHeap* _g1h;
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -91,15 +91,17 @@
   _n_completed_buffers(0),
   _process_completed_threshold(0), _process_completed(false),
   _buf_free_list(NULL), _buf_free_list_sz(0)
-{}
+{
+  _fl_owner = this;
+}
 
 void** PtrQueueSet::allocate_buffer() {
   assert(_sz > 0, "Didn't set a buffer size.");
-  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
-  if (_buf_free_list != NULL) {
-    void** res = _buf_free_list;
-    _buf_free_list = (void**)_buf_free_list[0];
-    _buf_free_list_sz--;
+  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_fl_owner->_buf_free_list != NULL) {
+    void** res = _fl_owner->_buf_free_list;
+    _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0];
+    _fl_owner->_buf_free_list_sz--;
     // Just override the next pointer with NULL, just in case we scan this part
     // of the buffer.
     res[0] = NULL;
@@ -111,10 +113,10 @@
 
 void PtrQueueSet::deallocate_buffer(void** buf) {
   assert(_sz > 0, "Didn't set a buffer size.");
-  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
-  buf[0] = (void*)_buf_free_list;
-  _buf_free_list = buf;
-  _buf_free_list_sz++;
+  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
+  buf[0] = (void*)_fl_owner->_buf_free_list;
+  _fl_owner->_buf_free_list = buf;
+  _fl_owner->_buf_free_list_sz++;
 }
 
 void PtrQueueSet::reduce_free_list() {
@@ -207,3 +209,58 @@
 void PtrQueueSet::set_process_completed_threshold(size_t sz) {
   _process_completed_threshold = sz;
 }
+
+// Merge lists of buffers. Notify waiting threads if the length of the list
+// exceeds the threshold. The source queue is emptied as a result. The queues
+// must share the monitor.
+void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
+  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = src->_completed_buffers_head;
+    _completed_buffers_tail = src->_completed_buffers_tail;
+  } else {
+    assert(_completed_buffers_head != NULL, "Well formedness");
+    if (src->_completed_buffers_head != NULL) {
+      _completed_buffers_tail->next = src->_completed_buffers_head;
+      _completed_buffers_tail = src->_completed_buffers_tail;
+    }
+  }
+  _n_completed_buffers += src->_n_completed_buffers;
+
+  src->_n_completed_buffers = 0;
+  src->_completed_buffers_head = NULL;
+  src->_completed_buffers_tail = NULL;
+
+  assert((_completed_buffers_head == NULL && _completed_buffers_tail == NULL) ||
+         (_completed_buffers_head != NULL && _completed_buffers_tail != NULL),
+         "Sanity");
+
+  if (!_process_completed &&
+      _n_completed_buffers >= _process_completed_threshold) {
+    _process_completed = true;
+    if (_notify_when_complete)
+      _cbl_mon->notify_all();
+  }
+}
+
+// Merge free lists of the two queues. The free list of the source
+// queue is emptied as a result. The queues must share the same
+// mutex that guards free lists.
+void PtrQueueSet::merge_freelists(PtrQueueSet* src) {
+  assert(_fl_lock == src->_fl_lock, "Should share the same lock");
+  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
+  if (_buf_free_list != NULL) {
+    void **p = _buf_free_list;
+    while (*p != NULL) {
+      p = (void**)*p;
+    }
+    *p = src->_buf_free_list;
+  } else {
+    _buf_free_list = src->_buf_free_list;
+  }
+  _buf_free_list_sz += src->_buf_free_list_sz;
+  src->_buf_free_list = NULL;
+  src->_buf_free_list_sz = 0;
+}
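
merge_freelists above splices one intrusive free list onto another by walking to the tail; each free buffer stores its next pointer in its own slot 0, so no separate link nodes are needed. A standalone sketch of that splice (toy two-slot "buffers", not the real PtrQueueSet state):

    #include <cassert>

    int main() {
      // Three free "buffers"; slot 0 of each is the intrusive next pointer.
      void* a[2] = { nullptr, nullptr };
      void* b[2] = { nullptr, nullptr };
      void* c[2] = { nullptr, nullptr };

      void** dst_free_list = a;  a[0] = b;  // dst list: a -> b
      void** src_free_list = c;             // src list: c

      // Walk dst to its tail, then hang src off the end
      // (cf. PtrQueueSet::merge_freelists).
      void** p = dst_free_list;
      while (*p != nullptr) p = (void**)*p;
      *p = src_free_list;
      src_free_list = nullptr;              // the source list is emptied

      assert((void**)b[0] == c);            // dst is now a -> b -> c
      assert(src_free_list == nullptr);
      return 0;
    }
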
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -29,7 +29,7 @@
 
 class PtrQueueSet;
 
-class PtrQueue: public CHeapObj {
+class PtrQueue VALUE_OBJ_CLASS_SPEC {
 
 protected:
   // The ptr queue set to which this queue belongs.
@@ -130,7 +130,7 @@
 // In particular, the individual queues allocate buffers from this shared
 // set, and return completed buffers to the set.
 // All these variables are protected by the TLOQ_CBL_mon. XXX ???
-class PtrQueueSet: public CHeapObj {
+class PtrQueueSet VALUE_OBJ_CLASS_SPEC {
 
 protected:
 
@@ -155,6 +155,9 @@
   Mutex* _fl_lock;
   void** _buf_free_list;
   size_t _buf_free_list_sz;
+  // Queue sets can share a freelist. The _fl_owner variable
+  // specifies the owner. It is set to "this" by default.
+  PtrQueueSet* _fl_owner;
 
   // The size of all buffers in the set.
   size_t _sz;
@@ -188,10 +191,13 @@
   // Because of init-order concerns, we can't pass these as constructor
   // arguments.
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0) {
+                  int max_completed_queue = 0,
+                  PtrQueueSet *fl_owner = NULL) {
     _max_completed_queue = max_completed_queue;
     assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
-    _cbl_mon = cbl_mon; _fl_lock = fl_lock;
+    _cbl_mon = cbl_mon;
+    _fl_lock = fl_lock;
+    _fl_owner = (fl_owner != NULL) ? fl_owner : this;
   }
 
   // Return an empty oop array of size _sz (required to be non-zero).
@@ -228,4 +234,7 @@
   void reduce_free_list();
 
   size_t completed_buffers_num() { return _n_completed_buffers; }
+
+  void merge_bufferlists(PtrQueueSet* src);
+  void merge_freelists(PtrQueueSet* src);
 };
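
A hedged sketch of how two queue sets might be wired together through the new fl_owner parameter; only the initialize() signature comes from the patch, while the lock names and the allocation-forwarding behaviour are assumptions:

    PtrQueueSet primary;
    PtrQueueSet secondary;
    primary.initialize(shared_cbl_mon, shared_fl_lock);
    secondary.initialize(shared_cbl_mon, shared_fl_lock,
                         0 /* max_completed_queue */, &primary);
    // secondary._fl_owner now points at primary, so buffer allocation
    // and deallocation presumably go through primary's free list, and
    // merge_freelists()/merge_bufferlists() may assume shared locks.
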
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -504,6 +504,7 @@
   // Make sure that the current and next tables agree.  (Another mechanism
   // takes care of deleting now-unused tables.)
   _cur = _next;
+  set_expanded(false);
 }
 
 void SparsePRT::expand() {
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -33,7 +33,7 @@
 // old versions synchronously.
 
 
-class SparsePRTEntry {
+class SparsePRTEntry: public CHeapObj {
 public:
   enum SomePublicConstants {
     CardsPerEntry = (short)4,
@@ -167,7 +167,7 @@
 };
 
   // ValueObj because it will be embedded in an HRRS iterator.
-class RSHashTableIter: public CHeapObj {
+class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
     short _tbl_ind;
     short _bl_ind;
     short _card_ind;
@@ -213,7 +213,7 @@
 
 class SparsePRTIter;
 
-class SparsePRT : public CHeapObj {
+class SparsePRT VALUE_OBJ_CLASS_SPEC {
   //  Iterations are done on the _cur hash table, since they only need to
   //  see entries visible at the start of a collection pause.
   //  All other operations are done using the _next hash table.
@@ -274,7 +274,7 @@
 
   // Clean up all tables on the expanded list.  Called single threaded.
   static void cleanup_all();
-  RSHashTable* next() const { return _next; }
+  RSHashTable* cur() const { return _cur; }
 
 
   void init_iterator(SparsePRTIter* sprt_iter);
@@ -300,7 +300,7 @@
   {}
 
   void init(const SparsePRT* sprt) {
-    RSHashTableIter::init(sprt->next());
+    RSHashTableIter::init(sprt->cur());
   }
   bool has_next(size_t& card_index) {
     return RSHashTableIter::has_next(card_index);
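
The iterator change above (cur() instead of next()) matches SparsePRT's double-buffering: concurrent additions and expansion go to _next, while a pause iterates the _cur snapshot. A toy sketch of that discipline, with invented type names:

    struct DoubleBufferedTable {
      Table* _cur;     // stable snapshot, read by pause-time iterators
      Table* _next;    // receives concurrent additions and expansion
      void handover() {
        _cur = _next;  // publish the latest table at the snapshot point
      }
    };

Initializing SparsePRTIter from cur() keeps a pause from walking a table that is still being expanded.
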
--- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -29,23 +29,14 @@
                              const char* name,
                              size_t summary_surv_rates_len) :
     _g1p(g1p), _name(name),
-    _all_regions_allocated(0),
-    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
-    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
-    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
     _summary_surv_rates_len(summary_surv_rates_len),
     _summary_surv_rates_max_len(0),
-    _summary_surv_rates(NULL) {
-
-  // the following will set up the arrays with length 1
-  _curr_length = 1;
-  stop_adding_regions();
-  guarantee( _array_length == 1, "invariant" );
-  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
-  _surv_rate_pred[0]->add(0.4);
-  all_surviving_words_recorded(false);
-  _curr_length = 0;
-
+    _summary_surv_rates(NULL),
+    _surv_rate(NULL),
+    _accum_surv_rate_pred(NULL),
+    _surv_rate_pred(NULL)
+{
+  reset();
   if (summary_surv_rates_len > 0) {
     size_t length = summary_surv_rates_len;
       _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
@@ -60,61 +51,80 @@
   start_adding_regions();
 }
 
+
+void SurvRateGroup::reset()
+{
+  _all_regions_allocated = 0;
+  _scan_only_prefix      = 0;
+  _setup_seq_num         = 0;
+  _stats_arrays_length   = 0;
+  _accum_surv_rate       = 0.0;
+  _last_pred             = 0.0;
+  // the following will set up the arrays with length 1
+  _region_num            = 1;
+  stop_adding_regions();
+  guarantee( _stats_arrays_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);
+  all_surviving_words_recorded(false);
+  _region_num = 0;
+}
+
+
 void
 SurvRateGroup::start_adding_regions() {
-  _setup_seq_num   = _array_length;
-  _curr_length     = _scan_only_prefix;
+  _setup_seq_num   = _stats_arrays_length;
+  _region_num      = _scan_only_prefix;
   _accum_surv_rate = 0.0;
 
 #if 0
-  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
-                         _setup_seq_num, _curr_length);
+  gclog_or_tty->print_cr("[%s] start adding regions, seq num %d, length %d",
+                         _name, _setup_seq_num, _region_num);
 #endif // 0
 }
 
 void
 SurvRateGroup::stop_adding_regions() {
-  size_t length = _curr_length;
 
 #if 0
-  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+  gclog_or_tty->print_cr("[%s] stop adding regions, length %d", _name, _region_num);
 #endif // 0
 
-  if (length > _array_length) {
+  if (_region_num > _stats_arrays_length) {
     double* old_surv_rate = _surv_rate;
     double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
     TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
 
-    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_surv_rate == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                             "Not enough space for surv rate array.");
     }
-    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_accum_surv_rate_pred == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                          "Not enough space for accum surv rate pred array.");
     }
-    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num);
-    if (_surv_rate == NULL) {
+    if (_surv_rate_pred == NULL) {
-      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * _region_num,
                             "Not enough space for surv rate pred array.");
     }
 
-    for (size_t i = 0; i < _array_length; ++i)
+    for (size_t i = 0; i < _stats_arrays_length; ++i)
       _surv_rate_pred[i] = old_surv_rate_pred[i];
 
 #if 0
-    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
-                  _array_length, length - 1);
+    gclog_or_tty->print_cr("[%s] stop adding regions, new seqs %d to %d",
+                  _name, _stats_arrays_length, _region_num - 1);
 #endif // 0
 
-    for (size_t i = _array_length; i < length; ++i) {
+    for (size_t i = _stats_arrays_length; i < _region_num; ++i) {
       _surv_rate_pred[i] = new TruncatedSeq(10);
       // _surv_rate_pred[i]->add(last_pred);
     }
 
-    _array_length = length;
+    _stats_arrays_length = _region_num;
 
     if (old_surv_rate != NULL)
       FREE_C_HEAP_ARRAY(double, old_surv_rate);
@@ -124,7 +134,7 @@
       FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred);
   }
 
-  for (size_t i = 0; i < _array_length; ++i)
+  for (size_t i = 0; i < _stats_arrays_length; ++i)
     _surv_rate[i] = 0.0;
 }
 
@@ -135,7 +145,7 @@
 
   double ret = _accum_surv_rate;
   if (adjustment > 0) {
-    TruncatedSeq* seq = get_seq(_curr_length+1);
+    TruncatedSeq* seq = get_seq(_region_num+1);
     double surv_rate = _g1p->get_new_prediction(seq);
     ret += surv_rate;
   }
@@ -145,23 +155,23 @@
 
 int
 SurvRateGroup::next_age_index() {
-  TruncatedSeq* seq = get_seq(_curr_length);
+  TruncatedSeq* seq = get_seq(_region_num);
   double surv_rate = _g1p->get_new_prediction(seq);
   _accum_surv_rate += surv_rate;
 
-  ++_curr_length;
+  ++_region_num;
   return (int) ++_all_regions_allocated;
 }
 
 void
 SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
-  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  guarantee( scan_only_prefix <= _region_num, "pre-condition" );
   _scan_only_prefix = scan_only_prefix;
 }
 
 void
 SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
-  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _region_num,
              "pre-condition" );
   guarantee( _surv_rate[age_in_group] <= 0.00001,
              "should only update each slot once" );
@@ -178,15 +188,15 @@
 
 void
 SurvRateGroup::all_surviving_words_recorded(bool propagate) {
-  if (propagate && _curr_length > 0) { // conservative
-    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+  if (propagate && _region_num > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_region_num-1]->last();
 
 #if 0
     gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
                   surv_rate, _curr_length, _array_length - 1);
 #endif // 0
 
-    for (size_t i = _curr_length; i < _array_length; ++i) {
+    for (size_t i = _region_num; i < _stats_arrays_length; ++i) {
       guarantee( _surv_rate[i] <= 0.00001,
                  "the slot should not have been updated" );
       _surv_rate_pred[i]->add(surv_rate);
@@ -195,7 +205,7 @@
 
   double accum = 0.0;
   double pred = 0.0;
-  for (size_t i = 0; i < _array_length; ++i) {
+  for (size_t i = 0; i < _stats_arrays_length; ++i) {
     pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
     if (pred > 1.0) pred = 1.0;
     accum += pred;
@@ -209,8 +219,8 @@
 void
 SurvRateGroup::print() {
   gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
-                _name, _curr_length, _scan_only_prefix);
-  for (size_t i = 0; i < _curr_length; ++i) {
+                _name, _region_num, _scan_only_prefix);
+  for (size_t i = 0; i < _region_num; ++i) {
     gclog_or_tty->print_cr("    age %4d   surv rate %6.2lf %%   pred %6.2lf %%%s",
                   i, _surv_rate[i] * 100.0,
                   _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
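
Pulling the renamed pieces together, a hypothetical pause-to-pause calling sequence for a SurvRateGroup, inferred from the functions above rather than copied from the G1 policy code (g1p and words stand for values supplied by the collector policy):

    SurvRateGroup surv(g1p, "example", 0 /* summary_surv_rates_len */);
    surv.start_adding_regions();            // _region_num rewinds to the prefix
    int age = surv.next_age_index();        // once per region added to the group
    surv.stop_adding_regions();             // grows the stats arrays if needed
    surv.record_surviving_words(0, words);  // one update per age slot
    surv.all_surviving_words_recorded(true /* propagate */);
    surv.print();                           // per-age rates vs. predictions
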
--- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -29,7 +29,7 @@
   G1CollectorPolicy* _g1p;
   const char* _name;
 
-  size_t  _array_length;
+  size_t  _stats_arrays_length;
   double* _surv_rate;
   double* _accum_surv_rate_pred;
   double  _last_pred;
@@ -40,7 +40,7 @@
   size_t         _summary_surv_rates_max_len;
 
   int _all_regions_allocated;
-  size_t _curr_length;
+  size_t _region_num;
   size_t _scan_only_prefix;
   size_t _setup_seq_num;
 
@@ -48,6 +48,7 @@
   SurvRateGroup(G1CollectorPolicy* g1p,
                 const char* name,
                 size_t summary_surv_rates_len);
+  void reset();
   void start_adding_regions();
   void stop_adding_regions();
   void record_scan_only_prefix(size_t scan_only_prefix);
@@ -55,22 +56,21 @@
   void all_surviving_words_recorded(bool propagate);
   const char* name() { return _name; }
 
-  size_t region_num() { return _curr_length; }
+  size_t region_num() { return _region_num; }
   size_t scan_only_length() { return _scan_only_prefix; }
   double accum_surv_rate_pred(int age) {
     assert(age >= 0, "must be");
-    if ((size_t)age < _array_length)
+    if ((size_t)age < _stats_arrays_length)
       return _accum_surv_rate_pred[age];
     else {
-      double diff = (double) (age - _array_length + 1);
-      return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
+      double diff = (double) (age - _stats_arrays_length + 1);
+      return _accum_surv_rate_pred[_stats_arrays_length-1] + diff * _last_pred;
     }
   }
 
   double accum_surv_rate(size_t adjustment);
 
   TruncatedSeq* get_seq(size_t age) {
-    guarantee( 0 <= age, "pre-condition" );
     if (age >= _setup_seq_num) {
       guarantee( _setup_seq_num > 0, "invariant" );
       age = _setup_seq_num-1;
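
A worked example of the extrapolating else-branch in accum_surv_rate_pred(), with invented numbers:

    // _stats_arrays_length = 3, _last_pred = 0.35,
    // _accum_surv_rate_pred[] = { 0.90, 1.50, 1.90 }.  For age = 5:
    //   diff = 5 - 3 + 1 = 3
    //   accum_surv_rate_pred(5) = 1.90 + 3 * 0.35 = 2.95
    // Ages past the measured range extend linearly at the last
    // observed per-age survival rate.
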
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Tue Mar 24 13:09:52 2009 -0400
@@ -31,7 +31,7 @@
 cardTableRS.cpp				concurrentMark.hpp
 cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
 
-collectionSetChooser.cpp		g1CollectedHeap.hpp
+collectionSetChooser.cpp		g1CollectedHeap.inline.hpp
 collectionSetChooser.cpp		g1CollectorPolicy.hpp
 collectionSetChooser.cpp		collectionSetChooser.hpp
 
@@ -42,14 +42,15 @@
 concurrentG1Refine.cpp			concurrentG1Refine.hpp
 concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
 concurrentG1Refine.cpp			copy.hpp
-concurrentG1Refine.cpp			g1CollectedHeap.hpp
+concurrentG1Refine.cpp			g1CollectedHeap.inline.hpp
 concurrentG1Refine.cpp			g1RemSet.hpp
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
+concurrentG1Refine.hpp			allocation.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
 concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
-concurrentG1RefineThread.cpp		g1CollectedHeap.hpp
+concurrentG1RefineThread.cpp		g1CollectedHeap.inline.hpp
 concurrentG1RefineThread.cpp            g1CollectorPolicy.hpp
 concurrentG1RefineThread.cpp		handles.inline.hpp
 concurrentG1RefineThread.cpp		mutexLocker.hpp
@@ -166,10 +167,11 @@
 g1CollectorPolicy.cpp			concurrentMarkThread.inline.hpp
 g1CollectorPolicy.cpp			debug.hpp
 g1CollectorPolicy.cpp			java.hpp
-g1CollectorPolicy.cpp                   g1CollectedHeap.hpp
+g1CollectorPolicy.cpp                   g1CollectedHeap.inline.hpp
 g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
 g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
 g1CollectorPolicy.cpp			mutexLocker.hpp
+g1CollectorPolicy.cpp			gcPolicyCounters.hpp
 
 g1CollectorPolicy.hpp                   collectorPolicy.hpp
 g1CollectorPolicy.hpp                   collectionSetChooser.hpp
@@ -187,7 +189,7 @@
 g1MarkSweep.cpp                         codeCache.hpp
 g1MarkSweep.cpp                         events.hpp
 g1MarkSweep.cpp                         fprofiler.hpp
-g1MarkSweep.hpp                         g1CollectedHeap.hpp
+g1MarkSweep.hpp                         g1CollectedHeap.inline.hpp
 g1MarkSweep.cpp                         g1MarkSweep.hpp
 g1MarkSweep.cpp                         gcLocker.hpp
 g1MarkSweep.cpp                         genCollectedHeap.hpp
@@ -226,7 +228,7 @@
 g1MMUTracker.cpp			mutexLocker.hpp
 
 g1MMUTracker.hpp			debug.hpp
+g1MMUTracker.hpp			allocation.hpp
 
 g1RemSet.cpp				bufferingOopClosure.hpp
 g1RemSet.cpp				concurrentG1Refine.hpp
 g1RemSet.cpp				concurrentG1RefineThread.hpp
@@ -270,6 +272,7 @@
 heapRegion.hpp                          watermark.hpp
 heapRegion.hpp				g1_specialized_oop_closures.hpp
 heapRegion.hpp				survRateGroup.hpp
+heapRegion.hpp				ageTable.hpp
 
 heapRegionRemSet.hpp			sparsePRT.hpp
 
@@ -283,7 +286,7 @@
 heapRegionRemSet.cpp                    space.inline.hpp
 
 heapRegionSeq.cpp                       allocation.hpp
-heapRegionSeq.cpp                       g1CollectedHeap.hpp
+heapRegionSeq.cpp                       g1CollectedHeap.inline.hpp
 heapRegionSeq.cpp                       heapRegionSeq.hpp
 
 heapRegionSeq.hpp                       growableArray.hpp
@@ -334,18 +337,18 @@
 survRateGroup.hpp			numberSeq.hpp
 
 survRateGroup.cpp			allocation.hpp
-survRateGroup.cpp			g1CollectedHeap.hpp
+survRateGroup.cpp			g1CollectedHeap.inline.hpp
 survRateGroup.cpp			g1CollectorPolicy.hpp
 survRateGroup.cpp			heapRegion.hpp
 survRateGroup.cpp			survRateGroup.hpp
 
 thread.cpp				concurrentMarkThread.inline.hpp
 
-universe.cpp                            g1CollectedHeap.hpp
+universe.cpp                            g1CollectedHeap.inline.hpp
 universe.cpp                            g1CollectorPolicy.hpp
 
 vm_operations_g1.hpp			vmGCOperations.hpp
 
 vm_operations_g1.cpp			vm_operations_g1.hpp
-vm_operations_g1.cpp                    g1CollectedHeap.hpp
+vm_operations_g1.cpp                    g1CollectedHeap.inline.hpp
 vm_operations_g1.cpp                    isGCActiveMark.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_shared	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/includeDB_gc_shared	Tue Mar 24 13:09:52 2009 -0400
@@ -100,4 +100,4 @@
 spaceCounters.hpp                       perfData.hpp
 spaceCounters.hpp                       generationCounters.hpp
 
-vmGCOperations.cpp                      g1CollectedHeap.hpp
+vmGCOperations.cpp                      g1CollectedHeap.inline.hpp
--- a/src/share/vm/gc_implementation/shared/ageTable.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/shared/ageTable.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -67,6 +67,12 @@
   }
 }
 
+void ageTable::merge_par(ageTable* subTable) {
+  for (int i = 0; i < table_size; i++) {
+    Atomic::add_ptr(subTable->sizes[i], &sizes[i]);
+  }
+}
+
 int ageTable::compute_tenuring_threshold(size_t survivor_capacity) {
   size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100);
   size_t total = 0;
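
merge_par() lets each GC worker fold a private age table into the shared one without taking a lock; atomic per-slot addition is commutative, so ordering does not matter. A standard-C++ sketch of the same idea, with std::atomic standing in for HotSpot's Atomic::add_ptr and an illustrative table size:

    #include <atomic>
    #include <cstddef>

    const int table_size = 16;                     // illustrative
    std::atomic<size_t> shared_sizes[table_size];  // zero-initialized

    void merge_par_sketch(const size_t local_sizes[]) {
      for (int i = 0; i < table_size; i++) {
        shared_sizes[i].fetch_add(local_sizes[i],
                                  std::memory_order_relaxed);
      }
    }
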
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/shared/ageTable.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -56,6 +56,7 @@
   // Merge another age table with the current one.  Used
   // for parallel young generation gc.
   void merge(ageTable* subTable);
+  void merge_par(ageTable* subTable);
 
   // calculate new tenuring threshold based on age information
   int compute_tenuring_threshold(size_t survivor_capacity);
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -343,18 +343,62 @@
   inline_write_ref_field(field, newVal);
 }
 
+/*
+   Claimed and deferred bits are used together in G1 during the evacuation
+   pause. These bits can have the following state transitions:
+   1. The claimed bit can be set over any other card state, except that
+      the "dirty -> dirty and claimed" transition is checked for in
+      G1 code and is never used.
+   2. The deferred bit can be set only if the previous state of the card
+      was either clean or claimed. mark_card_deferred() is wait-free.
+      We do not care whether the operation succeeds: if it does not, the
+      only consequence is a duplicate entry in the update buffer caused
+      by the "cache miss". So it is not worth spinning.
+ */
+
 
 bool CardTableModRefBS::claim_card(size_t card_index) {
   jbyte val = _byte_map[card_index];
-  if (val != claimed_card_val()) {
-    jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val);
-    if (res == val)
+  assert(val != dirty_card_val(), "Shouldn't claim a dirty card");
+  while (val == clean_card_val() ||
+         (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) {
+    jbyte new_val = val;
+    if (val == clean_card_val()) {
+      new_val = (jbyte)claimed_card_val();
+    } else {
+      new_val = val | (jbyte)claimed_card_val();
+    }
+    jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+    if (res == val) {
       return true;
-    else return false;
+    }
+    val = res;
   }
   return false;
 }
 
+bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
+  jbyte val = _byte_map[card_index];
+  // The card has already been processed.
+  if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
+    return false;
+  }
+  // The deferred bit can be set on a clean card or on a claimed card.
+  jbyte new_val = val;
+  if (val == clean_card_val()) {
+    new_val = (jbyte)deferred_card_val();
+  } else {
+    if (val & claimed_card_val()) {
+      new_val = val | (jbyte)deferred_card_val();
+    }
+  }
+  if (new_val != val) {
+    Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+  }
+  return true;
+}
+
+
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
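
The rewritten claim_card() is a compare-and-swap retry loop, complicated by clean_card being -1 (all bits set), which is why it cannot simply OR in the claimed bit and instead tests against clean_card_mask_val(). A standard-C++ sketch of the bare idiom, with std::atomic standing in for Atomic::cmpxchg and the clean-card special case left out:

    #include <atomic>

    typedef signed char jbyte;   // card table entries are single bytes

    // Keep retrying while 'bit' is absent and other bits churn; give up
    // only once another thread has installed 'bit'.
    bool try_set_bit(std::atomic<jbyte>& card, jbyte bit) {
      jbyte val = card.load();
      while ((val & bit) == 0) {
        if (card.compare_exchange_weak(val, (jbyte)(val | bit))) {
          return true;           // we won the race
        }
        // on failure, compare_exchange_weak reloads 'val'; re-test it
      }
      return false;              // bit already set by another thread
    }
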
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -52,11 +52,15 @@
 
   enum CardValues {
     clean_card                  = -1,
+    // The mask has zeros in the bit positions used by all the other
+    // card values; only a clean card (-1) has these high bits set.
+    clean_card_mask             = clean_card - 31,
+
     dirty_card                  =  0,
     precleaned_card             =  1,
-    claimed_card                =  3,
-    last_card                   =  4,
-    CT_MR_BS_last_reserved      = 10
+    claimed_card                =  2,
+    deferred_card               =  4,
+    last_card                   =  8,
+    CT_MR_BS_last_reserved      = 16
   };
 
   // dirty and precleaned are equivalent wrt younger_refs_iter.
@@ -254,9 +258,11 @@
   };
 
   static int clean_card_val()      { return clean_card; }
+  static int clean_card_mask_val() { return clean_card_mask; }
   static int dirty_card_val()      { return dirty_card; }
   static int claimed_card_val()    { return claimed_card; }
   static int precleaned_card_val() { return precleaned_card; }
+  static int deferred_card_val()   { return deferred_card; }
 
   // For RTTI simulation.
   bool is_a(BarrierSet::Name bsn) {
@@ -329,7 +335,8 @@
   }
 
   bool is_card_claimed(size_t card_index) {
-    return _byte_map[card_index] == claimed_card_val();
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
   }
 
   bool claim_card(size_t card_index);
@@ -338,6 +345,13 @@
     return _byte_map[card_index] == clean_card_val();
   }
 
+  bool is_card_deferred(size_t card_index) {
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val();
+  }
+
+  bool mark_card_deferred(size_t card_index);
+
   // Card marking array base (adjusted for heap low boundary)
   // This would be the 0th element of _byte_map, if the heap started at 0x0.
   // But since the heap starts at some higher address, this points to somewhere
@@ -434,6 +448,10 @@
     return byte_for(p) - _byte_map;
   }
 
+  const jbyte* byte_for_index(const size_t card_index) const {
+    return _byte_map + card_index;
+  }
+
   void verify();
   void verify_guard();
 
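The claimed/deferred predicates above lean on clean_card_mask to distinguish a genuinely claimed or deferred card from the clean pattern, since clean_card (-1) has every bit set. Worked out in 8-bit two's complement:

    //   clean_card      = -1  -> 1111 1111
    //   clean_card_mask = -32 -> 1110 0000
    //   claimed_card    =  2  -> 0000 0010
    //   deferred_card   =  4  -> 0000 0100
    // is_card_claimed(): (val & (0xE0 | 0x02)) == 0x02
    //   clean            (0xFF): 0xFF & 0xE2 = 0xE2  -> false
    //   claimed          (0x02): 0x02 & 0xE2 = 0x02  -> true
    //   claimed+deferred (0x06): 0x06 & 0xE2 = 0x02  -> true
    // The mask's high bits reject the clean card, whose all-ones
    // representation would otherwise satisfy any low-bit test.
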
--- a/src/share/vm/opto/graphKit.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -3233,12 +3233,11 @@
 
   // Now some of the values
 
-  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
-  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+  Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
 
   // if (marking != 0)
   __ if_then(marking, BoolTest::ne, zero); {
+    Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
 
     const Type* t1 = adr->bottom_type();
     const Type* t2 = val->bottom_type();
@@ -3246,6 +3245,7 @@
     Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
     // if (orig != NULL)
     __ if_then(orig, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
 
       // load original value
       // alias_idx correct??
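
The graphKit change sinks the loads of the SATB queue's index and buffer beneath the tests that need them, so a write barrier taken while marking is inactive now pays for only the 'marking' load. A plain-C++ sketch of the shape of the emitted code (field names invented; the real code builds C2 IR, not source):

    if (marking != 0) {                  // only during concurrent marking
      intptr_t index = tl->satb_index;   // load moved under the check
      oop orig = *adr;                   // previous value at the field
      if (orig != NULL) {
        void** buffer = tl->satb_buffer; // load moved under the null test
        // ... enqueue orig into the SATB buffer ...
      }
    }
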
--- a/src/share/vm/utilities/workgroup.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/utilities/workgroup.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -32,7 +32,7 @@
 
 // An abstract task to be worked on by a gang.
 // You subclass this to supply your own work() method
-class AbstractGangTask: public CHeapObj {
+class AbstractGangTask VALUE_OBJ_CLASS_SPEC {
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.