changeset 1035:a3b9e96881fe

Merge
author xlu
date Fri, 23 Oct 2009 18:44:33 -0700
parents 08780c8a9f04 (current diff) dfdaf65c3423 (diff)
children d912f17c1ae4 473cce303f13
files
diffstat 28 files changed, 533 insertions(+), 331 deletions(-)
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -377,3 +377,11 @@
   _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
 #endif
 }
+
+void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
+  for (int i = 0; i < _n_threads; ++i) {
+    _threads[i]->print_on(st);
+    st->cr();
+  }
+}
+
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -179,4 +179,6 @@
   void clear_and_record_card_counts();
 
   static size_t thread_num();
+
+  void print_worker_threads_on(outputStream* st) const;
 };
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -204,8 +204,12 @@
   if (G1TraceConcurrentRefinement) gclog_or_tty->print_cr("G1-Refine-stop");
 }
 
-void ConcurrentG1RefineThread::print() {
-  gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentG1RefineThread::print() const {
+  print_on(tty);
 }
+
+void ConcurrentG1RefineThread::print_on(outputStream* st) const {
+  st->print("\"G1 Concurrent Refinement Thread#%d\" ", _worker_id);
+  Thread::print_on(st);
+  st->cr();
+}
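
The pattern repeated across these thread classes: print() becomes a thin convenience wrapper and all formatting moves into print_on(outputStream*), so the same code can target tty, a log file, or any other stream. A minimal self-contained sketch of the idiom, using std::ostream in place of HotSpot's outputStream (class and member names are illustrative, not the HotSpot types):

    #include <iostream>

    class WorkerThread {
     public:
      explicit WorkerThread(int id) : _id(id) {}
      // Convenience overload: delegate to print_on() with the default stream.
      void print() const { print_on(std::cout); }
      // Single formatting point; any output target can be passed in.
      void print_on(std::ostream& st) const {
        st << "\"Worker Thread#" << _id << "\"\n";
      }
     private:
      int _id;
    };

    int main() {
      WorkerThread t(0);
      t.print();  // same output as t.print_on(std::cout)
      return 0;
    }
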
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -77,7 +77,8 @@
                            int worker_id_offset, int worker_id);
 
   // Printing
-  void print();
+  void print() const;
+  void print_on(outputStream* st) const;
 
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -237,7 +237,7 @@
   _index = next_index;
   for (int i = 0; i < n; i++) {
     int ind = start + i;
-    guarantee(ind < _capacity, "By overflow test above.");
+    assert(ind < _capacity, "By overflow test above.");
     _base[ind] = ptr_arr[i];
   }
 }
@@ -310,12 +310,12 @@
     if (res == index) {
       MemRegion mr = _base[next_index];
       if (mr.start() != NULL) {
-        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
-        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+        assert(mr.end() != NULL, "invariant");
+        assert(mr.word_size() > 0, "invariant");
         return mr;
       } else {
         // that entry was invalidated... let's skip it
-        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+        assert(mr.end() == NULL, "invariant");
       }
     }
     // Otherwise, we need to try again.
@@ -328,10 +328,10 @@
   for (int i = 0; i < _oops_do_bound; ++i) {
     MemRegion mr = _base[i];
     if (mr.start() != NULL) {
-      tmp_guarantee_CM( mr.end() != NULL, "invariant");
-      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
       HeapRegion* hr = g1h->heap_region_containing(mr.start());
-      tmp_guarantee_CM( hr != NULL, "invariant" );
+      assert(hr != NULL, "invariant");
       if (hr->in_collection_set()) {
         // The region points into the collection set
         _base[i] = MemRegion();
@@ -339,7 +339,7 @@
       }
     } else {
       // that entry was invalidated... let's skip it
-      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+      assert(mr.end() == NULL, "invariant");
     }
   }
   return result;
@@ -542,8 +542,8 @@
     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 #endif
 
-    guarantee( parallel_marking_threads() > 0, "peace of mind" );
-    _parallel_workers = new WorkGang("Parallel Marking Threads",
+    guarantee(parallel_marking_threads() > 0, "peace of mind");
+    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                      (int) parallel_marking_threads(), false, true);
     if (_parallel_workers == NULL)
       vm_exit_during_initialization("Failed necessary allocation.");
@@ -569,8 +569,7 @@
     return;
 
   MemRegion committed = _g1h->g1_committed();
-  tmp_guarantee_CM( committed.start() == _heap_start,
-                    "start shouldn't change" );
+  assert(committed.start() == _heap_start, "start shouldn't change");
   HeapWord* new_end = committed.end();
   if (new_end > _heap_end) {
     // The heap has been expanded.
@@ -592,9 +591,10 @@
   _heap_start = committed.start();
   _heap_end   = committed.end();
 
-  guarantee( _heap_start != NULL &&
-             _heap_end != NULL   &&
-             _heap_start < _heap_end, "heap bounds should look ok" );
+  // Separated the asserts so that we know which one fires.
+  assert(_heap_start != NULL, "heap bounds should look ok");
+  assert(_heap_end != NULL, "heap bounds should look ok");
+  assert(_heap_start < _heap_end, "heap bounds should look ok");
 
   // reset all the marking data structures and any necessary flags
   clear_marking_state();
@@ -614,7 +614,7 @@
 }
 
 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
-  guarantee( active_tasks <= _max_task_num, "we should not have more" );
+  assert(active_tasks <= _max_task_num, "we should not have more");
 
   _active_tasks = active_tasks;
   // Need to update the three data structures below according to the
@@ -634,8 +634,8 @@
     // We currently assume that the concurrent flag has been set to
     // false before we start remark. At this point we should also be
     // in a STW phase.
-    guarantee( !concurrent_marking_in_progress(), "invariant" );
-    guarantee( _finger == _heap_end, "only way to get here" );
+    assert(!concurrent_marking_in_progress(), "invariant");
+    assert(_finger == _heap_end, "only way to get here");
     update_g1_committed(true);
   }
 }
@@ -933,8 +933,8 @@
   // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
   // is a bit conservative; but better than nothing.
-  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
-                    "address should be within the heap bounds" );
+  assert(_g1h->g1_committed().contains(addr),
+         "address should be within the heap bounds");
 
   if (!_nextMarkBitMap->isMarked(addr))
     _nextMarkBitMap->parMark(addr);
@@ -960,12 +960,15 @@
   if (mr.start() < finger) {
     // The finger is always heap region aligned and it is not possible
     // for mr to span heap regions.
-    tmp_guarantee_CM( mr.end() <= finger, "invariant" );
-
-    tmp_guarantee_CM( mr.start() <= mr.end() &&
-                      _heap_start <= mr.start() &&
-                      mr.end() <= _heap_end,
-                  "region boundaries should fall within the committed space" );
+    assert(mr.end() <= finger, "invariant");
+
+    // Separated the asserts so that we know which one fires.
+    assert(mr.start() <= mr.end(),
+           "region boundaries should fall within the committed space");
+    assert(_heap_start <= mr.start(),
+           "region boundaries should fall within the committed space");
+    assert(mr.end() <= _heap_end,
+           "region boundaries should fall within the committed space");
     if (verbose_low())
       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                              "below the finger, pushing it",
@@ -1014,14 +1017,14 @@
 
 public:
   void work(int worker_i) {
-    guarantee( Thread::current()->is_ConcurrentGC_thread(),
-               "this should only be done by a conc GC thread" );
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
 
     double start_vtime = os::elapsedVTime();
 
     ConcurrentGCThread::stsJoin();
 
-    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
+    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
     CMTask* the_task = _cm->task(worker_i);
     the_task->record_start_time();
     if (!_cm->has_aborted()) {
@@ -1059,7 +1062,7 @@
       } while (!_cm->has_aborted() && the_task->has_aborted());
     }
     the_task->record_end_time();
-    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
+    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 
     ConcurrentGCThread::stsLeave();
 
@@ -1182,8 +1185,7 @@
   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
 #if CARD_BM_TEST_MODE
-      guarantee(_card_bm->at(i - _bottom_card_num),
-                "Should already be set.");
+      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
 #else
       _card_bm->par_at_put(i - _bottom_card_num, 1);
 #endif
@@ -1328,7 +1330,7 @@
       // In any case, we set the last card num.
       last_card_num = obj_last_card_num;
 
-      marked_bytes += obj_sz * HeapWordSize;
+      marked_bytes += (size_t)obj_sz * HeapWordSize;
       // Find the next marked object after this one.
       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
       _changed = true;
@@ -1442,7 +1444,7 @@
     }
     assert(calccl.complete(), "Shouldn't have yielded!");
 
-    guarantee( (size_t)i < _n_workers, "invariant" );
+    assert((size_t) i < _n_workers, "invariant");
     _live_bytes[i] = calccl.tot_live();
     _used_bytes[i] = calccl.tot_used();
   }
@@ -1774,14 +1776,14 @@
       hd->rem_set()->clear();
       HeapRegion* next_hd = hd->next_from_unclean_list();
       (void)list->pop();
-      guarantee(list->hd() == next_hd, "how not?");
+      assert(list->hd() == next_hd, "how not?");
       _g1h->put_region_on_unclean_list(hd);
       if (!hd->isHumongous()) {
         // Add this to the _free_regions count by 1.
         _g1h->finish_free_region_work(0, 0, 1, NULL);
       }
       hd = list->hd();
-      guarantee(hd == next_hd, "how not?");
+      assert(hd == next_hd, "how not?");
     }
   }
 }
@@ -1931,9 +1933,6 @@
     g1h->set_par_threads(n_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
-
-    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
@@ -1945,10 +1944,9 @@
     // active_workers will be fewer. The extra ones will just bail out
     // immediately.
     remarkTask.work(0);
-
-    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
   }
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
 
   print_stats();
 
@@ -1989,7 +1987,7 @@
       str = "outside G1 reserved";
     else {
       HeapRegion* hr  = _g1h->heap_region_containing(obj);
-      guarantee( hr != NULL, "invariant" );
+      guarantee(hr != NULL, "invariant");
       if (hr->obj_allocated_since_prev_marking(obj)) {
         str = "over TAMS";
         if (_bitmap->isMarked((HeapWord*) obj))
@@ -2125,7 +2123,7 @@
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   if (_g1h->is_in_g1_reserved(objAddr)) {
-    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    assert(obj != NULL, "is_in_g1_reserved should ensure this");
     HeapRegion* hr = _g1h->heap_region_containing(obj);
     if (_g1h->is_obj_ill(obj, hr)) {
       if (verbose_high())
@@ -2167,7 +2165,7 @@
   satb_mq_set.iterate_closure_all_threads();
 
   satb_mq_set.set_closure(NULL);
-  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
 void ConcurrentMark::markPrev(oop p) {
@@ -2200,7 +2198,7 @@
   // _heap_end will not change underneath our feet; it only changes at
   // yield points.
   while (finger < _heap_end) {
-    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
+    assert(_g1h->is_in_g1_reserved(finger), "invariant");
 
     // is the gap between reading the finger and doing the CAS too long?
 
@@ -2222,7 +2220,7 @@
 
       // notice that _finger == end cannot be guaranteed here since,
       // someone else might have moved the finger even further
-      guarantee( _finger >= end, "the finger should have moved forward" );
+      assert(_finger >= end, "the finger should have moved forward");
 
       if (verbose_low())
         gclog_or_tty->print_cr("[%d] we were successful with region = "
@@ -2234,8 +2232,8 @@
                                  "returning it ", task_num, curr_region);
         return curr_region;
       } else {
-        tmp_guarantee_CM( limit == bottom,
-                          "the region limit should be at bottom" );
+        assert(limit == bottom,
+               "the region limit should be at bottom");
         if (verbose_low())
           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                  "returning NULL", task_num, curr_region);
@@ -2244,7 +2242,7 @@
         return NULL;
       }
     } else {
-      guarantee( _finger > finger, "the finger should have moved forward" );
+      assert(_finger > finger, "the finger should have moved forward");
       if (verbose_low())
         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                                "global finger = "PTR_FORMAT", "
@@ -2282,7 +2280,7 @@
   if (_regionStack.invalidate_entries_into_cset()) {
     // otherwise, any gray objects copied during the evacuation pause
     // might not be visited.
-    guarantee( _should_gray_objects, "invariant" );
+    assert(_should_gray_objects, "invariant");
   }
 }
 
@@ -2637,6 +2635,10 @@
                 cmThread()->vtime_count_accum());
 }
 
+void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
+  _parallel_workers->print_worker_threads_on(st);
+}
+
 // Closures
 // XXX: there seems to be a lot of code duplication here;
 // should refactor and consolidate the shared code.
@@ -2711,12 +2713,12 @@
 
   bool do_bit(size_t offset) {
     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
-    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
-    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
+    assert(_nextMarkBitMap->isMarked(addr), "invariant");
+    assert(addr < _cm->finger(), "invariant");
 
     if (_scanning_heap_region) {
       statsOnly( _task->increase_objs_found_on_bitmap() );
-      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
+      assert(addr >= _task->finger(), "invariant");
       // We move that task's local finger along.
       _task->move_finger_to(addr);
     } else {
@@ -2761,8 +2763,9 @@
   virtual void do_oop(      oop* p) { do_oop_work(p); }
 
   template <class T> void do_oop_work(T* p) {
-    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
-    tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
+    assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
+    assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
+           "invariant");
 
     oop obj = oopDesc::load_decode_heap_oop(p);
     if (_cm->verbose_high())
@@ -2779,8 +2782,11 @@
 };
 
 void CMTask::setup_for_region(HeapRegion* hr) {
-  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
-      "claim_region() should have filtered out continues humongous regions" );
+  // Separated the asserts so that we know which one fires.
+  assert(hr != NULL,
+        "claim_region() should have filtered out continues humongous regions");
+  assert(!hr->continuesHumongous(),
+        "claim_region() should have filtered out continues humongous regions");
 
   if (_cm->verbose_low())
     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
@@ -2808,9 +2814,9 @@
     // as the region is not supposed to be empty in the first place)
     _finger = bottom;
   } else if (limit >= _region_limit) {
-    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
+    assert(limit >= _finger, "peace of mind");
   } else {
-    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
+    assert(limit < _region_limit, "only way to get here");
     // This can happen under some pretty unusual circumstances.  An
     // evacuation pause empties the region underneath our feet (NTAMS
     // at bottom). We then do some allocation in the region (NTAMS
@@ -2828,7 +2834,7 @@
 }
 
 void CMTask::giveup_current_region() {
-  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
+  assert(_curr_region != NULL, "invariant");
   if (_cm->verbose_low())
     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                            _task_id, _curr_region);
@@ -2846,7 +2852,7 @@
 }
 
 void CMTask::reset(CMBitMap* nextMarkBitMap) {
-  guarantee( nextMarkBitMap != NULL, "invariant" );
+  guarantee(nextMarkBitMap != NULL, "invariant");
 
   if (_cm->verbose_low())
     gclog_or_tty->print_cr("[%d] resetting", _task_id);
@@ -2912,7 +2918,7 @@
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   if (_g1h->is_in_g1_reserved(objAddr)) {
-    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    assert(obj != NULL, "is_in_g1_reserved should ensure this");
     HeapRegion* hr =  _g1h->heap_region_containing(obj);
     if (_g1h->is_obj_ill(obj, hr)) {
       if (_cm->verbose_high())
@@ -2973,10 +2979,11 @@
 
 void CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
-  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
-  tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
-  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
-  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
+  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
+  assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
+         "invariant");
+  assert(!_g1h->is_obj_ill(obj), "invariant");
+  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
 
   if (_cm->verbose_high())
     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
@@ -2995,7 +3002,7 @@
     // stack, we should have definitely removed some entries from the
     // local queue. So, there must be space on it.
     bool success = _task_queue->push(obj);
-    tmp_guarantee_CM( success, "invariant" );
+    assert(success, "invariant");
   }
 
   statsOnly( int tmp_size = _task_queue->size();
@@ -3005,9 +3012,9 @@
 }
 
 void CMTask::reached_limit() {
-  tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
-                    _refs_reached >= _refs_reached_limit ,
-                 "shouldn't have been called otherwise" );
+  assert(_words_scanned >= _words_scanned_limit ||
+         _refs_reached >= _refs_reached_limit,
+         "shouldn't have been called otherwise");
   regular_clock_call();
 }
 
@@ -3165,8 +3172,8 @@
   oop buffer[global_stack_transfer_size];
   int n;
   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
-  tmp_guarantee_CM( n <= global_stack_transfer_size,
-                    "we should not pop more than the given limit" );
+  assert(n <= global_stack_transfer_size,
+         "we should not pop more than the given limit");
   if (n > 0) {
     // yes, we did actually pop at least one entry
 
@@ -3178,7 +3185,7 @@
       bool success = _task_queue->push(buffer[i]);
       // We only call this when the local queue is empty or under a
       // given target limit. So, we do not expect this push to fail.
-      tmp_guarantee_CM( success, "invariant" );
+      assert(success, "invariant");
     }
 
     statsOnly( int tmp_size = _task_queue->size();
@@ -3218,10 +3225,9 @@
         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                                (void*) obj);
 
-      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
-                        "invariant" );
-      tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
-                        "invariant" );
+      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
+      assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
+             "invariant");
 
       scan_object(obj);
 
@@ -3243,7 +3249,7 @@
 
   // We have a policy to drain the local queue before we attempt to
   // drain the global stack.
-  tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
+  assert(partially || _task_queue->size() == 0, "invariant");
 
   // Decide what the target size is, depending whether we're going to
   // drain it partially (so that other tasks can steal if they run out
@@ -3324,9 +3330,9 @@
 
   _draining_satb_buffers = false;
 
-  tmp_guarantee_CM( has_aborted() ||
-                    concurrent() ||
-                    satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  assert(has_aborted() ||
+         concurrent() ||
+         satb_mq_set.completed_buffers_num() == 0, "invariant");
 
   if (ParallelGCThreads > 0)
     satb_mq_set.set_par_closure(_task_id, NULL);
@@ -3342,8 +3348,8 @@
   if (has_aborted())
     return;
 
-  tmp_guarantee_CM( _region_finger == NULL,
-                    "it should be NULL when we're not scanning a region" );
+  assert(_region_finger == NULL,
+         "it should be NULL when we're not scanning a region");
 
   if (!_cm->region_stack_empty()) {
     if (_cm->verbose_low())
@@ -3359,12 +3365,12 @@
         gclog_or_tty->print_cr("[%d] we are scanning region "
                                "["PTR_FORMAT", "PTR_FORMAT")",
                                _task_id, mr.start(), mr.end());
-      tmp_guarantee_CM( mr.end() <= _cm->finger(),
-                        "otherwise the region shouldn't be on the stack" );
+      assert(mr.end() <= _cm->finger(),
+             "otherwise the region shouldn't be on the stack");
       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
       if (_nextMarkBitMap->iterate(bc, mr)) {
-        tmp_guarantee_CM( !has_aborted(),
-               "cannot abort the task without aborting the bitmap iteration" );
+        assert(!has_aborted(),
+               "cannot abort the task without aborting the bitmap iteration");
 
         // We finished iterating over the region without aborting.
         regular_clock_call();
@@ -3376,14 +3382,14 @@
           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
         }
       } else {
-        guarantee( has_aborted(), "currently the only way to do so" );
+        assert(has_aborted(), "currently the only way to do so");
 
         // The only way to abort the bitmap iteration is to return
         // false from the do_bit() method. However, inside the
         // do_bit() method we move the _region_finger to point to the
         // object currently being looked at. So, if we bail out, we
         // have definitely set _region_finger to something non-null.
-        guarantee( _region_finger != NULL, "invariant" );
+        assert(_region_finger != NULL, "invariant");
 
         // The iteration was actually aborted. So now _region_finger
         // points to the address of the object we last scanned. If we
@@ -3412,13 +3418,6 @@
       _region_finger = NULL;
     }
 
-    // We only push regions on the region stack during evacuation
-    // pauses. So if we come out the above iteration because we region
-    // stack is empty, it will remain empty until the next yield
-    // point. So, the guarantee below is safe.
-    guarantee( has_aborted() || _cm->region_stack_empty(),
-               "only way to exit the loop" );
-
     if (_cm->verbose_low())
       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                              _task_id, _cm->region_stack_size());
@@ -3576,21 +3575,21 @@
  *****************************************************************************/
 
 void CMTask::do_marking_step(double time_target_ms) {
-  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
-  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
-
-  guarantee( concurrent() || _cm->region_stack_empty(),
-             "the region stack should have been cleared before remark" );
-  guarantee( _region_finger == NULL,
-             "this should be non-null only when a region is being scanned" );
+  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
+  assert(concurrent() == _cm->concurrent(), "they should be the same");
+
+  assert(concurrent() || _cm->region_stack_empty(),
+         "the region stack should have been cleared before remark");
+  assert(_region_finger == NULL,
+         "this should be non-null only when a region is being scanned");
 
   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
-  guarantee( _task_queues != NULL, "invariant" );
-  guarantee( _task_queue != NULL,  "invariant" );
-  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
-
-  guarantee( !_claimed,
-             "only one thread should claim this task at any one time" );
+  assert(_task_queues != NULL, "invariant");
+  assert(_task_queue != NULL, "invariant");
+  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
+
+  assert(!_claimed,
+         "only one thread should claim this task at any one time");
 
   // OK, this doesn't safeguard again all possible scenarios, as it is
   // possible for two threads to set the _claimed flag at the same
@@ -3661,9 +3660,8 @@
   do {
     if (!has_aborted() && _curr_region != NULL) {
       // This means that we're already holding on to a region.
-      tmp_guarantee_CM( _finger != NULL,
-                        "if region is not NULL, then the finger "
-                        "should not be NULL either" );
+      assert(_finger != NULL, "if region is not NULL, then the finger "
+             "should not be NULL either");
 
       // We might have restarted this task after an evacuation pause
       // which might have evacuated the region we're holding on to
@@ -3695,13 +3693,13 @@
         giveup_current_region();
         regular_clock_call();
       } else {
-        guarantee( has_aborted(), "currently the only way to do so" );
+        assert(has_aborted(), "currently the only way to do so");
         // The only way to abort the bitmap iteration is to return
         // false from the do_bit() method. However, inside the
         // do_bit() method we move the _finger to point to the
         // object currently being looked at. So, if we bail out, we
         // have definitely set _finger to something non-null.
-        guarantee( _finger != NULL, "invariant" );
+        assert(_finger != NULL, "invariant");
 
         // Region iteration was actually aborted. So now _finger
         // points to the address of the object we last scanned. If we
@@ -3728,9 +3726,10 @@
     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
       // We are going to try to claim a new region. We should have
       // given up on the previous one.
-      tmp_guarantee_CM( _curr_region  == NULL &&
-                        _finger       == NULL &&
-                        _region_limit == NULL, "invariant" );
+      // Separated the asserts so that we know which one fires.
+      assert(_curr_region  == NULL, "invariant");
+      assert(_finger       == NULL, "invariant");
+      assert(_region_limit == NULL, "invariant");
       if (_cm->verbose_low())
         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
       HeapRegion* claimed_region = _cm->claim_region(_task_id);
@@ -3744,7 +3743,7 @@
                                  _task_id, claimed_region);
 
         setup_for_region(claimed_region);
-        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
+        assert(_curr_region == claimed_region, "invariant");
       }
       // It is important to call the regular clock here. It might take
       // a while to claim a region if, for example, we hit a large
@@ -3755,8 +3754,8 @@
     }
 
     if (!has_aborted() && _curr_region == NULL) {
-      tmp_guarantee_CM( _cm->out_of_regions(),
-                        "at this point we should be out of regions" );
+      assert(_cm->out_of_regions(),
+             "at this point we should be out of regions");
     }
   } while ( _curr_region != NULL && !has_aborted());
 
@@ -3765,8 +3764,8 @@
     // tasks might be pushing objects to it concurrently. We also cannot
     // check if the region stack is empty because if a thread is aborting
     // it can push a partially done region back.
-    tmp_guarantee_CM( _cm->out_of_regions(),
-                      "at this point we should be out of regions" );
+    assert(_cm->out_of_regions(),
+           "at this point we should be out of regions");
 
     if (_cm->verbose_low())
       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
@@ -3790,8 +3789,8 @@
     // tasks might be pushing objects to it concurrently. We also cannot
     // check if the region stack is empty because if a thread is aborting
     // it can push a partially done region back.
-    guarantee( _cm->out_of_regions() &&
-               _task_queue->size() == 0, "only way to reach here" );
+    assert(_cm->out_of_regions() && _task_queue->size() == 0,
+           "only way to reach here");
 
     if (_cm->verbose_low())
       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
@@ -3807,8 +3806,8 @@
 
         statsOnly( ++_steals );
 
-        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
-                          "any stolen object should be marked" );
+        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
+               "any stolen object should be marked");
         scan_object(obj);
 
         // And since we're towards the end, let's totally drain the
@@ -3828,8 +3827,9 @@
     // tasks might be concurrently pushing objects on it. We also cannot
     // check if the region stack is empty because if a thread is aborting
     // it can push a partially done region back.
-    guarantee( _cm->out_of_regions() &&
-               _task_queue->size() == 0, "only way to reach here" );
+    // Separated the asserts so that we know which one fires.
+    assert(_cm->out_of_regions(), "only way to reach here");
+    assert(_task_queue->size() == 0, "only way to reach here");
 
     if (_cm->verbose_low())
       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
@@ -3849,7 +3849,7 @@
       if (_task_id == 0) {
         // let's allow task 0 to do this
         if (concurrent()) {
-          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
+          assert(_cm->concurrent_marking_in_progress(), "invariant");
           // we need to set this to false before the next
           // safepoint. This way we ensure that the marking phase
           // doesn't observe any more heap expansions.
@@ -3858,15 +3858,16 @@
       }
 
       // We can now guarantee that the global stack is empty, since
-      // all other tasks have finished.
-      guarantee( _cm->out_of_regions() &&
-                 _cm->region_stack_empty() &&
-                 _cm->mark_stack_empty() &&
-                 _task_queue->size() == 0 &&
-                 !_cm->has_overflown() &&
-                 !_cm->mark_stack_overflow() &&
-                 !_cm->region_stack_overflow(),
-                 "only way to reach here" );
+      // all other tasks have finished. We separated the guarantees so
+      // that, if a condition is false, we can immediately find out
+      // which one.
+      guarantee(_cm->out_of_regions(), "only way to reach here");
+      guarantee(_cm->region_stack_empty(), "only way to reach here");
+      guarantee(_cm->mark_stack_empty(), "only way to reach here");
+      guarantee(_task_queue->size() == 0, "only way to reach here");
+      guarantee(!_cm->has_overflown(), "only way to reach here");
+      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
+      guarantee(!_cm->region_stack_overflow(), "only way to reach here");
 
       if (_cm->verbose_low())
         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
@@ -3961,8 +3962,8 @@
     _task_queue(task_queue),
     _task_queues(task_queues),
     _oop_closure(NULL) {
-  guarantee( task_queue != NULL, "invariant" );
-  guarantee( task_queues != NULL, "invariant" );
+  guarantee(task_queue != NULL, "invariant");
+  guarantee(task_queues != NULL, "invariant");
 
   statsOnly( _clock_due_to_scanning = 0;
              _clock_due_to_marking  = 0 );
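
Several hunks in this file carry the note "Separated the asserts so that we know which one fires": a compound condition can only report that the whole conjunction failed, while one assert per clause names the failing clause in its message. A small sketch of the pattern using the standard C++ assert (the HotSpot macro takes a separate message argument; the && "msg" idiom below is a stand-in):

    #include <cassert>
    #include <cstddef>

    void check_bounds(const char* start, const char* end) {
      // Compound form: if this fires, the message cannot say which
      // of the three clauses was false.
      //   assert(start != NULL && end != NULL && start < end);

      // Separated form: the failing clause is identified immediately.
      assert(start != NULL && "start is NULL");
      assert(end != NULL && "end is NULL");
      assert(start < end && "start not below end");
    }

    int main() {
      char buf[8];
      check_bounds(buf, buf + 8);  // all three clauses hold
      return 0;
    }
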
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -295,12 +295,6 @@
 } while (0)
 #endif // _MARKING_STATS_
 
-// Some extra guarantees that I like to also enable in optimised mode
-// when debugging. If you want to enable them, comment out the assert
-// macro and uncomment out the guaratee macro
-// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
-#define tmp_guarantee_CM(expr, str) assert(expr, str)
-
 typedef enum {
   no_verbose  = 0,   // verbose turned off
   stats_verbose,     // only prints stats at the end of marking
@@ -485,15 +479,15 @@
 
   // Returns the task with the given id
   CMTask* task(int id) {
-    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
-               "active bounds" );
+    assert(0 <= id && id < (int) _active_tasks,
+           "task id not within active bounds");
     return _tasks[id];
   }
 
   // Returns the task queue with the given id
   CMTaskQueue* task_queue(int id) {
-    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
-               "active bounds" );
+    assert(0 <= id && id < (int) _active_tasks,
+           "task queue id not within active bounds");
     return (CMTaskQueue*) _task_queues->queue(id);
   }
 
@@ -723,6 +717,8 @@
 
   void print_summary_info();
 
+  void print_worker_threads_on(outputStream* st) const;
+
   // The following indicate whether a given verbose level has been
   // set. Notice that anything above stats is conditional to
   // _MARKING_VERBOSE_ having been set to 1
@@ -959,8 +955,7 @@
 
   // It scans an object and visits its children.
   void scan_object(oop obj) {
-    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
-                      "invariant" );
+    assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
 
     if (_cm->verbose_high())
       gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
@@ -999,14 +994,13 @@
 
   // moves the local finger to a new location
   inline void move_finger_to(HeapWord* new_finger) {
-    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
-                   "invariant" );
+    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
     _finger = new_finger;
   }
 
   // moves the region finger to a new location
   inline void move_region_finger_to(HeapWord* new_finger) {
-    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
+    assert(new_finger < _cm->finger(), "invariant");
     _region_finger = new_finger;
   }
 
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -286,10 +286,14 @@
   }
 }
 
-void ConcurrentMarkThread::print() {
-  gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentMarkThread::print() const {
+  print_on(tty);
+}
+
+void ConcurrentMarkThread::print_on(outputStream* st) const {
+  st->print("\"G1 Main Concurrent Mark GC Thread\" ");
+  Thread::print_on(st);
+  st->cr();
 }
 
 void ConcurrentMarkThread::sleepBeforeNextCycle() {
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -57,7 +57,8 @@
   static SurrogateLockerThread* slt() { return _slt; }
 
   // Printing
-  void print();
+  void print_on(outputStream* st) const;
+  void print() const;
 
   // Total virtual time so far.
   double vtime_accum();
--- a/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -157,10 +157,14 @@
   }
 }
 
-void ConcurrentZFThread::print() {
-  gclog_or_tty->print("\"Concurrent ZF Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentZFThread::print() const {
+  print_on(tty);
+}
+
+void ConcurrentZFThread::print_on(outputStream* st) const {
+  st->print("\"G1 Concurrent Zero-Fill Thread\" ");
+  Thread::print_on(st);
+  st->cr();
 }
 
 
--- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -61,7 +61,8 @@
   virtual void run();
 
   // Printing
-  void print();
+  void print_on(outputStream* st) const;
+  void print() const;
 
   // Waits until "r" has been zero-filled.  Requires caller to hold the
   // ZF_mon.
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -2210,40 +2210,58 @@
   bool _allow_dirty;
   bool _par;
   bool _use_prev_marking;
+  bool _failures;
 public:
   // use_prev_marking == true  -> use "prev" marking information,
   // use_prev_marking == false -> use "next" marking information
   VerifyRegionClosure(bool allow_dirty, bool par, bool use_prev_marking)
     : _allow_dirty(allow_dirty),
       _par(par),
-      _use_prev_marking(use_prev_marking) {}
+      _use_prev_marking(use_prev_marking),
+      _failures(false) {}
+
+  bool failures() {
+    return _failures;
+  }
 
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
     if (!r->continuesHumongous()) {
-      VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking);
-      r->verify(_allow_dirty, _use_prev_marking);
-      r->object_iterate(&not_dead_yet_cl);
-      guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(),
-                "More live objects than counted in last complete marking.");
+      bool failures = false;
+      r->verify(_allow_dirty, _use_prev_marking, &failures);
+      if (failures) {
+        _failures = true;
+      } else {
+        VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking);
+        r->object_iterate(&not_dead_yet_cl);
+        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                 "max_live_bytes "SIZE_FORMAT" "
+                                 "< calculated "SIZE_FORMAT,
+                                 r->bottom(), r->end(),
+                                 r->max_live_bytes(),
+                                 not_dead_yet_cl.live_bytes());
+          _failures = true;
+        }
+      }
     }
-    return false;
+    return false; // keep iterating even if we hit a failure, so all failures are reported
   }
 };
 
 class VerifyRootsClosure: public OopsInGenClosure {
 private:
   G1CollectedHeap* _g1h;
+  bool             _use_prev_marking;
   bool             _failures;
-  bool             _use_prev_marking;
 public:
   // use_prev_marking == true  -> use "prev" marking information,
   // use_prev_marking == false -> use "next" marking information
   VerifyRootsClosure(bool use_prev_marking) :
     _g1h(G1CollectedHeap::heap()),
-    _failures(false),
-    _use_prev_marking(use_prev_marking) { }
+    _use_prev_marking(use_prev_marking),
+    _failures(false) { }
 
   bool failures() { return _failures; }
 
@@ -2253,7 +2271,7 @@
       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       if (_g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
         gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
-                               "points to dead obj "PTR_FORMAT, p, (void*) obj);
+                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
         obj->print_on(gclog_or_tty);
         _failures = true;
       }
@@ -2271,6 +2289,7 @@
   G1CollectedHeap* _g1h;
   bool _allow_dirty;
   bool _use_prev_marking;
+  bool _failures;
 
 public:
   // use_prev_marking == true  -> use "prev" marking information,
@@ -2280,13 +2299,21 @@
     AbstractGangTask("Parallel verify task"),
     _g1h(g1h),
     _allow_dirty(allow_dirty),
-    _use_prev_marking(use_prev_marking) { }
+    _use_prev_marking(use_prev_marking),
+    _failures(false) { }
+
+  bool failures() {
+    return _failures;
+  }
 
   void work(int worker_i) {
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _use_prev_marking);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                           HeapRegion::ParVerifyClaimValue);
+    if (blk.failures()) {
+      _failures = true;
+    }
   }
 };
 
@@ -2307,6 +2334,7 @@
                          &rootsCl,
                          &blobsCl,
                          &rootsCl);
+    bool failures = rootsCl.failures();
     rem_set()->invalidate(perm_gen()->used_region(), false);
     if (!silent) { gclog_or_tty->print("heapRegions "); }
     if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
@@ -2318,6 +2346,9 @@
       set_par_threads(n_workers);
       workers()->run_task(&task);
       set_par_threads(0);
+      if (task.failures()) {
+        failures = true;
+      }
 
       assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
              "sanity check");
@@ -2329,10 +2360,23 @@
     } else {
       VerifyRegionClosure blk(allow_dirty, false, use_prev_marking);
       _hrs->iterate(&blk);
+      if (blk.failures()) {
+        failures = true;
+      }
     }
     if (!silent) gclog_or_tty->print("remset ");
     rem_set()->verify();
-    guarantee(!rootsCl.failures(), "should not have had failures");
+
+    if (failures) {
+      gclog_or_tty->print_cr("Heap:");
+      print_on(gclog_or_tty, true /* extended */);
+      gclog_or_tty->print_cr("");
+      if (VerifyDuringGC && G1VerifyConcMarkPrintReachable) {
+        concurrent_mark()->print_prev_bitmap_reachable();
+      }
+      gclog_or_tty->flush();
+    }
+    guarantee(!failures, "there should not have been any failures");
   } else {
     if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) ");
   }
@@ -2374,6 +2418,7 @@
   st->cr();
   perm()->as_gen()->print_on(st);
   if (extended) {
+    st->cr();
     print_on_extended(st);
   }
 }
@@ -2383,27 +2428,18 @@
   _hrs->iterate(&blk);
 }
 
-class PrintOnThreadsClosure : public ThreadClosure {
-  outputStream* _st;
-public:
-  PrintOnThreadsClosure(outputStream* st) : _st(st) { }
-  virtual void do_thread(Thread *t) {
-    t->print_on(_st);
-  }
-};
-
 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
   if (ParallelGCThreads > 0) {
-    workers()->print_worker_threads();
-  }
-  st->print("\"G1 concurrent mark GC Thread\" ");
-  _cmThread->print();
+    workers()->print_worker_threads_on(st);
+  }
+
+  _cmThread->print_on(st);
   st->cr();
-  st->print("\"G1 concurrent refinement GC Threads\" ");
-  PrintOnThreadsClosure p(st);
-  _cg1r->threads_do(&p);
-  st->cr();
-  st->print("\"G1 zero-fill GC Thread\" ");
+
+  _cm->print_worker_threads_on(st);
+
+  _cg1r->print_worker_threads_on(st);
+
   _czft->print_on(st);
   st->cr();
 }
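
The verification rework above replaces eager guarantee() calls with a failures flag propagated from VerifyRegionClosure up through the parallel task, so every bad region is reported before the single guarantee(!failures, ...) fires. A compact sketch of that accumulate-then-abort shape, with stand-in types rather than the HotSpot closures:

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    struct Region { int id; bool ok; };

    // Visit every region, printing a diagnostic for each failure
    // instead of aborting at the first one.
    static bool verify_all(const std::vector<Region>& regions) {
      bool failures = false;
      for (const Region& r : regions) {
        if (!r.ok) {
          std::printf("region %d failed verification\n", r.id);
          failures = true;  // remember it, keep iterating
        }
      }
      return failures;
    }

    int main() {
      std::vector<Region> heap = { {0, true}, {1, true} };
      if (verify_all(heap)) {  // single abort point at the end
        std::fprintf(stderr, "heap verification failed\n");
        std::abort();
      }
      return 0;
    }
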
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -992,11 +992,39 @@
 
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided. G1, like CMS, allows this, but should be
+  // ready to provide a compensating write barrier as necessary
+  // if that storage came out of a non-young region. The efficiency
+  // of this implementation depends crucially on being able to
+  // answer very efficiently in constant time whether a piece of
+  // storage in the heap comes from a young region or not.
+  // See ReduceInitialCardMarks.
   virtual bool can_elide_tlab_store_barriers() const {
-    // Since G1's TLAB's may, on occasion, come from non-young regions
-    // as well. (Is there a flag controlling that? XXX)
-    return false;
+    return true;
+  }
+
+  bool is_in_young(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    return hr != NULL && hr->is_young();
+  }
+
+  // We don't need barriers for initializing stores to objects
+  // in the young gen: for the SATB pre-barrier, there is no
+  // pre-value that needs to be remembered; for the remembered-set
+  // update logging post-barrier, we don't maintain remembered set
+  // information for young gen objects. Note that non-generational
+  // G1 does not have any "young" objects, so it should not elide
+  // the RS logging barrier and should always answer false below.
+  // However, non-generational G1 (-XX:-G1Gen) appears to have
+  // bit-rotted, so that case was not tested below.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    assert(G1Gen || !is_in_young(new_obj),
+           "Non-generational G1 should never return true below");
+    return is_in_young(new_obj);
   }
 
   // Can a compiler elide a store barrier when it writes
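
The new can_elide_initializing_store_barrier() encodes the reasoning in the comment above: an initializing store to an object in a young region needs neither the SATB pre-barrier (a freshly allocated object has no interesting pre-values) nor the RS-update post-barrier (young regions keep no remembered sets). A hedged sketch of that decision; Region and its young flag are illustrative stand-ins, not G1 types:

    struct Region { bool young; };

    bool can_elide_initializing_store_barrier(const Region& r) {
      // Only non-young allocations keep the initializing-store barrier.
      return r.young;
    }

    int main() {
      Region young_r = { true };
      Region old_r   = { false };
      return (can_elide_initializing_store_barrier(young_r) &&
              !can_elide_initializing_store_barrier(old_r)) ? 0 : 1;
    }
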
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -722,12 +722,13 @@
     st->print(" F");
   else
     st->print("  ");
-  st->print(" %d", _gc_time_stamp);
+  st->print(" %5d", _gc_time_stamp);
   G1OffsetTableContigSpace::print_on(st);
 }
 
 void HeapRegion::verify(bool allow_dirty) const {
-  verify(allow_dirty, /* use_prev_marking */ true);
+  bool dummy = false;
+  verify(allow_dirty, /* use_prev_marking */ true, /* failures */ &dummy);
 }
 
 #define OBJ_SAMPLE_INTERVAL 0
@@ -736,8 +737,11 @@
 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.
 
-void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
+void HeapRegion::verify(bool allow_dirty,
+                        bool use_prev_marking,
+                        bool* failures) const {
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  *failures = false;
   HeapWord* p = bottom();
   HeapWord* prev_p = NULL;
   int objs = 0;
@@ -746,8 +750,14 @@
   while (p < top()) {
     size_t size = oop(p)->size();
     if (blocks == BLOCK_SAMPLE_INTERVAL) {
-      guarantee(p == block_start_const(p + (size/2)),
-                "check offset computation");
+      HeapWord* res = block_start_const(p + (size/2));
+      if (p != res) {
+        gclog_or_tty->print_cr("offset computation 1 for "PTR_FORMAT" and "
+                               SIZE_FORMAT" returned "PTR_FORMAT,
+                               p, size, res);
+        *failures = true;
+        return;
+      }
       blocks = 0;
     } else {
       blocks++;
@@ -755,11 +765,34 @@
     if (objs == OBJ_SAMPLE_INTERVAL) {
       oop obj = oop(p);
       if (!g1->is_obj_dead_cond(obj, this, use_prev_marking)) {
-        obj->verify();
-        vl_cl.set_containing_obj(obj);
-        obj->oop_iterate(&vl_cl);
-        if (G1MaxVerifyFailures >= 0
-            && vl_cl.n_failures() >= G1MaxVerifyFailures) break;
+        if (obj->is_oop()) {
+          klassOop klass = obj->klass();
+          if (!klass->is_perm()) {
+            gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
+                                   "not in perm", klass, obj);
+            *failures = true;
+            return;
+          } else if (!klass->is_klass()) {
+            gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
+                                   "not a klass", klass, obj);
+            *failures = true;
+            return;
+          } else {
+            vl_cl.set_containing_obj(obj);
+            obj->oop_iterate(&vl_cl);
+            if (vl_cl.failures()) {
+              *failures = true;
+            }
+            if (G1MaxVerifyFailures >= 0 &&
+                vl_cl.n_failures() >= G1MaxVerifyFailures) {
+              return;
+            }
+          }
+        } else {
+          gclog_or_tty->print_cr(PTR_FORMAT" not an oop", obj);
+          *failures = true;
+          return;
+        }
       }
       objs = 0;
     } else {
@@ -771,21 +804,22 @@
   HeapWord* rend = end();
   HeapWord* rtop = top();
   if (rtop < rend) {
-    guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop,
-              "check offset computation");
-  }
-  if (vl_cl.failures()) {
-    gclog_or_tty->print_cr("Heap:");
-    G1CollectedHeap::heap()->print_on(gclog_or_tty, true /* extended */);
-    gclog_or_tty->print_cr("");
+    HeapWord* res = block_start_const(rtop + (rend - rtop) / 2);
+    if (res != rtop) {
+        gclog_or_tty->print_cr("offset computation 2 for "PTR_FORMAT" and "
+                               PTR_FORMAT" returned "PTR_FORMAT,
+                               rtop, rend, res);
+        *failures = true;
+        return;
+    }
   }
-  if (VerifyDuringGC &&
-      G1VerifyConcMarkPrintReachable &&
-      vl_cl.failures()) {
-    g1->concurrent_mark()->print_prev_bitmap_reachable();
+
+  if (p != top()) {
+    gclog_or_tty->print_cr("end of last object "PTR_FORMAT" "
+                           "does not match top "PTR_FORMAT, p, top());
+    *failures = true;
+    return;
   }
-  guarantee(!vl_cl.failures(), "region verification failed");
-  guarantee(p == top(), "end of last object must match end of space");
 }
 
 // G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
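
HeapRegion::verify() now reports through a bool* out-parameter: print a diagnostic for the first problem found, set the flag, and return early, leaving the abort decision to the heap-level caller. A minimal sketch of that convention (the check itself is a stand-in, not the real offset verification):

    #include <cstdio>

    // Verify one 'region': on the first failure, report, flag, return.
    static void verify_region(int expected, int actual, bool* failures) {
      *failures = false;
      if (expected != actual) {
        std::printf("offset check: expected %d, got %d\n", expected, actual);
        *failures = true;  // the caller decides whether to abort
        return;            // first failure ends this region's verification
      }
      // ... further checks would follow here ...
    }

    int main() {
      bool failures = false;
      verify_region(42, 42, &failures);
      return failures ? 1 : 0;
    }
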
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -569,13 +569,8 @@
   // ever evacuated into this region.  If we evacuate, allocate, and
   // then evacuate we are in deep doodoo.
   void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start,
-           "Increase only");
-    // Survivor regions will be scanned on the start of concurrent
-    // marking.
-    if (!is_survivor()) {
-      _next_top_at_mark_start = top();
-    }
+    assert(top() >= _next_top_at_mark_start, "Increase only");
+    _next_top_at_mark_start = top();
   }
 
   // Returns "false" iff no object in the region was allocated when the
@@ -798,7 +793,7 @@
   // use_prev_marking == true. Currently, there is only one case where
   // this is called with use_prev_marking == false, which is to verify
   // the "next" marking information at the end of remark.
-  void verify(bool allow_dirty, bool use_prev_marking) const;
+  void verify(bool allow_dirty, bool use_prev_marking, bool *failures) const;
 
   // Override; it uses the "prev" marking information
   virtual void verify(bool allow_dirty) const;
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -314,41 +314,6 @@
   return false;
 }
 
-// Static method
-bool ParallelScavengeHeap::is_in_young(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-                                            "Must be ParallelScavengeHeap");
-
-  PSYoungGen* young_gen = heap->young_gen();
-
-  if (young_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
-// Static method
-bool ParallelScavengeHeap::is_in_old_or_perm(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-                                            "Must be ParallelScavengeHeap");
-
-  PSOldGen* old_gen = heap->old_gen();
-  PSPermGen* perm_gen = heap->perm_gen();
-
-  if (old_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  if (perm_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
 // There are two levels of allocation policy here.
 //
 // When an allocation request fails, the requesting thread must invoke a VM
@@ -764,6 +729,13 @@
   CollectedHeap::resize_all_tlabs();
 }
 
+bool ParallelScavengeHeap::can_elide_initializing_store_barrier(oop new_obj) {
+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein.
+  return is_in_young(new_obj);
+}
+
 // This method is used by System.gc() and JVMTI.
 void ParallelScavengeHeap::collect(GCCause::Cause cause) {
   assert(!Heap_lock->owned_by_self(),
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -129,8 +129,8 @@
     return perm_gen()->is_in(p);
   }
 
-  static bool is_in_young(oop *p);        // reserved part
-  static bool is_in_old_or_perm(oop *p);  // reserved part
+  inline bool is_in_young(oop p);        // reserved part
+  inline bool is_in_old_or_perm(oop p);  // reserved part
 
   // Memory allocation.   "gc_time_limit_was_exceeded" will
   // be set to true if the adaptive size policy determine that
@@ -191,6 +191,10 @@
     return true;
   }
 
+  // Return true if we don't need a store barrier for
+  // initializing stores to an object at this address.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj);
+
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap?  Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -41,3 +41,11 @@
     PSMarkSweep::invoke(maximum_compaction);
   }
 }
+
+inline bool ParallelScavengeHeap::is_in_young(oop p) {
+  return young_gen()->is_in_reserved(p);
+}
+
+inline bool ParallelScavengeHeap::is_in_old_or_perm(oop p) {
+  return old_gen()->is_in_reserved(p) || perm_gen()->is_in_reserved(p);
+}
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -137,6 +137,89 @@
   return obj;
 }
 
+void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
+  MemRegion deferred = thread->deferred_card_mark();
+  if (!deferred.is_empty()) {
+    {
+      // Verify that the storage points to a parsable object in heap
+      DEBUG_ONLY(oop old_obj = oop(deferred.start());)
+      assert(is_in(old_obj), "Not in allocated heap");
+      assert(!can_elide_initializing_store_barrier(old_obj),
+             "Else should have been filtered in defer_store_barrier()");
+      assert(!is_in_permanent(old_obj), "Sanity: not expected");
+      assert(old_obj->is_oop(true), "Not an oop");
+      assert(old_obj->is_parsable(), "Will not be concurrently parsable");
+      assert(deferred.word_size() == (size_t)(old_obj->size()),
+             "Mismatch: multiple objects?");
+    }
+    BarrierSet* bs = barrier_set();
+    assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+    bs->write_region(deferred);
+    // "Clear" the deferred_card_mark field
+    thread->set_deferred_card_mark(MemRegion());
+  }
+  assert(thread->deferred_card_mark().is_empty(), "invariant");
+}
+
+// Helper for ReduceInitialCardMarks. For performance,
+// compiled code may elide card-marks for initializing stores
+// to a newly allocated object along the fast-path. We
+// compensate for such elided card-marks as follows:
+// (a) Generational, non-concurrent collectors, such as
+//     GenCollectedHeap(ParNew,DefNew,Tenured) and
+//     ParallelScavengeHeap(ParallelGC, ParallelOldGC)
+//     need the card-mark if and only if the region is
+//     in the old gen, and do not care if the card-mark
+//     succeeds or precedes the initializing stores themselves,
+//     so long as the card-mark is completed before the next
+//     scavenge. For all these cases, we can do a card mark
+//     at the point at which we do a slow path allocation
+//     in the old gen. For uniformity, however, we end
+//     up using the same scheme (see below) for all three
+//     cases (deferring the card-mark appropriately).
+// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
+//     in addition that the card-mark for an old gen allocated
+//     object strictly follow any associated initializing stores.
+//     In these cases, the MemRegion remembered below is
+//     used to card-mark the entire region either just before the next
+//     slow-path allocation by this thread or just before the next scavenge or
+//     CMS-associated safepoint, whichever of these events happens first.
+//     (The implicit assumption is that the object has been fully
+//     initialized by this point, a fact that we assert when doing the
+//     card-mark.)
+// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When
+//     G1 concurrent marking is in progress, an SATB (pre-write-)barrier
+//     is used to remember the pre-value of any store. Initializing
+//     stores will not need this barrier, so we need not worry about
+//     compensating for the missing pre-barrier here. Turning now
+//     to the post-barrier, we note that G1 needs an RS update barrier
+//     which simply enqueues a (sequence of) dirty cards which may
+//     optionally be refined by the concurrent update threads. Note
+//     that this barrier need only be applied to a non-young write
+//     but, because of the presence of concurrent refinement (much
+//     like CMS' precleaning), must strictly follow the oop-store.
+//     Thus the protocol for maintaining the intended invariants
+//     turns out, serendipitously, to be the same for all three
+//     collectors/heap types above.
+//
+// For each future collector, this should be reexamined with
+// that specific collector in mind.
+oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+  // If a previous card-mark was deferred, flush it now.
+  flush_deferred_store_barrier(thread);
+  if (can_elide_initializing_store_barrier(new_obj)) {
+    // The deferred_card_mark region should be empty
+    // following the flush above.
+    assert(thread->deferred_card_mark().is_empty(), "Error");
+  } else {
+    // Remember info for the newly deferred store barrier
+    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
+    assert(!deferred.is_empty(), "Error");
+    thread->set_deferred_card_mark(deferred);
+  }
+  return new_obj;
+}
+
 size_t CollectedHeap::filler_array_hdr_size() {
   return size_t(arrayOopDesc::header_size(T_INT));
 }
@@ -225,16 +308,6 @@
   fill_with_object_impl(start, words);
 }
 
-oop CollectedHeap::new_store_barrier(oop new_obj) {
-  // %%% This needs refactoring.  (It was imported from the server compiler.)
-  guarantee(can_elide_tlab_store_barriers(), "store barrier elision not supported");
-  BarrierSet* bs = this->barrier_set();
-  assert(bs->has_write_region_opt(), "Barrier set does not have write_region");
-  int new_size = new_obj->size();
-  bs->write_region(MemRegion((HeapWord*)new_obj, new_size));
-  return new_obj;
-}
-
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
   guarantee(false, "thread-local allocation buffers not supported");
   return NULL;
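
To make the deferral protocol described above concrete, here is a minimal
stand-alone sketch -- not HotSpot code: MiniRegion, MiniThread and the flat
card_table array are simplified stand-ins (HotSpot's card table does use
512-byte cards) -- showing the defer/flush discipline that
defer_store_barrier() and flush_deferred_store_barrier() maintain.

#include <cassert>
#include <cstddef>

struct MiniRegion {
  char*  start;
  size_t bytes;
  bool is_empty() const { return bytes == 0; }
};

struct MiniThread {
  MiniRegion deferred{nullptr, 0};  // analogue of _deferred_card_mark
};

// Analogue of BarrierSet::write_region(): dirty every card that the
// region spans (512-byte cards, as in HotSpot's card table).
void write_region(char* card_table, char* heap_base, MiniRegion r) {
  const size_t card_shift = 9;  // 512-byte cards
  size_t first = (size_t)(r.start - heap_base) >> card_shift;
  size_t last  = (size_t)(r.start + r.bytes - 1 - heap_base) >> card_shift;
  for (size_t i = first; i <= last; ++i) card_table[i] = 0;  // 0 == dirty
}

// Analogue of flush_deferred_store_barrier(): complete any pending
// card mark, then clear the per-thread record.
void flush_deferred(MiniThread* t, char* card_table, char* heap_base) {
  if (!t->deferred.is_empty()) {
    write_region(card_table, heap_base, t->deferred);
    t->deferred = MiniRegion{nullptr, 0};
  }
  assert(t->deferred.is_empty());
}

// Analogue of defer_store_barrier(): first flush any previous
// deferral, then either elide entirely (young-gen allocation) or
// remember the new object's extent for a later flush.
void defer_store_barrier(MiniThread* t, char* obj, size_t bytes,
                         bool in_young,
                         char* card_table, char* heap_base) {
  flush_deferred(t, card_table, heap_base);
  if (!in_young) {
    t->deferred = MiniRegion{obj, bytes};
  }
}

The essential ordering property, per cases (b) and (c) above: the card mark
strictly follows the initializing stores, because the flush happens only at
the thread's next slow-path allocation or at a safepoint, by which point the
object is fully initialized.
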
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -415,9 +415,14 @@
     guarantee(false, "thread-local allocation buffers not supported");
     return 0;
   }
+
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided.
   virtual bool can_elide_tlab_store_barriers() const = 0;
 
   // If a compiler is eliding store barriers for TLAB-allocated objects,
@@ -425,8 +430,19 @@
   // an object allocated anywhere.  The compiler's runtime support
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
-  // store barriers.  Returns new_obj, or maybe a safer copy thereof.
-  virtual oop new_store_barrier(oop new_obj);
+  // store barriers. Returns new_obj, or maybe a safer copy thereof.
+  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+
+  // Returns "true" if an initializing store to a new object at the
+  // given address can safely elide the (deferred) store barrier;
+  // returns "false" if such a barrier, possibly deferred, is
+  // required.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
+
+  // If the CollectedHeap was asked to defer a store barrier above,
+  // this asks it to flush such a deferred store barrier to the
+  // remembered set.
+  virtual void flush_deferred_store_barrier(JavaThread* thread);
 
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap?  Applies when the compiler
--- a/src/share/vm/memory/genCollectedHeap.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -260,6 +260,20 @@
     return true;
   }
 
+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
+  // only and may need to be re-examined in case other
+  // kinds of collectors are implemented in the future.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    // We wanted to assert that:
+    // assert(UseParNewGC || UseSerialGC || UseConcMarkSweepGC,
+    //       "Check can_elide_initializing_store_barrier() for this collector");
+    // but unfortunately the flag UseSerialGC is not necessarily set
+    // when DefNew+Tenured are being used.
+    return is_in_youngest((void*)new_obj);
+  }
+
   // Can a compiler elide a store barrier when it writes
   // a permanent oop into the heap?  Applies when the compiler
   // is storing x to the heap, where x->is_perm() is true.
--- a/src/share/vm/opto/graphKit.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/opto/graphKit.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -3186,6 +3186,15 @@
       return;
   }
 
+  if (use_ReduceInitialCardMarks()
+      && obj == just_allocated_object(control())) {
+    // We can skip marks on a freshly-allocated object in Eden.
+    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps
+    // so as to make this card-mark elision safe.
+    return;
+  }
+
   if (!use_precise) {
     // All card marks for a (non-array) instance are in one place:
     adr = obj;
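
On the compiler side, the elision the hunk above introduces boils down to one
predicate. A hedged sketch with stand-in names (this is not the real GraphKit
API):

// Skip the post-barrier when the store targets the object just
// allocated on the current control path and ReduceInitialCardMarks
// is on; the runtime then compensates via defer_store_barrier() on
// any slow-path allocation. Names here are illustrative stand-ins.
bool needs_card_mark(bool reduce_initial_card_marks,
                     const void* store_target,
                     const void* just_allocated_obj) {
  if (reduce_initial_card_marks && store_target == just_allocated_obj) {
    return false;  // initializing store to a fresh object: elide
  }
  return true;     // ordinary store: emit the card mark as usual
}
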
--- a/src/share/vm/opto/library_call.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/opto/library_call.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -4160,13 +4160,13 @@
           result_mem ->set_req(_objArray_path, reset_memory());
         }
       }
-      // We can dispense with card marks if we know the allocation
-      // comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
-      // causes the non-eden paths to simulate a fresh allocation,
-      // insofar that no further card marks are required to initialize
-      // the object.
-
       // Otherwise, there are no card marks to worry about.
+      // (We can dispense with card marks if we know the allocation
+      //  comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
+      //  causes the non-eden paths to take compensating steps to
+      //  simulate a fresh allocation, so that no further
+      //  card marks are required in compiled code to initialize
+      //  the object.)
 
       if (!stopped()) {
         copy_to_clone(obj, alloc_obj, obj_size, true, false);
--- a/src/share/vm/opto/runtime.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/opto/runtime.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -143,18 +143,20 @@
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
 
-void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
-  // we perform a card mark.  This lets the compiled code omit
-  // card marks for initialization of new objects.
-  // Keep this code consistent with GraphKit::store_barrier.
+  // we inform the GC that we will be doing initializing writes to
+  // this object in the future without emitting card-marks, so that
+  // the GC can take appropriate compensating steps.
+  // NOTE: Keep this code consistent with GraphKit::store_barrier.
 
   oop new_obj = thread->vm_result();
   if (new_obj == NULL)  return;
 
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
-  new_obj = Universe::heap()->new_store_barrier(new_obj);
+  // GC may decide to give back a safer copy of new_obj.
+  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -197,8 +199,8 @@
   JRT_BLOCK_END;
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // inform GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
   }
 JRT_END
 
@@ -236,8 +238,8 @@
   JRT_BLOCK_END;
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // inform GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
   }
 JRT_END
 
--- a/src/share/vm/opto/runtime.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/opto/runtime.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -133,8 +133,8 @@
   // Allocate storage for an objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-allocation step for implementing ReduceInitialCardMarks:
-  static void do_eager_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
+  static void maybe_defer_card_mark(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional array
   // Note: needs to be fixed for arbitrary number of dimensions
--- a/src/share/vm/runtime/thread.cpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/runtime/thread.cpp	Fri Oct 23 18:44:33 2009 -0700
@@ -1213,6 +1213,7 @@
 {
   initialize();
   _is_attaching = is_attaching;
+  assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
 }
 
 bool JavaThread::reguard_stack(address cur_sp) {
@@ -2318,6 +2319,10 @@
 
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+  // Flush deferred store-barriers, if any, associated with
+  // initializing stores done by this JavaThread in the current epoch.
+  Universe::heap()->flush_deferred_store_barrier(this);
+
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
 
--- a/src/share/vm/runtime/thread.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/runtime/thread.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -684,8 +684,13 @@
   methodOop     _callee_target;
 
   // Oop results of VM runtime calls
-  oop           _vm_result;                      // Used to pass back an oop result into Java code, GC-preserved
-  oop           _vm_result_2;                    // Used to pass back an oop result into Java code, GC-preserved
+  oop           _vm_result;    // Used to pass back an oop result into Java code, GC-preserved
+  oop           _vm_result_2;  // Used to pass back an oop result into Java code, GC-preserved
+
+  // See ReduceInitialCardMarks: this holds the precise space interval of
+  // the most recent slow path allocation for which compiled code has
+  // elided card-marks for performance along the fast-path.
+  MemRegion     _deferred_card_mark;
 
   MonitorChunk* _monitor_chunks;                 // Contains the off stack monitors
                                                  // allocated during deoptimization
@@ -1082,6 +1087,9 @@
   oop  vm_result_2() const                       { return _vm_result_2; }
   void set_vm_result_2  (oop x)                  { _vm_result_2   = x; }
 
+  MemRegion deferred_card_mark() const           { return _deferred_card_mark; }
+  void set_deferred_card_mark(MemRegion mr)      { _deferred_card_mark = mr;   }
+
   // Exception handling for compiled methods
   oop      exception_oop() const                 { return _exception_oop; }
   int      exception_stack_size() const          { return _exception_stack_size; }
--- a/src/share/vm/utilities/taskqueue.hpp	Tue Oct 20 16:34:08 2009 -0400
+++ b/src/share/vm/utilities/taskqueue.hpp	Fri Oct 23 18:44:33 2009 -0700
@@ -207,7 +207,7 @@
     // Actually means 0, so do the push.
     uint localBot = _bottom;
     _elems[localBot] = t;
-    _bottom = increment_index(localBot);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
     return true;
   }
   return false;
@@ -465,19 +465,7 @@
 #endif
 };
 
-#define SIMPLE_STACK 0
-
 template<class E> inline bool GenericTaskQueue<E>::push(E t) {
-#if SIMPLE_STACK
-  uint localBot = _bottom;
-  if (_bottom < max_elems()) {
-    _elems[localBot] = t;
-    _bottom = localBot + 1;
-    return true;
-  } else {
-    return false;
-  }
-#else
   uint localBot = _bottom;
   assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
   idx_t top = _age.top();
@@ -485,23 +473,14 @@
   assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
     _elems[localBot] = t;
-    _bottom = increment_index(localBot);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
     return true;
   } else {
     return push_slow(t, dirty_n_elems);
   }
-#endif
 }
 
 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
-#if SIMPLE_STACK
-  uint localBot = _bottom;
-  assert(localBot > 0, "precondition.");
-  localBot--;
-  t = _elems[localBot];
-  _bottom = localBot;
-  return true;
-#else
   uint localBot = _bottom;
   // This value cannot be N-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
@@ -529,7 +508,6 @@
     // path.
     return pop_local_slow(localBot, _age.get());
   }
-#endif
 }
 
 typedef oop Task;
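
The taskqueue change above replaces a plain store to _bottom with
OrderAccess::release_store, a publication fix for the work-stealing deque:
the element must be visible to a stealing thread before the new _bottom is.
A self-contained sketch using C++11 atomics as a stand-in (MiniDeque is
illustrative, not GenericTaskQueue; HotSpot's bounds check differs in
detail):

#include <atomic>
#include <cstdint>

struct MiniDeque {
  static const uint32_t N = 1024;    // power-of-two capacity
  int _elems[N];
  std::atomic<uint32_t> _bottom{0};  // owner pushes/pops here
  std::atomic<uint32_t> _top{0};     // thieves steal here

  bool push(int t) {
    uint32_t bot = _bottom.load(std::memory_order_relaxed);
    uint32_t top = _top.load(std::memory_order_acquire);
    if (bot - top < N - 1) {         // leave at least one slot free
      _elems[bot % N] = t;           // (1) write the element...
      // (2) ...then publish it with release ordering. A stealing
      // thread that observes the new _bottom is thereby guaranteed
      // to see the element written in (1); with a plain store the
      // compiler or CPU could reorder (1) past (2), letting a thief
      // read a stale slot.
      _bottom.store(bot + 1, std::memory_order_release);
      return true;
    }
    return false;                    // full: caller takes the slow path
  }
};

A matching steal operation would acquire-load _bottom and CAS _top; only push
is shown here because that is the side the hunk changes.
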
--- a/test/gc/6845368/bigobj.java	Tue Oct 20 16:34:08 2009 -0400
+++ b/test/gc/6845368/bigobj.java	Fri Oct 23 18:44:33 2009 -0700
@@ -3,7 +3,7 @@
    @bug 6845368
    @summary ensure gc updates references > 64K bytes from the start of the obj
    @author John Coomes
-   @run main/othervm -Xmx64m bigobj
+   @run main/othervm/timeout=720 -Xmx64m bigobj
 */
 
 // Allocate an object with a block of reference fields that starts more