# HG changeset patch
# User tonyp
# Date 1270565985 14400
# Node ID 41643676408ba403ea31bb040125d1ff22bcd4d8
# Parent  e9ae45b9ec4806d8c7c578c11a90805b062c9883
6909756: G1: guarantee(G1CollectedHeap::heap()->mark_in_progress(),"Precondition.")
Summary: Make sure that two marking cycles do not overlap, i.e., a new one
can only start after the concurrent marking thread finishes all its work. In
the fix I piggy-back a couple of minor extra fixes: some general code
reformatting for consistency (only around the code I modified), the removal
of a field (G1CollectorPolicy::_should_initiate_conc_mark) which doesn't seem
to be used at all (it's only set but never read), as well as moving the "is
GC locker active" test earlier into the G1 pause / Full GC and using a more
appropriate method for it.
Reviewed-by: johnc, jmasa, jcoomes, ysr
diff -r e9ae45b9ec48 -r 41643676408b src/share/vm/gc_implementation/g1/concurrentMark.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Apr 05 12:19:22 2010 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Apr 06 10:59:45 2010 -0400
@@ -708,24 +708,46 @@
 //
 
 void ConcurrentMark::clearNextBitmap() {
-  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
-
-  // clear the mark bitmap (no grey objects to start with).
-  // We need to do this in chunks and offer to yield in between
-  // each chunk.
-  HeapWord* start = _nextMarkBitMap->startWord();
-  HeapWord* end = _nextMarkBitMap->endWord();
-  HeapWord* cur = start;
-  size_t chunkSize = M;
-  while (cur < end) {
-    HeapWord* next = cur + chunkSize;
-    if (next > end)
-      next = end;
-    MemRegion mr(cur,next);
-    _nextMarkBitMap->clearRange(mr);
-    cur = next;
-    do_yield_check();
-  }
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+
+  // Make sure that the concurrent mark thread looks to still be in
+  // the current cycle.
+  guarantee(cmThread()->during_cycle(), "invariant");
+
+  // We are finishing up the current cycle by clearing the next
+  // marking bitmap and getting it ready for the next cycle. During
+  // this time no other cycle can start. So, let's make sure that this
+  // is the case.
+  guarantee(!g1h->mark_in_progress(), "invariant");
+
+  // clear the mark bitmap (no grey objects to start with).
+  // We need to do this in chunks and offer to yield in between
+  // each chunk.
+  HeapWord* start = _nextMarkBitMap->startWord();
+  HeapWord* end = _nextMarkBitMap->endWord();
+  HeapWord* cur = start;
+  size_t chunkSize = M;
+  while (cur < end) {
+    HeapWord* next = cur + chunkSize;
+    if (next > end)
+      next = end;
+    MemRegion mr(cur,next);
+    _nextMarkBitMap->clearRange(mr);
+    cur = next;
+    do_yield_check();
+
+    // Repeat the asserts from above. We'll do them as asserts here to
+    // minimize their overhead on the product. However, we'll have
+    // them as guarantees at the beginning / end of the bitmap
+    // clearing to get some checking in the product.
+    assert(cmThread()->during_cycle(), "invariant");
+    assert(!g1h->mark_in_progress(), "invariant");
+  }
+
+  // Repeat the asserts from above.
+  guarantee(cmThread()->during_cycle(), "invariant");
+  guarantee(!g1h->mark_in_progress(), "invariant");
 }
 
 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
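The notable pattern in the clearNextBitmap() rewrite above is the chunked
clearing loop: the bitmap is cleared one chunk at a time, with a yield check
between chunks, so the concurrent thread never holds up a pending safepoint
for the duration of the whole clear. Below is a minimal standalone sketch of
that pattern; the types and the do_yield_check() stub are simplified
stand-ins, not the HotSpot ones.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Simplified stand-in for the next marking bitmap: one bit per heap word.
struct MarkBitMap {
  std::vector<bool> bits;
  explicit MarkBitMap(std::size_t words) : bits(words, true) {}
  std::size_t start_word() const { return 0; }
  std::size_t end_word() const   { return bits.size(); }
  void clear_range(std::size_t from, std::size_t to) {
    std::fill(bits.begin() + from, bits.begin() + to, false);
  }
};

// Stand-in for the real yield check: a concurrent thread would block
// here if a safepoint (e.g. an evacuation pause) is pending.
static void do_yield_check() { /* cooperate with pending safepoints */ }

// Clear the bitmap in fixed-size chunks, offering to yield between
// chunks so a pending pause never waits for the full clear.
void clear_next_bitmap(MarkBitMap& bm, std::size_t chunk_words) {
  std::size_t cur = bm.start_word();
  const std::size_t end = bm.end_word();
  while (cur < end) {
    std::size_t next = std::min(cur + chunk_words, end);
    bm.clear_range(cur, next);
    cur = next;
    do_yield_check();
  }
}

int main() {
  MarkBitMap bm(1u << 20);
  clear_next_bitmap(bm, 1u << 12);  // 256 chunks, 256 yield opportunities
  std::printf("cleared %zu words\n", bm.end_word());
  return 0;
}

Note also how the patch splits the checking: the guarantees (active in
product builds) bracket the clear, while the cheaper asserts inside the loop
are debug-only, so the invariant is still checked per chunk without product
overhead.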
diff -r e9ae45b9ec48 -r 41643676408b src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Mon Apr 05 12:19:22 2010 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Tue Apr 06 10:59:45 2010 -0400
@@ -42,8 +42,8 @@
 
  private:
   ConcurrentMark*                  _cm;
-  bool                             _started;
-  bool                             _in_progress;
+  volatile bool                    _started;
+  volatile bool                    _in_progress;
 
   void sleepBeforeNextCycle();
 
@@ -67,15 +67,25 @@
   // Counting virtual time so far.
   double vtime_count_accum() { return _vtime_count_accum; }
 
-  ConcurrentMark* cm()                           { return _cm;     }
+  ConcurrentMark* cm()     { return _cm;     }
+
+  void set_started()       { _started = true;  }
+  void clear_started()     { _started = false; }
+  bool started()           { return _started;  }
+
+  void set_in_progress()   { _in_progress = true;  }
+  void clear_in_progress() { _in_progress = false; }
+  bool in_progress()       { return _in_progress;  }
-  void            set_started()                  { _started = true;  }
-  void            clear_started()                { _started = false; }
-  bool            started()                      { return _started;  }
-
-  void            set_in_progress()              { _in_progress = true;  }
-  void            clear_in_progress()            { _in_progress = false; }
-  bool            in_progress()                  { return _in_progress;  }
+  // This flag returns true from the moment a marking cycle is
+  // initiated (during the initial-mark pause when started() is set)
+  // to the moment when the cycle completes (just after the next
+  // marking bitmap has been cleared and in_progress() is
+  // cleared). While this flag is true we will not start another cycle
+  // so that cycles do not overlap. We cannot use just in_progress()
+  // as the CM thread might take some time to wake up before noticing
+  // that started() is set and setting in_progress().
+  bool during_cycle()      { return started() || in_progress(); }
 
   // Yield for GC
   void yield();
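The new during_cycle() predicate is the core of the fix: started() is set by
the VM thread during the initial-mark pause, but in_progress() only becomes
true once the marking thread wakes up, so neither flag alone covers the whole
cycle. The following compilable model illustrates the gap and how the
disjunction closes it; CMThreadModel is a hypothetical simplification (using
std::atomic where HotSpot uses volatile bool), not the real
ConcurrentMarkThread.

#include <atomic>
#include <cstdio>

// Hypothetical, simplified model of the ConcurrentMarkThread flags.
class CMThreadModel {
  std::atomic<bool> _started{false};      // set at the initial-mark pause
  std::atomic<bool> _in_progress{false};  // set once the CM thread is working

public:
  void set_started()       { _started = true;  }      // VM thread
  void clear_started()     { _started = false; }      // CM thread, on wake-up
  void set_in_progress()   { _in_progress = true;  }  // CM thread, on wake-up
  void clear_in_progress() { _in_progress = false; }  // after bitmap clearing

  bool started()     const { return _started; }
  bool in_progress() const { return _in_progress; }

  // True from cycle initiation to cycle completion, including the
  // window where started() is set but the CM thread has not yet
  // woken up to set in_progress().
  bool during_cycle() const { return started() || in_progress(); }
};

int main() {
  CMThreadModel cm;

  cm.set_started();   // initial-mark pause initiates a cycle
  std::printf("after initiation: in_progress=%d during_cycle=%d\n",
              cm.in_progress(), cm.during_cycle());   // 0 1 <- the gap

  cm.set_in_progress();  // CM thread wakes up and starts working
  cm.clear_started();
  std::printf("while marking:    in_progress=%d during_cycle=%d\n",
              cm.in_progress(), cm.during_cycle());   // 1 1

  cm.clear_in_progress();  // next marking bitmap has been cleared
  std::printf("cycle complete:   in_progress=%d during_cycle=%d\n",
              cm.in_progress(), cm.during_cycle());   // 0 0
  return 0;
}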
" i-m" : "", _in_marking_window, _in_marking_window_im); #endif // TRACE_CALC_YOUNG_CONFIG @@ -1040,7 +1040,8 @@ set_full_young_gcs(true); _last_full_young_gc = false; _should_revert_to_full_young_gcs = false; - _should_initiate_conc_mark = false; + clear_initiate_conc_mark_if_possible(); + clear_during_initial_mark_pause(); _known_garbage_bytes = 0; _known_garbage_ratio = 0.0; _in_marking_window = false; @@ -1186,7 +1187,8 @@ void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double mark_init_elapsed_time_ms) { _during_marking = true; - _should_initiate_conc_mark = false; + assert(!initiate_conc_mark_if_possible(), "we should have cleared it by now"); + clear_during_initial_mark_pause(); _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms; } @@ -1257,7 +1259,6 @@ } _n_pauses_at_mark_end = _n_pauses; _n_marks_since_last_pause++; - _conc_mark_initiated = false; } void @@ -1453,17 +1454,24 @@ #endif // PRODUCT if (in_young_gc_mode()) { - last_pause_included_initial_mark = _should_initiate_conc_mark; + last_pause_included_initial_mark = during_initial_mark_pause(); if (last_pause_included_initial_mark) record_concurrent_mark_init_end_pre(0.0); size_t min_used_targ = (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent; - if (cur_used_bytes > min_used_targ) { - if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) { - } else if (!_g1->mark_in_progress() && !_last_full_young_gc) { - _should_initiate_conc_mark = true; + + if (!_g1->mark_in_progress() && !_last_full_young_gc) { + assert(!last_pause_included_initial_mark, "invariant"); + if (cur_used_bytes > min_used_targ && + cur_used_bytes > _prev_collection_pause_used_at_end_bytes) { + assert(!during_initial_mark_pause(), "we should not see this here"); + + // Note: this might have already been set, if during the last + // pause we decided to start a cycle but at the beginning of + // this pause we decided to postpone it. That's OK. + set_initiate_conc_mark_if_possible(); } } @@ -1754,7 +1762,7 @@ bool new_in_marking_window = _in_marking_window; bool new_in_marking_window_im = false; - if (_should_initiate_conc_mark) { + if (during_initial_mark_pause()) { new_in_marking_window = true; new_in_marking_window_im = true; } @@ -2173,7 +2181,13 @@ if (predicted_time_ms > _expensive_region_limit_ms) { if (!in_young_gc_mode()) { set_full_young_gcs(true); - _should_initiate_conc_mark = true; + // We might want to do something different here. However, + // right now we don't support the non-generational G1 mode + // (and in fact we are planning to remove the associated code, + // see CR 6814390). So, let's leave it as is and this will be + // removed some time in the future + ShouldNotReachHere(); + set_during_initial_mark_pause(); } else // no point in doing another partial one _should_revert_to_full_young_gcs = true; @@ -2697,6 +2711,50 @@ #endif void +G1CollectorPolicy::decide_on_conc_mark_initiation() { + // We are about to decide on whether this pause will be an + // initial-mark pause. + + // First, during_initial_mark_pause() should not be already set. We + // will set it here if we have to. However, it should be cleared by + // the end of the pause (it's only set for the duration of an + // initial-mark pause). + assert(!during_initial_mark_pause(), "pre-condition"); + + if (initiate_conc_mark_if_possible()) { + // We had noticed on a previous pause that the heap occupancy has + // gone over the initiating threshold and we should start a + // concurrent marking cycle. So we might initiate one. 
diff -r e9ae45b9ec48 -r 41643676408b src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Apr 05 12:19:22 2010 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Apr 06 10:59:45 2010 -0400
@@ -178,8 +178,8 @@
   // so the hack is to do the cast QQQ FIXME
   _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
   _n_marks_since_last_pause(0),
-  _conc_mark_initiated(false),
-  _should_initiate_conc_mark(false),
+  _initiate_conc_mark_if_possible(false),
+  _during_initial_mark_pause(false),
   _should_revert_to_full_young_gcs(false),
   _last_full_young_gc(false),
 
@@ -793,7 +793,7 @@
                     elapsed_time_ms, calculations,
                     full_young_gcs() ? "full" : "partial",
-                    should_initiate_conc_mark() ? " i-m" : "",
+                    during_initial_mark_pause() ? " i-m" : "",
                     _in_marking_window,
                     _in_marking_window_im);
 #endif // TRACE_CALC_YOUNG_CONFIG
@@ -1040,7 +1040,8 @@
   set_full_young_gcs(true);
   _last_full_young_gc = false;
   _should_revert_to_full_young_gcs = false;
-  _should_initiate_conc_mark = false;
+  clear_initiate_conc_mark_if_possible();
+  clear_during_initial_mark_pause();
   _known_garbage_bytes = 0;
   _known_garbage_ratio = 0.0;
   _in_marking_window = false;
@@ -1186,7 +1187,8 @@
 void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double
                                                    mark_init_elapsed_time_ms) {
   _during_marking = true;
-  _should_initiate_conc_mark = false;
+  assert(!initiate_conc_mark_if_possible(), "we should have cleared it by now");
+  clear_during_initial_mark_pause();
   _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms;
 }
 
@@ -1257,7 +1259,6 @@
   }
   _n_pauses_at_mark_end = _n_pauses;
   _n_marks_since_last_pause++;
-  _conc_mark_initiated = false;
 }
 
 void
@@ -1453,17 +1454,24 @@
 #endif // PRODUCT
 
   if (in_young_gc_mode()) {
-    last_pause_included_initial_mark = _should_initiate_conc_mark;
+    last_pause_included_initial_mark = during_initial_mark_pause();
     if (last_pause_included_initial_mark)
      record_concurrent_mark_init_end_pre(0.0);

    size_t min_used_targ =
      (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;

-    if (cur_used_bytes > min_used_targ) {
-      if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) {
-      } else if (!_g1->mark_in_progress() && !_last_full_young_gc) {
-        _should_initiate_conc_mark = true;
+
+    if (!_g1->mark_in_progress() && !_last_full_young_gc) {
+      assert(!last_pause_included_initial_mark, "invariant");
+      if (cur_used_bytes > min_used_targ &&
+          cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
+        assert(!during_initial_mark_pause(), "we should not see this here");
+
+        // Note: this might have already been set, if during the last
+        // pause we decided to start a cycle but at the beginning of
+        // this pause we decided to postpone it. That's OK.
+        set_initiate_conc_mark_if_possible();
      }
    }
 
@@ -1754,7 +1762,7 @@
 
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
-  if (_should_initiate_conc_mark) {
+  if (during_initial_mark_pause()) {
     new_in_marking_window = true;
     new_in_marking_window_im = true;
   }
@@ -2173,7 +2181,13 @@
   if (predicted_time_ms > _expensive_region_limit_ms) {
     if (!in_young_gc_mode()) {
       set_full_young_gcs(true);
-      _should_initiate_conc_mark = true;
+      // We might want to do something different here. However,
+      // right now we don't support the non-generational G1 mode
+      // (and in fact we are planning to remove the associated code,
+      // see CR 6814390). So, let's leave it as is and this will be
+      // removed some time in the future.
+      ShouldNotReachHere();
+      set_during_initial_mark_pause();
     } else
       // no point in doing another partial one
       _should_revert_to_full_young_gcs = true;
@@ -2697,6 +2711,50 @@
 #endif
 
 void
+G1CollectorPolicy::decide_on_conc_mark_initiation() {
+  // We are about to decide on whether this pause will be an
+  // initial-mark pause.
+
+  // First, during_initial_mark_pause() should not be already set. We
+  // will set it here if we have to. However, it should be cleared by
+  // the end of the pause (it's only set for the duration of an
+  // initial-mark pause).
+  assert(!during_initial_mark_pause(), "pre-condition");
+
+  if (initiate_conc_mark_if_possible()) {
+    // We had noticed on a previous pause that the heap occupancy has
+    // gone over the initiating threshold and we should start a
+    // concurrent marking cycle. So we might initiate one.
+
+    bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle();
+    if (!during_cycle) {
+      // The concurrent marking thread is not "during a cycle", i.e.,
+      // it has completed the last one. So we can go ahead and
+      // initiate a new cycle.
+
+      set_during_initial_mark_pause();
+
+      // And we can now clear initiate_conc_mark_if_possible() as
+      // we've already acted on it.
+      clear_initiate_conc_mark_if_possible();
+    } else {
+      // The concurrent marking thread is still finishing up the
+      // previous cycle. If we start one right now the two cycles
+      // overlap. In particular, the concurrent marking thread might
+      // be in the process of clearing the next marking bitmap (which
+      // we will use for the next cycle if we start one). Starting a
+      // cycle now will be bad given that parts of the marking
+      // information might get cleared by the marking thread. And we
+      // cannot wait for the marking thread to finish the cycle as it
+      // periodically yields while clearing the next marking bitmap
+      // and, if it's in a yield point, it's waiting for us to
+      // finish. So, at this point we will not start a cycle and we'll
+      // let the concurrent marking thread complete the last one.
+    }
+  }
+}
+
+void
 G1CollectorPolicy_BestRegionsFirst::
 record_collection_pause_start(double start_time_sec, size_t start_used) {
   G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used);
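The initiating condition in record_collection_pause_start() above is easy to
misread in diff form: a cycle is only suggested when no marking is already in
progress, the _last_full_young_gc flag is not set, occupancy exceeds the
InitiatingHeapOccupancyPercent threshold, and occupancy has kept growing
since the end of the previous pause. A hypothetical condensation of that
predicate, with plain integer parameters standing in for the policy object's
fields:

#include <cstddef>
#include <cstdio>

// Hypothetical condensation of the initiating test above: suggest a
// marking cycle only if no marking is under way, the _last_full_young_gc
// flag is clear, occupancy is over the threshold, and occupancy has
// grown since the end of the previous pause.
bool should_suggest_conc_mark(std::size_t capacity_bytes,
                              std::size_t cur_used_bytes,
                              std::size_t prev_pause_used_at_end_bytes,
                              unsigned    initiating_occupancy_percent,
                              bool        mark_in_progress,
                              bool        last_full_young_gc) {
  const std::size_t min_used_targ =
      (capacity_bytes / 100) * initiating_occupancy_percent;
  return !mark_in_progress &&
         !last_full_young_gc &&
         cur_used_bytes > min_used_targ &&
         cur_used_bytes > prev_pause_used_at_end_bytes;
}

int main() {
  // 1 GB heap, 45% threshold, occupancy grew from 480 MB to 500 MB.
  bool suggest = should_suggest_conc_mark(1024u * 1024u * 1024u,
                                          500u * 1024u * 1024u,
                                          480u * 1024u * 1024u,
                                          45, false, false);
  std::printf("suggest marking cycle: %d\n", suggest);  // 1
  return 0;
}

A true result only records the suggestion (set_initiate_conc_mark_if_possible());
whether a cycle actually starts is decided at the beginning of a later pause
by decide_on_conc_mark_initiation().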
diff -r e9ae45b9ec48 -r 41643676408b src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Apr 05 12:19:22 2010 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Apr 06 10:59:45 2010 -0400
@@ -724,11 +724,31 @@
 
   size_t _n_marks_since_last_pause;
 
-  // True iff CM has been initiated.
-  bool _conc_mark_initiated;
+  // At the end of a pause we check the heap occupancy and we decide
+  // whether we will start a marking cycle during the next pause. If
+  // we decide that we want to do that, we will set this parameter to
+  // true. So, this parameter will stay true between the end of a
+  // pause and the beginning of a subsequent pause (not necessarily
+  // the next one, see the comments on the next field) when we decide
+  // that we will indeed start a marking cycle and do the initial-mark
+  // work.
+  volatile bool _initiate_conc_mark_if_possible;
 
-  // True iff CM should be initiated
-  bool _should_initiate_conc_mark;
+  // If initiate_conc_mark_if_possible() is set at the beginning of a
+  // pause, it is a suggestion that the pause should start a marking
+  // cycle by doing the initial-mark work. However, it is possible
+  // that the concurrent marking thread is still finishing up the
+  // previous marking cycle (e.g., clearing the next marking
+  // bitmap). If that is the case we cannot start a new cycle and
+  // we'll have to wait for the concurrent marking thread to finish
+  // what it is doing. In this case we will postpone the marking cycle
+  // initiation decision for the next pause. When we eventually decide
+  // to start a cycle, we will set _during_initial_mark_pause which
+  // will stay true until the end of the initial-mark pause and it's
+  // the condition that indicates that a pause is doing the
+  // initial-mark work.
+  volatile bool _during_initial_mark_pause;
+
   bool _should_revert_to_full_young_gcs;
   bool _last_full_young_gc;
 
@@ -981,9 +1001,21 @@
   // Add "hr" to the CS.
   void add_to_collection_set(HeapRegion* hr);
 
-  bool should_initiate_conc_mark()       { return _should_initiate_conc_mark; }
-  void set_should_initiate_conc_mark()   { _should_initiate_conc_mark = true; }
-  void unset_should_initiate_conc_mark() { _should_initiate_conc_mark = false; }
+  bool initiate_conc_mark_if_possible()       { return _initiate_conc_mark_if_possible; }
+  void set_initiate_conc_mark_if_possible()   { _initiate_conc_mark_if_possible = true;  }
+  void clear_initiate_conc_mark_if_possible() { _initiate_conc_mark_if_possible = false; }
+
+  bool during_initial_mark_pause()       { return _during_initial_mark_pause; }
+  void set_during_initial_mark_pause()   { _during_initial_mark_pause = true;  }
+  void clear_during_initial_mark_pause() { _during_initial_mark_pause = false; }
+
+  // This is called at the very beginning of an evacuation pause (it
+  // has to be the first thing that the pause does). If
+  // initiate_conc_mark_if_possible() is true, and the concurrent
+  // marking thread has completed its work during the previous cycle,
+  // it will set during_initial_mark_pause() so that the pause does
+  // the initial-mark work and starts a marking cycle.
+  void decide_on_conc_mark_initiation();
 
   // If an expansion would be appropriate, because recent GC overhead had
   // exceeded the desired limit, return an amount to expand by.
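Taken together, the two new policy flags implement a small hand-off protocol:
_initiate_conc_mark_if_possible carries the "we want a marking cycle"
suggestion from one pause to a later one, and _during_initial_mark_pause is
set only for the single pause that actually does the initial-mark work, and
only once the marking thread is idle. The sketch below plays that protocol
through two pauses; PolicyModel and its members are hypothetical
simplifications, not the HotSpot classes.

#include <cstdio>

// Heavily simplified model of the postponement protocol above.
struct PolicyModel {
  bool initiate_if_possible = false;  // models _initiate_conc_mark_if_possible
  bool during_initial_mark  = false;  // models _during_initial_mark_pause

  // First thing an evacuation pause does (decide_on_conc_mark_initiation).
  void decide(bool cm_thread_during_cycle) {
    if (initiate_if_possible && !cm_thread_during_cycle) {
      during_initial_mark  = true;   // this pause does the initial-mark work
      initiate_if_possible = false;  // the suggestion has been acted on
    }
    // Otherwise postpone: the suggestion stays set for a later pause.
  }

  // The initial-mark status never outlives the pause that set it.
  void end_of_pause() { during_initial_mark = false; }
};

int main() {
  PolicyModel p;
  p.initiate_if_possible = true;  // occupancy went over the threshold

  p.decide(/* cm_thread_during_cycle = */ true);  // CM still finishing: postpone
  std::printf("pause 1 initial-mark: %d\n", p.during_initial_mark);  // 0
  p.end_of_pause();

  p.decide(/* cm_thread_during_cycle = */ false); // CM idle: start the cycle
  std::printf("pause 2 initial-mark: %d\n", p.during_initial_mark);  // 1
  p.end_of_pause();
  return 0;
}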