changeset 2050:4e66274b6bb3

7014261: G1: RSet-related failures Summary: A race between the concurrent cleanup thread and the VM thread while it is processing the "expanded sparse table list" causes both threads to try to free the same sparse table entry and either causes one of the threads to fail or leaves the entry in an inconsistent state. The solution is purge all entries on the expanded list that correspond go regions that are being cleaned up. Reviewed-by: brutisso, johnc
author tonyp
date Tue, 25 Jan 2011 17:58:19 -0500
parents bd2e08334e84
children 908c5092d72a
files src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp src/share/vm/gc_implementation/g1/sparsePRT.cpp src/share/vm/gc_implementation/g1/sparsePRT.hpp
diffstat 7 files changed, 171 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jan 25 17:58:19 2011 -0500
@@ -1500,21 +1500,19 @@
   size_t _max_live_bytes;
   size_t _regions_claimed;
   size_t _freed_bytes;
-  FreeRegionList _local_cleanup_list;
-  HumongousRegionSet _humongous_proxy_set;
+  FreeRegionList* _local_cleanup_list;
+  HumongousRegionSet* _humongous_proxy_set;
+  HRRSCleanupTask* _hrrs_cleanup_task;
   double _claimed_region_time;
   double _max_region_time;
 
 public:
   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                             int worker_num);
+                             int worker_num,
+                             FreeRegionList* local_cleanup_list,
+                             HumongousRegionSet* humongous_proxy_set,
+                             HRRSCleanupTask* hrrs_cleanup_task);
   size_t freed_bytes() { return _freed_bytes; }
-  FreeRegionList* local_cleanup_list() {
-    return &_local_cleanup_list;
-  }
-  HumongousRegionSet* humongous_proxy_set() {
-    return &_humongous_proxy_set;
-  }
 
   bool doHeapRegion(HeapRegion *r);
 
@@ -1541,7 +1539,12 @@
 
   void work(int i) {
     double start = os::elapsedTime();
-    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i);
+    FreeRegionList local_cleanup_list("Local Cleanup List");
+    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
+    HRRSCleanupTask hrrs_cleanup_task;
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+                                           &humongous_proxy_set,
+                                           &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                             HeapRegion::NoteEndClaimValue);
@@ -1553,15 +1556,17 @@
     // Now update the lists
     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                             NULL /* free_list */,
-                                            g1_note_end.humongous_proxy_set(),
+                                            &humongous_proxy_set,
                                             true /* par */);
     {
       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
       _max_live_bytes += g1_note_end.max_live_bytes();
       _freed_bytes += g1_note_end.freed_bytes();
 
-      _cleanup_list->add_as_tail(g1_note_end.local_cleanup_list());
-      assert(g1_note_end.local_cleanup_list()->is_empty(), "post-condition");
+      _cleanup_list->add_as_tail(&local_cleanup_list);
+      assert(local_cleanup_list.is_empty(), "post-condition");
+
+      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
     }
     double end = os::elapsedTime();
     if (G1PrintParCleanupStats) {
@@ -1602,13 +1607,17 @@
 
 G1NoteEndOfConcMarkClosure::
 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                           int worker_num)
+                           int worker_num,
+                           FreeRegionList* local_cleanup_list,
+                           HumongousRegionSet* humongous_proxy_set,
+                           HRRSCleanupTask* hrrs_cleanup_task)
   : _g1(g1), _worker_num(worker_num),
     _max_live_bytes(0), _regions_claimed(0),
     _freed_bytes(0),
     _claimed_region_time(0.0), _max_region_time(0.0),
-    _local_cleanup_list("Local Cleanup List"),
-    _humongous_proxy_set("Local Cleanup Humongous Proxy Set") { }
+    _local_cleanup_list(local_cleanup_list),
+    _humongous_proxy_set(humongous_proxy_set),
+    _hrrs_cleanup_task(hrrs_cleanup_task) { }
 
 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
   // We use a claim value of zero here because all regions
@@ -1619,11 +1628,12 @@
     _regions_claimed++;
     hr->note_end_of_marking();
     _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_totally_empty(hr,
-                                      &_freed_bytes,
-                                      &_local_cleanup_list,
-                                      &_humongous_proxy_set,
-                                      true /* par */);
+    _g1->free_region_if_empty(hr,
+                              &_freed_bytes,
+                              _local_cleanup_list,
+                              _humongous_proxy_set,
+                              _hrrs_cleanup_task,
+                              true /* par */);
     double region_time = (os::elapsedTime() - start);
     _claimed_region_time += region_time;
     if (region_time > _max_region_time) _max_region_time = region_time;
@@ -1659,6 +1669,8 @@
 
   double start = os::elapsedTime();
 
+  HeapRegionRemSet::reset_for_cleanup_tasks();
+
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                         &_region_bm, &_card_bm);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 25 17:58:19 2011 -0500
@@ -4925,10 +4925,11 @@
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
-void G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr,
+void G1CollectedHeap::free_region_if_empty(HeapRegion* hr,
                                      size_t* pre_used,
                                      FreeRegionList* free_list,
                                      HumongousRegionSet* humongous_proxy_set,
+                                     HRRSCleanupTask* hrrs_cleanup_task,
                                      bool par) {
   if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
     if (hr->isHumongous()) {
@@ -4937,6 +4938,8 @@
     } else {
       free_region(hr, pre_used, free_list, par);
     }
+  } else {
+    hr->rem_set()->do_cleanup_work(hrrs_cleanup_task);
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 25 17:58:19 2011 -0500
@@ -40,6 +40,7 @@
 
 class HeapRegion;
 class HeapRegionSeq;
+class HRRSCleanupTask;
 class PermanentGenerationSpec;
 class GenerationSpec;
 class OopsInHeapRegionClosure;
@@ -1099,11 +1100,12 @@
   // all dead. It calls either free_region() or
   // free_humongous_region() depending on the type of the region that
   // is passed to it.
-  void free_region_if_totally_empty(HeapRegion* hr,
-                                    size_t* pre_used,
-                                    FreeRegionList* free_list,
-                                    HumongousRegionSet* humongous_proxy_set,
-                                    bool par);
+  void free_region_if_empty(HeapRegion* hr,
+                            size_t* pre_used,
+                            FreeRegionList* free_list,
+                            HumongousRegionSet* humongous_proxy_set,
+                            HRRSCleanupTask* hrrs_cleanup_task,
+                            bool par);
 
   // It appends the free list to the master free list and updates the
   // master humongous list according to the contents of the proxy
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Jan 25 17:58:19 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -463,7 +463,6 @@
   }
 
   static void par_contract_all();
-
 };
 
 void PosParPRT::par_contract_all() {
@@ -1070,6 +1069,11 @@
 
 }
 
+void
+OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) {
+  _sparse_table.do_cleanup_work(hrrs_cleanup_task);
+}
+
 // Determines how many threads can add records to an rset in parallel.
 // This can be done by either mutator threads together with the
 // concurrent refinement threads or GC threads.
@@ -1384,6 +1388,19 @@
   }
 }
 
+void HeapRegionRemSet::reset_for_cleanup_tasks() {
+  SparsePRT::reset_for_cleanup_tasks();
+}
+
+void HeapRegionRemSet::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) {
+  _other_regions.do_cleanup_work(hrrs_cleanup_task);
+}
+
+void
+HeapRegionRemSet::finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task) {
+  SparsePRT::finish_cleanup_task(hrrs_cleanup_task);
+}
+
 #ifndef PRODUCT
 void HeapRegionRemSet::test() {
   os::sleep(Thread::current(), (jlong)5000, false);
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Jan 25 17:58:19 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@
 class PosParPRT;
 class SparsePRT;
 
+// Essentially a wrapper around SparsePRTCleanupTask. See
+// sparsePRT.hpp for more details.
+class HRRSCleanupTask : public SparsePRTCleanupTask {
+};
 
 // The "_coarse_map" is a bitmap with one bit for each region, where set
 // bits indicate that the corresponding region may contain some pointer
@@ -156,6 +160,8 @@
   // "from_hr" is being cleared; remove any entries from it.
   void clear_incoming_entry(HeapRegion* from_hr);
 
+  void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
+
   // Declare the heap size (in # of regions) to the OtherRegionsTable.
   // (Uses it to initialize from_card_cache).
   static void init_from_card_cache(size_t max_regions);
@@ -165,10 +171,8 @@
   static void shrink_from_card_cache(size_t new_n_regs);
 
   static void print_from_card_cache();
-
 };
 
-
 class HeapRegionRemSet : public CHeapObj {
   friend class VMStructs;
   friend class HeapRegionRemSetIterator;
@@ -342,11 +346,16 @@
   static void print_recorded();
   static void record_event(Event evnt);
 
+  // These are wrappers for the similarly-named methods on
+  // SparsePRT. Look at sparsePRT.hpp for more details.
+  static void reset_for_cleanup_tasks();
+  void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
+  static void finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task);
+
   // Run unit tests.
 #ifndef PRODUCT
   static void test();
 #endif
-
 };
 
 class HeapRegionRemSetIterator : public CHeapObj {
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Tue Jan 25 17:58:19 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -415,6 +415,38 @@
   return NULL;
 }
 
+void SparsePRT::reset_for_cleanup_tasks() {
+  _head_expanded_list = NULL;
+}
+
+void SparsePRT::do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task) {
+  if (should_be_on_expanded_list()) {
+    sprt_cleanup_task->add(this);
+  }
+}
+
+void SparsePRT::finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task) {
+  assert(ParGCRareEvent_lock->owned_by_self(), "pre-condition");
+  SparsePRT* head = sprt_cleanup_task->head();
+  SparsePRT* tail = sprt_cleanup_task->tail();
+  if (head != NULL) {
+    assert(tail != NULL, "if head is not NULL, so should tail");
+
+    tail->set_next_expanded(_head_expanded_list);
+    _head_expanded_list = head;
+  } else {
+    assert(tail == NULL, "if head is NULL, so should tail");
+  }
+}
+
+bool SparsePRT::should_be_on_expanded_list() {
+  if (_expanded) {
+    assert(_cur != _next, "if _expanded is true, cur should be != _next");
+  } else {
+    assert(_cur == _next, "if _expanded is false, cur should be == _next");
+  }
+  return expanded();
+}
 
 void SparsePRT::cleanup_all() {
   // First clean up all expanded tables so they agree on next and cur.
@@ -484,6 +516,7 @@
     _cur->clear();
   }
   _next = _cur;
+  _expanded = false;
 }
 
 void SparsePRT::cleanup() {
@@ -518,3 +551,15 @@
   }
   add_to_expanded_list(this);
 }
+
+void SparsePRTCleanupTask::add(SparsePRT* sprt) {
+  assert(sprt->should_be_on_expanded_list(), "pre-condition");
+
+  sprt->set_next_expanded(NULL);
+  if (_tail != NULL) {
+    _tail->set_next_expanded(sprt);
+  } else {
+    _head = sprt;
+  }
+  _tail = sprt;
+}
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Fri Jan 21 11:30:22 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Tue Jan 25 17:58:19 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -212,8 +212,11 @@
 // mutex.
 
 class SparsePRTIter;
+class SparsePRTCleanupTask;
 
 class SparsePRT VALUE_OBJ_CLASS_SPEC {
+  friend class SparsePRTCleanupTask;
+
   //  Iterations are done on the _cur hash table, since they only need to
   //  see entries visible at the start of a collection pause.
   //  All other operations are done using the _next hash table.
@@ -238,6 +241,8 @@
   SparsePRT* next_expanded() { return _next_expanded; }
   void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
 
+  bool should_be_on_expanded_list();
+
   static SparsePRT* _head_expanded_list;
 
 public:
@@ -284,12 +289,36 @@
   static void add_to_expanded_list(SparsePRT* sprt);
   static SparsePRT* get_from_expanded_list();
 
+  // The purpose of these three methods is to help the GC workers
+  // during the cleanup pause to recreate the expanded list, purging
+  // any tables from it that belong to regions that are freed during
+  // cleanup (if we don't purge those tables, there is a race that
+  // causes various crashes; see CR 7014261).
+  //
+  // We chose to recreate the expanded list, instead of purging
+  // entries from it by iterating over it, to avoid this serial phase
+  // at the end of the cleanup pause.
+  //
+  // The three methods below work as follows:
+  // * reset_for_cleanup_tasks() : Nulls the expanded list head at the
+  //   start of the cleanup pause.
+  // * do_cleanup_work() : Called by the cleanup workers for every
+  //   region that is not free / is being freed by the cleanup
+  //   pause. It creates a list of expanded tables whose head / tail
+  //   are on the thread-local SparsePRTCleanupTask object.
+  // * finish_cleanup_task() : Called by the cleanup workers after
+  //   they complete their cleanup task. It adds the local list into
+  //   the global expanded list. It assumes that the
+  //   ParGCRareEvent_lock is being held to ensure MT-safety.
+  static void reset_for_cleanup_tasks();
+  void do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task);
+  static void finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task);
+
   bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const {
     return _next->contains_card(region_id, card_index);
   }
 };
 
-
 class SparsePRTIter: public RSHashTableIter {
 public:
   void init(const SparsePRT* sprt) {
@@ -300,4 +329,22 @@
   }
 };
 
+// This allows each worker during a cleanup pause to create a
+// thread-local list of sparse tables that have been expanded and need
+// to be processed at the beginning of the next GC pause. This lists
+// are concatenated into the single expanded list at the end of the
+// cleanup pause.
+class SparsePRTCleanupTask VALUE_OBJ_CLASS_SPEC {
+private:
+  SparsePRT* _head;
+  SparsePRT* _tail;
+
+public:
+  SparsePRTCleanupTask() : _head(NULL), _tail(NULL) { }
+
+  void add(SparsePRT* sprt);
+  SparsePRT* head() { return _head; }
+  SparsePRT* tail() { return _tail; }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP