changeset 2462:92da084fefc9

6668573: CMS: reference processing crash if ParallelCMSThreads > ParallelGCThreads
Summary: Use _max_num_q = max(discovery_degree, processing_degree), and let balance_queues() redistribute from discovery_degree to processing_degree of queues. This should also allow a more dynamic and flexible parallelism policy in the future.
Reviewed-by: jmasa, johnc
author ysr
date Thu, 17 Mar 2011 10:32:46 -0700
parents dde920245681
children 048f98400b8e
files src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/parNew/parNewGeneration.cpp src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp src/share/vm/memory/generation.cpp src/share/vm/memory/referenceProcessor.cpp src/share/vm/memory/referenceProcessor.hpp src/share/vm/utilities/workgroup.cpp src/share/vm/utilities/workgroup.hpp
diffstat 13 files changed, 149 insertions(+), 154 deletions(-)
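
The crux of the fix, as a minimal standalone sketch (illustrative C++, not
part of the changeset; the flag values are assumed for demonstration):

    // Sketch of the new sizing invariant: _max_num_q must cover the
    // discovery degree, while _num_q tracks the processing degree.
    #include <algorithm>
    #include <cstdio>

    int main() {
      // Assumed example values; any ConcGCThreads > ParallelGCThreads
      // reproduces the shape of bug 6668573.
      int ParallelGCThreads = 4;
      int ConcGCThreads     = 8;

      int mt_processing_degree = ParallelGCThreads;
      int mt_discovery_degree  = std::max(ConcGCThreads, ParallelGCThreads);

      int num_q     = std::max(1, mt_processing_degree);    // active queues
      int max_num_q = std::max(num_q, mt_discovery_degree); // allocated lists

      // Before the fix only num_q lists were allocated, so discovery by
      // more than num_q threads indexed past the end of the array.
      std::printf("_num_q = %d, _max_num_q = %d\n", num_q, max_num_q);
      return 0;
    }
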
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -292,13 +292,15 @@
 void CMSCollector::ref_processor_init() {
   if (_ref_processor == NULL) {
     // Allocate and initialize a reference processor
-    _ref_processor = ReferenceProcessor::create_ref_processor(
-        _span,                               // span
-        _cmsGen->refs_discovery_is_atomic(), // atomic_discovery
-        _cmsGen->refs_discovery_is_mt(),     // mt_discovery
-        &_is_alive_closure,
-        ParallelGCThreads,
-        ParallelRefProcEnabled);
+    _ref_processor =
+      new ReferenceProcessor(_span,                               // span
+                             (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
+                             (int) ParallelGCThreads,             // mt processing degree
+                             _cmsGen->refs_discovery_is_mt(),     // mt discovery
+                             (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
+                             _cmsGen->refs_discovery_is_atomic(), // atomic discovery (false for CMS)
+                             &_is_alive_closure,                  // closure for liveness info
+                             false);                              // next field updates do not need write barrier
     // Initialize the _ref_processor field of CMSGen
     _cmsGen->set_ref_processor(_ref_processor);
 
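
For concreteness (assumed flag values, not output from the changeset):

    // -XX:ParallelGCThreads=4 -XX:ConcGCThreads=8 gives:
    //   mt processing degree = 4               -> _num_q     = 4
    //   mt discovery degree  = MAX2(8, 4) = 8  -> _max_num_q = 8
    // Discovery may populate all 8 lists; balance_queues() later
    // redistributes their entries across the 4 active processing queues.
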
@@ -641,7 +643,7 @@
   }
 
   // Support for multi-threaded concurrent phases
-  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
+  if (CMSConcurrentMTEnabled) {
     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
       // just for now
       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
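
The default set here is a quarter of the parallel GC threads, rounded up (a
worked example, not changeset output):

    // (ParallelGCThreads + 3) / 4 rounds up in integer division:
    //   ParallelGCThreads = 8  -> ConcGCThreads = (8 + 3) / 4 = 2
    //   ParallelGCThreads = 13 -> ConcGCThreads = (13 + 3) / 4 = 4
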
@@ -1990,17 +1992,16 @@
   // Temporarily widen the span of the weak reference processing to
   // the entire heap.
   MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
-  ReferenceProcessorSpanMutator x(ref_processor(), new_span);
-
+  ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
   // Temporarily, clear the "is_alive_non_header" field of the
   // reference processor.
-  ReferenceProcessorIsAliveMutator y(ref_processor(), NULL);
-
+  ReferenceProcessorIsAliveMutator rp_mut_closure(ref_processor(), NULL);
   // Temporarily make reference _processing_ single threaded (non-MT).
-  ReferenceProcessorMTProcMutator z(ref_processor(), false);
-
+  ReferenceProcessorMTProcMutator rp_mut_mt_processing(ref_processor(), false);
   // Temporarily make refs discovery atomic
-  ReferenceProcessorAtomicMutator w(ref_processor(), true);
+  ReferenceProcessorAtomicMutator rp_mut_atomic(ref_processor(), true);
+  // Temporarily make reference _discovery_ single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
 
   ref_processor()->set_enqueuing_is_done(false);
   ref_processor()->enable_discovery();
@@ -4265,9 +4266,7 @@
 
   // Refs discovery is already non-atomic.
   assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
-  // Mutate the Refs discovery so it is MT during the
-  // multi-threaded marking phase.
-  ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1);
+  assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
   DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
   conc_workers()->start_task(&tsk);
   while (tsk.yielded()) {
@@ -4320,6 +4319,8 @@
   ResourceMark rm;
   HandleMark   hm;
 
+  // Temporarily make refs discovery single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
   MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
     &_markStack, &_revisitStack, CMSYield && asynch);
   // the last argument to iterate indicates whether the iteration
@@ -4358,10 +4359,6 @@
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
-    // Precleaning is currently not MT but the reference processor
-    // may be set for MT.  Disable it temporarily here.
-    ReferenceProcessor* rp = ref_processor();
-    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
@@ -4504,11 +4501,16 @@
          _collectorState == AbortablePreclean, "incorrect state");
   ResourceMark rm;
   HandleMark   hm;
+
+  // Precleaning is currently not MT but the reference processor
+  // may be set for MT.  Disable it temporarily here.
+  ReferenceProcessor* rp = ref_processor();
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
+
   // Do one pass of scrubbing the discovered reference lists
   // to remove any reference objects with strongly-reachable
   // referents.
   if (clean_refs) {
-    ReferenceProcessor* rp = ref_processor();
     CMSPrecleanRefsYieldClosure yield_cl(this);
     assert(rp->span().equals(_span), "Spans should be equal");
     CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap,
@@ -5578,8 +5580,10 @@
   // in the multi-threaded case, but we special-case n=1 here to get
   // repeatable measurements of the 1-thread overhead of the parallel code.
   if (n_workers > 1) {
-    // Make refs discovery MT-safe
-    ReferenceProcessorMTMutator mt(ref_processor(), true);
+    // Make refs discovery MT-safe, if it isn't already: it may not
+    // necessarily be so, since it's possible that we are doing
+    // ST marking.
+    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
     GenCollectedHeap::StrongRootsScope srs(gch);
     workers->run_task(&tsk);
   } else {
@@ -5705,14 +5709,19 @@
                       CMSBitMap*       mark_bit_map,
                       AbstractWorkGang* workers,
                       OopTaskQueueSet* task_queues):
+    // XXX Should superclass AGTWOQ also know about AWG since it knows
+    // about the task_queues used by the AWG? Then it could initialize
+    // the terminator() object. See 6984287. The set_for_termination()
+    // below is a temporary band-aid for the regression in 6984287.
     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
       task_queues),
     _task(task),
     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
-    {
-      assert(_collector->_span.equals(_span) && !_span.is_empty(),
-             "Inconsistency in _span");
-    }
+  {
+    assert(_collector->_span.equals(_span) && !_span.is_empty(),
+           "Inconsistency in _span");
+    set_for_termination(workers->active_workers());
+  }
 
   OopTaskQueueSet* task_queues() { return queues(); }
 
@@ -5874,8 +5883,7 @@
       // That is OK as long as the Reference lists are balanced (see
       // balance_all_queues() and balance_queues()).
 
-
-      rp->set_mt_degree(ParallelGCThreads);
+      rp->set_active_mt_degree(ParallelGCThreads);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1133,7 +1133,7 @@
     // rare that the cost of the CAS's involved is in the
     // noise. That's a measurement that should be done, and
     // the code simplified if that turns out to be the case.
-    return false;
+    return ConcGCThreads > 1;
   }
 
   // Override
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -2141,21 +2141,22 @@
   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
   G1CMDrainMarkingStackClosure
     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
-
   // We use the work gang from the G1CollectedHeap and we utilize all
   // the worker threads.
-  int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1);
+  int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
+  active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
 
   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
                                           g1h->workers(), active_workers);
 
+
   if (rp->processing_is_mt()) {
     // Set the degree of MT here.  If the discovery is done MT, there
     // may have been a different number of threads doing the discovery
     // and a different number of discovered lists may have Ref objects.
     // That is OK as long as the Reference lists are balanced (see
     // balance_all_queues() and balance_queues()).
-    rp->set_mt_degree(active_workers);
+    rp->set_active_mt_degree(active_workers);
 
     rp->process_discovered_references(&g1_is_alive,
                                       &g1_keep_alive,
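
A conceptual sketch of the balancing contract relied on here (illustrative
code; not the HotSpot implementation of balance_queues()):

    #include <vector>

    // Entries discovered into up to max_num_q lists are redistributed so
    // that only the first num_q (active) queues end up non-empty.
    // Assumes num_q >= 1.
    void balance(std::vector<std::vector<int> >& lists, size_t num_q) {
      for (size_t i = num_q; i < lists.size(); ++i) {
        std::vector<int>& dst = lists[i % num_q];
        dst.insert(dst.end(), lists[i].begin(), lists[i].end());
        lists[i].clear();
      }
    }

    int main() {
      std::vector<std::vector<int> > lists(8);   // discovery degree 8
      lists[6].push_back(42);                    // found by discoverer #6
      balance(lists, 4);                         // processing degree 4
      return lists[6].empty() ? 0 : 1;           // entry moved to queue 2
    }
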
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1462,7 +1462,7 @@
     // how reference processing currently works in G1.
 
     // Temporarily make reference _discovery_ single threaded (non-MT).
-    ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false);
+    ReferenceProcessorMTDiscoveryMutator rp_disc_ser(ref_processor(), false);
 
     // Temporarily make refs discovery atomic
     ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true);
@@ -2219,16 +2219,16 @@
 
   SharedHeap::ref_processing_init();
   MemRegion mr = reserved_region();
-  _ref_processor = ReferenceProcessor::create_ref_processor(
-                                         mr,    // span
-                                         false, // Reference discovery is not atomic
-                                         true,  // mt_discovery
-                                         &_is_alive_closure, // is alive closure
-                                                             // for efficiency
-                                         ParallelGCThreads,
-                                         ParallelRefProcEnabled,
-                                         true); // Setting next fields of discovered
-                                                // lists requires a barrier.
+  _ref_processor =
+    new ReferenceProcessor(mr,    // span
+                           ParallelRefProcEnabled && (ParallelGCThreads > 1),    // mt processing
+                           (int) ParallelGCThreads,   // degree of mt processing
+                           ParallelGCThreads > 1 || ConcGCThreads > 1,  // mt discovery
+                           (int) MAX2(ParallelGCThreads, ConcGCThreads), // degree of mt discovery
+                           false,                     // Reference discovery is not atomic
+                           &_is_alive_closure,        // is alive closure for efficiency
+                           true);                     // Setting next fields of discovered
+                                                      // lists requires a barrier.
 }
 
 size_t G1CollectedHeap::capacity() const {
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1530,13 +1530,15 @@
 {
   if (_ref_processor == NULL) {
     // Allocate and initialize a reference processor
-    _ref_processor = ReferenceProcessor::create_ref_processor(
-        _reserved,                  // span
-        refs_discovery_is_atomic(), // atomic_discovery
-        refs_discovery_is_mt(),     // mt_discovery
-        NULL,                       // is_alive_non_header
-        ParallelGCThreads,
-        ParallelRefProcEnabled);
+    _ref_processor =
+      new ReferenceProcessor(_reserved,                  // span
+                             ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
+                             (int) ParallelGCThreads,    // mt processing degree
+                             refs_discovery_is_mt(),     // mt discovery
+                             (int) ParallelGCThreads,    // mt discovery degree
+                             refs_discovery_is_atomic(), // atomic_discovery
+                             NULL,                       // is_alive_non_header
+                             false);                     // write barrier for next field updates
   }
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,9 +58,7 @@
 
 void PSMarkSweep::initialize() {
   MemRegion mr = Universe::heap()->reserved_region();
-  _ref_processor = new ReferenceProcessor(mr,
-                                          true,    // atomic_discovery
-                                          false);  // mt_discovery
+  _ref_processor = new ReferenceProcessor(mr);     // a vanilla ref proc
   _counters = new CollectorCounters("PSMarkSweep", 1);
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -827,13 +827,15 @@
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
 
   MemRegion mr = heap->reserved_region();
-  _ref_processor = ReferenceProcessor::create_ref_processor(
-    mr,                         // span
-    true,                       // atomic_discovery
-    true,                       // mt_discovery
-    &_is_alive_closure,
-    ParallelGCThreads,
-    ParallelRefProcEnabled);
+  _ref_processor =
+    new ReferenceProcessor(mr,            // span
+                           ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
+                           (int) ParallelGCThreads, // mt processing degree
+                           true,          // mt discovery
+                           (int) ParallelGCThreads, // mt discovery degree
+                           true,          // atomic_discovery
+                           &_is_alive_closure, // non-header is alive closure
+                           false);        // write barrier for next field updates
   _counters = new CollectorCounters("PSParallelCompact", 1);
 
   // Initialize static fields in ParCompactionManager.
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -796,13 +796,15 @@
 
   // Initialize ref handling object for scavenging.
   MemRegion mr = young_gen->reserved();
-  _ref_processor = ReferenceProcessor::create_ref_processor(
-    mr,                         // span
-    true,                       // atomic_discovery
-    true,                       // mt_discovery
-    NULL,                       // is_alive_non_header
-    ParallelGCThreads,
-    ParallelRefProcEnabled);
+  _ref_processor =
+    new ReferenceProcessor(mr,                         // span
+                           ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
+                           (int) ParallelGCThreads,    // mt processing degree
+                           true,                       // mt discovery
+                           (int) ParallelGCThreads,    // mt discovery degree
+                           true,                       // atomic_discovery
+                           NULL,                       // header provides liveness info
+                           false);                     // next field updates do not need write barrier
 
   // Cache the cardtable
   BarrierSet* bs = Universe::heap()->barrier_set();
--- a/src/share/vm/memory/generation.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/memory/generation.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,14 +83,11 @@
 }
 
 // By default we get a single threaded default reference processor;
-// generations needing multi-threaded refs discovery override this method.
+// generations needing multi-threaded refs processing or discovery override this method.
 void Generation::ref_processor_init() {
   assert(_ref_processor == NULL, "a reference processor already exists");
   assert(!_reserved.is_empty(), "empty generation?");
-  _ref_processor =
-    new ReferenceProcessor(_reserved,                  // span
-                           refs_discovery_is_atomic(), // atomic_discovery
-                           refs_discovery_is_mt());    // mt_discovery
+  _ref_processor = new ReferenceProcessor(_reserved);    // a vanilla reference processor
   if (_ref_processor == NULL) {
     vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
   }
--- a/src/share/vm/memory/referenceProcessor.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/memory/referenceProcessor.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -102,40 +102,17 @@
             "Unrecognized RefDiscoveryPolicy");
 }
 
-ReferenceProcessor*
-ReferenceProcessor::create_ref_processor(MemRegion          span,
-                                         bool               atomic_discovery,
-                                         bool               mt_discovery,
-                                         BoolObjectClosure* is_alive_non_header,
-                                         int                parallel_gc_threads,
-                                         bool               mt_processing,
-                                         bool               dl_needs_barrier) {
-  int mt_degree = 1;
-  if (parallel_gc_threads > 1) {
-    mt_degree = parallel_gc_threads;
-  }
-  ReferenceProcessor* rp =
-    new ReferenceProcessor(span, atomic_discovery,
-                           mt_discovery, mt_degree,
-                           mt_processing && (parallel_gc_threads > 0),
-                           dl_needs_barrier);
-  if (rp == NULL) {
-    vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
-  }
-  rp->set_is_alive_non_header(is_alive_non_header);
-  rp->setup_policy(false /* default soft ref policy */);
-  return rp;
-}
-
 ReferenceProcessor::ReferenceProcessor(MemRegion span,
-                                       bool      atomic_discovery,
+                                       bool      mt_processing,
+                                       int       mt_processing_degree,
                                        bool      mt_discovery,
-                                       int       mt_degree,
-                                       bool      mt_processing,
+                                       int       mt_discovery_degree,
+                                       bool      atomic_discovery,
+                                       BoolObjectClosure* is_alive_non_header,
                                        bool      discovered_list_needs_barrier)  :
   _discovering_refs(false),
   _enqueuing_is_done(false),
-  _is_alive_non_header(NULL),
+  _is_alive_non_header(is_alive_non_header),
   _discovered_list_needs_barrier(discovered_list_needs_barrier),
   _bs(NULL),
   _processing_is_mt(mt_processing),
@@ -144,8 +121,8 @@
   _span = span;
   _discovery_is_atomic = atomic_discovery;
   _discovery_is_mt     = mt_discovery;
-  _num_q               = mt_degree;
-  _max_num_q           = mt_degree;
+  _num_q               = MAX2(1, mt_processing_degree);
+  _max_num_q           = MAX2(_num_q, mt_discovery_degree);
   _discoveredSoftRefs  = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref);
   if (_discoveredSoftRefs == NULL) {
     vm_exit_during_initialization("Could not allocate RefProc Array");
@@ -163,6 +140,7 @@
   if (discovered_list_needs_barrier) {
     _bs = Universe::heap()->barrier_set();
   }
+  setup_policy(false /* default soft ref policy */);
 }
 
 #ifndef PRODUCT
@@ -405,15 +383,14 @@
   { }
 
   virtual void work(unsigned int work_id) {
-    assert(work_id < (unsigned int)_ref_processor.num_q(), "Index out-of-bounds");
+    assert(work_id < (unsigned int)_ref_processor.max_num_q(), "Index out-of-bounds");
     // Simplest first cut: static partitioning.
     int index = work_id;
     // The increment on "index" must correspond to the maximum number of queues
     // (n_queues) with which that ReferenceProcessor was created.  That
     // is because of the "clever" way the discovered references lists were
-    // allocated and are indexed into.  That number is ParallelGCThreads
-    // currently.  Assert that.
-    assert(_n_queues == (int) ParallelGCThreads, "Different number not expected");
+    // allocated and are indexed into.
+    assert(_n_queues == (int) _ref_processor.max_num_q(), "Different number not expected");
     for (int j = 0;
          j < subclasses_of_ref;
          j++, index += _n_queues) {
@@ -672,7 +649,7 @@
     }
   }
   NOT_PRODUCT(
-    if (PrintGCDetails && TraceReferenceGC) {
+    if (PrintGCDetails && TraceReferenceGC && (iter.processed() > 0)) {
       gclog_or_tty->print_cr(" Dropped %d active Refs out of %d "
         "Refs in discovered list " INTPTR_FORMAT,
         iter.removed(), iter.processed(), (address)refs_list.head());
@@ -711,7 +688,7 @@
   // Now close the newly reachable set
   complete_gc->do_void();
   NOT_PRODUCT(
-    if (PrintGCDetails && TraceReferenceGC) {
+    if (PrintGCDetails && TraceReferenceGC && (iter.processed() > 0)) {
       gclog_or_tty->print_cr(" Dropped %d active Refs out of %d "
         "Refs in discovered list " INTPTR_FORMAT,
         iter.removed(), iter.processed(), (address)refs_list.head());
@@ -951,7 +928,7 @@
   }
   if (PrintReferenceGC && PrintGCDetails) {
     size_t total = 0;
-    for (int i = 0; i < _num_q; ++i) {
+    for (int i = 0; i < _max_num_q; ++i) {
       total += refs_lists[i].length();
     }
     gclog_or_tty->print(", %u refs", total);
@@ -967,7 +944,7 @@
       RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/);
       task_executor->execute(phase1);
     } else {
-      for (int i = 0; i < _num_q; i++) {
+      for (int i = 0; i < _max_num_q; i++) {
         process_phase1(refs_lists[i], policy,
                        is_alive, keep_alive, complete_gc);
       }
@@ -983,7 +960,7 @@
     RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/);
     task_executor->execute(phase2);
   } else {
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
       process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc);
     }
   }
@@ -994,7 +971,7 @@
     RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/);
     task_executor->execute(phase3);
   } else {
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
       process_phase3(refs_lists[i], clear_referent,
                      is_alive, keep_alive, complete_gc);
     }
@@ -1008,7 +985,7 @@
   //   for (int j = 0; j < _num_q; j++) {
   //     int index = i * _max_num_q + j;
   for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
-    if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) {
+    if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
       gclog_or_tty->print_cr(
         "\nScrubbing %s discovered list of Null referents",
         list_name(i));
@@ -1350,7 +1327,7 @@
   {
     TraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1363,7 +1340,7 @@
   {
     TraceTime tt("Preclean FinalReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1376,7 +1353,7 @@
   {
     TraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1433,7 +1410,7 @@
   complete_gc->do_void();
 
   NOT_PRODUCT(
-    if (PrintGCDetails && PrintReferenceGC) {
+    if (PrintGCDetails && PrintReferenceGC && (iter.processed() > 0)) {
       gclog_or_tty->print_cr(" Dropped %d Refs out of %d "
         "Refs in discovered list " INTPTR_FORMAT,
         iter.removed(), iter.processed(), (address)refs_list.head());
--- a/src/share/vm/memory/referenceProcessor.hpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/memory/referenceProcessor.hpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -71,7 +71,7 @@
   bool        _enqueuing_is_done;     // true if all weak references enqueued
   bool        _processing_is_mt;      // true during phases when
                                       // reference processing is MT.
-  int         _next_id;               // round-robin counter in
+  int         _next_id;               // round-robin mod _num_q counter in
                                       // support of work distribution
 
   // For collectors that do not keep GC marking information
@@ -103,7 +103,8 @@
 
  public:
   int num_q()                            { return _num_q; }
-  void set_mt_degree(int v)              { _num_q = v; }
+  int max_num_q()                        { return _max_num_q; }
+  void set_active_mt_degree(int v)       { _num_q = v; }
   DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; }
   static oop  sentinel_ref()             { return _sentinelRef; }
   static oop* adr_sentinel_ref()         { return &_sentinelRef; }
@@ -216,6 +217,7 @@
                                    VoidClosure*       complete_gc,
                                    YieldClosure*      yield);
 
+  // round-robin mod _num_q (note: _not_ mod _max_num_q)
   int next_id() {
     int id = _next_id;
     if (++_next_id == _num_q) {
@@ -256,23 +258,15 @@
     _max_num_q(0),
     _processing_is_mt(false),
     _next_id(0)
-  {}
-
-  ReferenceProcessor(MemRegion span, bool atomic_discovery,
-                     bool mt_discovery,
-                     int mt_degree = 1,
-                     bool mt_processing = false,
-                     bool discovered_list_needs_barrier = false);
+  { }
 
-  // Allocates and initializes a reference processor.
-  static ReferenceProcessor* create_ref_processor(
-    MemRegion          span,
-    bool               atomic_discovery,
-    bool               mt_discovery,
-    BoolObjectClosure* is_alive_non_header = NULL,
-    int                parallel_gc_threads = 1,
-    bool               mt_processing = false,
-    bool               discovered_list_needs_barrier = false);
+  // Default parameters give you a vanilla reference processor.
+  ReferenceProcessor(MemRegion span,
+                     bool mt_processing = false, int mt_processing_degree = 1,
+                     bool mt_discovery  = false, int mt_discovery_degree  = 1,
+                     bool atomic_discovery = true,
+                     BoolObjectClosure* is_alive_non_header = NULL,
+                     bool discovered_list_needs_barrier = false);
 
   // RefDiscoveryPolicy values
   enum DiscoveryPolicy {
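
Construction now reads uniformly across collectors (a usage sketch; "span"
and "is_alive" are assumed to be in scope):

    // Vanilla: ST processing and discovery, atomic discovery, no closure.
    ReferenceProcessor* vanilla = new ReferenceProcessor(span);

    // Fully parameterized, e.g. processing degree 4, discovery degree 8:
    ReferenceProcessor* mt =
      new ReferenceProcessor(span,
                             true,  4,   // mt processing, degree
                             true,  8,   // mt discovery, degree
                             false,      // non-atomic discovery
                             &is_alive); // is_alive_non_header closure
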
@@ -397,20 +391,20 @@
 // A utility class to temporarily change the MT'ness of
 // reference discovery for the given ReferenceProcessor
 // in the scope that contains it.
-class ReferenceProcessorMTMutator: StackObj {
+class ReferenceProcessorMTDiscoveryMutator: StackObj {
  private:
   ReferenceProcessor* _rp;
   bool                _saved_mt;
 
  public:
-  ReferenceProcessorMTMutator(ReferenceProcessor* rp,
-                              bool mt):
+  ReferenceProcessorMTDiscoveryMutator(ReferenceProcessor* rp,
+                                       bool mt):
     _rp(rp) {
     _saved_mt = _rp->discovery_is_mt();
     _rp->set_mt_discovery(mt);
   }
 
-  ~ReferenceProcessorMTMutator() {
+  ~ReferenceProcessorMTDiscoveryMutator() {
     _rp->set_mt_discovery(_saved_mt);
   }
 };
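
Typical use of these scoped mutators (illustrative):

    {
      // Temporarily force single-threaded discovery within this scope.
      ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
      // ... work that assumes ST discovery ...
    }   // destructor restores the saved mt_discovery setting
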
--- a/src/share/vm/utilities/workgroup.cpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/utilities/workgroup.cpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -156,7 +156,7 @@
    tty->print_cr("\nFinished work gang %s: %d/%d sequence %d",
                   name(), finished_workers(), total_workers(),
                   _sequence_number);
-    }
+  }
 }
 
 void AbstractWorkGang::stop() {
--- a/src/share/vm/utilities/workgroup.hpp	Wed Mar 16 10:37:08 2011 -0700
+++ b/src/share/vm/utilities/workgroup.hpp	Thu Mar 17 10:32:46 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,20 @@
 # include "thread_windows.inline.hpp"
 #endif
 
+// Task class hierarchy:
+//   AbstractGangTask
+//     AbstractGangTaskWOopQueues
+//
+// Gang/Group class hierarchy:
+//   AbstractWorkGang
+//     WorkGang
+//       FlexibleWorkGang
+//         YieldingFlexibleWorkGang (defined in another file)
+//
+// Worker class hierarchy:
+//   GangWorker (subclass of WorkerThread)
+//     YieldingFlexibleGangWorker   (defined in another file)
+
 // Forward declarations of classes defined here
 
 class WorkGang;