changeset 8775:20bad8c6c7b4 jdk8u51-b31

Merge
author asaha
date Fri, 19 Jun 2015 08:06:17 -0700
parents 37eb076f5b7a (current diff) 48603bfe8438 (diff)
children ea47136e6ea4 985a04254462
files .hgtags make/hotspot_version
diffstat 27 files changed, 1196 insertions(+), 1155 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Jun 08 14:12:27 2015 -0700
+++ b/.hgtags	Fri Jun 19 08:06:17 2015 -0700
@@ -648,6 +648,7 @@
 dc3c47fc6218003b23338b978b3f13a6d7976b41 jdk8u45-b34
 3cb364e46590add7cb42ec8b6565a3c62adf824d jdk8u45-b35
 48b09bb741171b0069000ac1cf5407ef2357d3d1 jdk8u45-b36
+3c2ea5da6afd55a524e25dc56747940324befda2 jdk8u45-b37
 b22b01407a8140041545afe1f2d6335db4d94ba5 jdk8u51-b00
 c1de2652a48c1d4a0c96707acc73db3cd317df2a jdk8u51-b01
 8f03c2f5fc170da5fca2cf65734941efb619feca jdk8u51-b02
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -598,7 +598,7 @@
   _collector_policy(cp),
   _should_unload_classes(CMSClassUnloadingEnabled),
   _concurrent_cycles_since_last_unload(0),
-  _roots_scanning_options(SharedHeap::SO_None),
+  _roots_scanning_options(GenCollectedHeap::SO_None),
   _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
   _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()),
@@ -3068,7 +3068,7 @@
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
-                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
                          should_unload_classes(),
                          &notOlder,
                          NULL,
@@ -3136,7 +3136,7 @@
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
-                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
                          should_unload_classes(),
                          &notOlder,
                          NULL,
@@ -3327,7 +3327,7 @@
 void CMSCollector::setup_cms_unloading_and_verification_state() {
   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =   SharedHeap::SO_AllCodeCache;
+  const  int  rso           =   GenCollectedHeap::SO_AllCodeCache;
 
   // We set the proper root for this CMS cycle here.
   if (should_unload_classes()) {   // Should unload classes this cycle
@@ -3753,7 +3753,7 @@
       gch->gen_process_roots(_cmsGen->level(),
                              true,   // younger gens are roots
                              true,   // activate StrongRootsScope
-                             SharedHeap::ScanningOption(roots_scanning_options()),
+                             GenCollectedHeap::ScanningOption(roots_scanning_options()),
                              should_unload_classes(),
                              &notOlder,
                              NULL,
@@ -5254,13 +5254,13 @@
   gch->gen_process_roots(_collector->_cmsGen->level(),
                          false,     // yg was scanned above
                          false,     // this is parallel code
-                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mri_cl,
                          NULL,
                          &cld_closure);
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5390,14 +5390,14 @@
   gch->gen_process_roots(_collector->_cmsGen->level(),
                          false,     // yg was scanned above
                          false,     // this is parallel code
-                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mrias_cl,
                          NULL,
                          NULL);     // The dirty klasses will be handled below
 
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5982,14 +5982,14 @@
     gch->gen_process_roots(_cmsGen->level(),
                            true,  // younger gens as roots
                            false, // use the local StrongRootsScope
-                           SharedHeap::ScanningOption(roots_scanning_options()),
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
                            should_unload_classes(),
                            &mrias_cl,
                            NULL,
                            NULL); // The dirty klasses will be handled below
 
     assert(should_unload_classes()
-           || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+           || (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -46,6 +46,7 @@
 #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
 #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"
@@ -85,18 +86,6 @@
 // apply to TLAB allocation, which is not part of this interface: it
 // is done by clients of this interface.)
 
-// Notes on implementation of parallelism in different tasks.
-//
-// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
-// The number of GC workers is passed to heap_region_par_iterate_chunked().
-// It does use run_task() which sets _n_workers in the task.
-// G1ParTask executes g1_process_roots() ->
-// SharedHeap::process_roots() which calls eventually to
-// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
-// SequentialSubTasksDone.  SharedHeap::process_roots() also
-// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
-//
-
 // Local to this file.
 
 class RefineCardTableEntryClosure: public CardTableEntryClosure {
@@ -1855,7 +1844,6 @@
   _is_alive_closure_stw(this),
   _ref_processor_cm(NULL),
   _ref_processor_stw(NULL),
-  _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
   _evac_failure_scan_stack(NULL),
   _mark_in_progress(false),
@@ -1888,9 +1876,6 @@
   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()) {
 
   _g1h = this;
-  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }
 
   _allocator = G1Allocator::create_allocator(_g1h);
   _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2;
@@ -2299,11 +2284,11 @@
   hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);
 
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  int n_completed_buffers = 0;
+  size_t n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
-  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers);
+  g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers);
   dcqs.clear_n_completed_buffers();
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
 }
@@ -3292,11 +3277,12 @@
     G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
     G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
 
-    process_all_roots(true,            // activate StrongRootsScope
-                      SO_AllCodeCache, // roots scanning options
-                      &rootsCl,
-                      &cldCl,
-                      &blobsCl);
+    {
+      G1RootProcessor root_processor(this);
+      root_processor.process_all_roots(&rootsCl,
+                                       &cldCl,
+                                       &blobsCl);
+    }
 
     bool failures = rootsCl.failures() || codeRootsCl.failures();
 
@@ -3901,10 +3887,10 @@
 
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
-    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+    uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                                 workers()->active_workers() : 1);
     double pause_start_sec = os::elapsedTime();
-    g1_policy()->phase_times()->note_gc_start(active_workers);
+    g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress());
     log_gc_header();
 
     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
@@ -4582,60 +4568,11 @@
   }
 };
 
-class G1CodeBlobClosure : public CodeBlobClosure {
-  class HeapRegionGatheringOopClosure : public OopClosure {
-    G1CollectedHeap* _g1h;
-    OopClosure* _work;
-    nmethod* _nm;
-
-    template <typename T>
-    void do_oop_work(T* p) {
-      _work->do_oop(p);
-      T oop_or_narrowoop = oopDesc::load_heap_oop(p);
-      if (!oopDesc::is_null(oop_or_narrowoop)) {
-        oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
-        HeapRegion* hr = _g1h->heap_region_containing_raw(o);
-        assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
-        hr->add_strong_code_root(_nm);
-      }
-    }
-
-  public:
-    HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
-
-    void do_oop(oop* o) {
-      do_oop_work(o);
-    }
-
-    void do_oop(narrowOop* o) {
-      do_oop_work(o);
-    }
-
-    void set_nm(nmethod* nm) {
-      _nm = nm;
-    }
-  };
-
-  HeapRegionGatheringOopClosure _oc;
-public:
-  G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
-
-  void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      if (!nm->test_set_oops_do_mark()) {
-        _oc.set_nm(nm);
-        nm->oops_do(&_oc);
-        nm->fix_oop_relocations();
-      }
-    }
-  }
-};
-
 class G1ParTask : public AbstractGangTask {
 protected:
   G1CollectedHeap*       _g1h;
   RefToScanQueueSet      *_queues;
+  G1RootProcessor*       _root_processor;
   ParallelTaskTerminator _terminator;
   uint _n_workers;
 
@@ -4643,10 +4580,11 @@
   Mutex* stats_lock() { return &_stats_lock; }
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
+      _root_processor(root_processor),
       _terminator(0, _queues),
       _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}
@@ -4660,13 +4598,7 @@
   ParallelTaskTerminator* terminator() { return &_terminator; }
 
   virtual void set_for_termination(int active_workers) {
-    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
-    // in the young space (_par_seq_tasks) in the G1 heap
-    // for SequentialSubTasksDone.
-    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
-    // both of which need setting by set_n_termination().
-    _g1h->SharedHeap::set_n_termination(active_workers);
-    _g1h->set_n_termination(active_workers);
+    _root_processor->set_num_workers(active_workers);
     terminator()->reset_for_reuse(active_workers);
     _n_workers = active_workers;
   }
@@ -4703,8 +4635,7 @@
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
-    double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
 
     {
       ResourceMark rm;
@@ -4736,24 +4667,21 @@
                                                                                     false, // Process all klasses.
                                                                                     true); // Need to claim CLDs.
 
-      G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);
-      G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl);
-      // IM Weak code roots are handled later.
-
       OopClosure* strong_root_cl;
       OopClosure* weak_root_cl;
       CLDClosure* strong_cld_cl;
       CLDClosure* weak_cld_cl;
-      CodeBlobClosure* strong_code_cl;
+
+      bool trace_metadata = false;
 
       if (_g1h->g1_policy()->during_initial_mark_pause()) {
         // We also need to mark copied objects.
         strong_root_cl = &scan_mark_root_cl;
         strong_cld_cl  = &scan_mark_cld_cl;
-        strong_code_cl = &scan_mark_code_cl;
         if (ClassUnloadingWithConcurrentMark) {
           weak_root_cl = &scan_mark_weak_root_cl;
           weak_cld_cl  = &scan_mark_weak_cld_cl;
+          trace_metadata = true;
         } else {
           weak_root_cl = &scan_mark_root_cl;
           weak_cld_cl  = &scan_mark_cld_cl;
@@ -4763,31 +4691,32 @@
         weak_root_cl   = &scan_only_root_cl;
         strong_cld_cl  = &scan_only_cld_cl;
         weak_cld_cl    = &scan_only_cld_cl;
-        strong_code_cl = &scan_only_code_cl;
       }
 
-
-      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
-
       pss.start_strong_roots();
-      _g1h->g1_process_roots(strong_root_cl,
-                             weak_root_cl,
-                             &push_heap_rs_cl,
-                             strong_cld_cl,
-                             weak_cld_cl,
-                             strong_code_cl,
-                             worker_id);
-
+
+      _root_processor->evacuate_roots(strong_root_cl,
+                                      weak_root_cl,
+                                      strong_cld_cl,
+                                      weak_cld_cl,
+                                      trace_metadata,
+                                      worker_id);
+
+      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
+      _root_processor->scan_remembered_sets(&push_heap_rs_cl,
+                                            weak_root_cl,
+                                            worker_id);
       pss.end_strong_roots();
 
       {
         double start = os::elapsedTime();
         G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
         evac.do_void();
-        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-        double term_ms = pss.term_time()*1000.0;
-        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
-        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
+        double elapsed_sec = os::elapsedTime() - start;
+        double term_sec = pss.term_time();
+        _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec);
+        _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec);
+        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts());
       }
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4803,100 +4732,10 @@
       // destructors are executed here and are included as part of the
       // "GC Worker Time".
     }
-
-    double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
   }
 };
 
-// *** Common G1 Evacuation Stuff
-
-// This method is run in a GC worker.
-
-void
-G1CollectedHeap::
-g1_process_roots(OopClosure* scan_non_heap_roots,
-                 OopClosure* scan_non_heap_weak_roots,
-                 OopsInHeapRegionClosure* scan_rs,
-                 CLDClosure* scan_strong_clds,
-                 CLDClosure* scan_weak_clds,
-                 CodeBlobClosure* scan_strong_code,
-                 uint worker_i) {
-
-  // First scan the shared roots.
-  double ext_roots_start = os::elapsedTime();
-  double closure_app_time_sec = 0.0;
-
-  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
-  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;
-
-  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
-  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
-
-  process_roots(false, // no scoping; this is parallel code
-                SharedHeap::SO_None,
-                &buf_scan_non_heap_roots,
-                &buf_scan_non_heap_weak_roots,
-                scan_strong_clds,
-                // Unloading Initial Marks handle the weak CLDs separately.
-                (trace_metadata ? NULL : scan_weak_clds),
-                scan_strong_code);
-
-  // Now the CM ref_processor roots.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
-    // We need to treat the discovered reference lists of the
-    // concurrent mark ref processor as roots and keep entries
-    // (which are added by the marking threads) on them live
-    // until they can be processed at the end of marking.
-    ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
-  }
-
-  if (trace_metadata) {
-    // Barrier to make sure all workers passed
-    // the strong CLD and strong nmethods phases.
-    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
-
-    // Now take the complement of the strong CLDs.
-    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
-  }
-
-  // Finish up any enqueued closure apps (attributed as object copy time).
-  buf_scan_non_heap_roots.done();
-  buf_scan_non_heap_weak_roots.done();
-
-  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
-      + buf_scan_non_heap_weak_roots.closure_app_seconds();
-
-  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
-
-  double ext_root_time_ms =
-    ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0;
-
-  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
-
-  // During conc marking we have to filter the per-thread SATB buffers
-  // to make sure we remove any oops into the CSet (which will show up
-  // as implicitly live).
-  double satb_filtering_ms = 0.0;
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
-    if (mark_in_progress()) {
-      double satb_filter_start = os::elapsedTime();
-
-      JavaThread::satb_mark_queue_set().filter_thread_buffers();
-
-      satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0;
-    }
-  }
-  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
-
-  // Now scan the complement of the collection set.
-  G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
-
-  g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
-
-  _process_strong_tasks->all_tasks_completed();
-}
-
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -5310,7 +5149,8 @@
   G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { }
 
   virtual void work(uint worker_id) {
-    double start_time = os::elapsedTime();
+    G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times();
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id);
 
     RedirtyLoggedCardTableEntryClosure cl;
     if (G1CollectedHeap::heap()->use_parallel_gc_threads()) {
@@ -5319,9 +5159,7 @@
       _queue->apply_closure_to_all_completed_buffers(&cl);
     }
 
-    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
-    timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
-    timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed());
+    phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed());
   }
 };
 
@@ -5885,7 +5723,6 @@
     n_workers = 1;
   }
 
-  G1ParTask g1_par_task(this, _task_queues);
 
   init_for_evac_failure(NULL);
 
@@ -5896,7 +5733,8 @@
   double end_par_time_sec;
 
   {
-    StrongRootsScope srs(this);
+    G1RootProcessor root_processor(this);
+    G1ParTask g1_par_task(this, _task_queues, &root_processor);
     // InitialMark needs claim bits to keep track of the marked-through CLDs.
     if (g1_policy()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
@@ -5917,18 +5755,20 @@
     end_par_time_sec = os::elapsedTime();
 
     // Closing the inner scope will execute the destructor
-    // for the StrongRootsScope object. We record the current
+    // for the G1RootProcessor object. We record the current
     // elapsed time before closing the scope so that time
-    // taken for the SRS destructor is NOT included in the
+    // taken for the destructor is NOT included in the
     // reported parallel time.
   }
 
+  G1GCPhaseTimes* phase_times = g1_policy()->phase_times();
+
   double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_par_time(par_time_ms);
+  phase_times->record_par_time(par_time_ms);
 
   double code_root_fixup_time_ms =
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
+  phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);
 
   set_par_threads(0);
 
@@ -5939,14 +5779,15 @@
   // not copied during the pause.
   process_discovered_references(n_workers);
 
-  // Weak root processing.
-  {
+  if (G1StringDedup::is_enabled()) {
+    double fixup_start = os::elapsedTime();
+
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
-    JNIHandles::weak_oops_do(&is_alive, &keep_alive);
-    if (G1StringDedup::is_enabled()) {
-      G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
-    }
+    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times);
+
+    double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
+    phase_times->record_string_dedup_fixup_time(fixup_time_ms);
   }
 
   _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -812,22 +812,6 @@
   // statistics or updating free lists.
   void abandon_collection_set(HeapRegion* cs_head);
 
-  // Applies "scan_non_heap_roots" to roots outside the heap,
-  // "scan_rs" to roots inside the heap (having done "set_region" to
-  // indicate the region in which the root resides),
-  // and does "scan_metadata" If "scan_rs" is
-  // NULL, then this step is skipped.  The "worker_i"
-  // param is for use with parallel roots processing, and should be
-  // the "i" of the calling parallel worker thread's work(i) function.
-  // In the sequential case this param will be ignored.
-  void g1_process_roots(OopClosure* scan_non_heap_roots,
-                        OopClosure* scan_non_heap_weak_roots,
-                        OopsInHeapRegionClosure* scan_rs,
-                        CLDClosure* scan_strong_clds,
-                        CLDClosure* scan_weak_clds,
-                        CodeBlobClosure* scan_strong_code,
-                        uint worker_i);
-
   // The concurrent marker (and the thread it runs in.)
   ConcurrentMark* _cm;
   ConcurrentMarkThread* _cmThread;
@@ -1014,21 +998,10 @@
   // of G1CollectedHeap::_gc_time_stamp.
   unsigned int* _worker_cset_start_region_time_stamp;
 
-  enum G1H_process_roots_tasks {
-    G1H_PS_filter_satb_buffers,
-    G1H_PS_refProcessor_oops_do,
-    // Leave this one last.
-    G1H_PS_NumElements
-  };
-
-  SubTasksDone* _process_strong_tasks;
-
   volatile bool _free_regions_coming;
 
 public:
 
-  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
-
   void set_refine_cte_cl_concurrency(bool concurrent);
 
   RefToScanQueue *task_queue(int i) const;
@@ -1061,21 +1034,11 @@
   // Initialize weak reference processing.
   virtual void ref_processing_init();
 
-  void set_par_threads(uint t) {
-    SharedHeap::set_par_threads(t);
-    // Done in SharedHeap but oddly there are
-    // two _process_strong_tasks's in a G1CollectedHeap
-    // so do it here too.
-    _process_strong_tasks->set_n_threads(t);
-  }
-
+  // Explicitly import set_par_threads into this scope
+  using SharedHeap::set_par_threads;
   // Set _n_par_threads according to a policy TBD.
   void set_par_threads();
 
-  void set_n_termination(int t) {
-    _process_strong_tasks->set_n_threads(t);
-  }
-
   virtual CollectedHeap::Name kind() const {
     return CollectedHeap::G1CollectedHeap;
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -1084,7 +1084,7 @@
   if (update_stats) {
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
-      cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards;
+      cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
@@ -1092,7 +1092,7 @@
 
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
-      cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned;
+      cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
       if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
       } else {
@@ -1134,7 +1134,7 @@
     double cost_per_byte_ms = 0.0;
 
     if (copied_bytes > 0) {
-      cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes;
+      cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes;
       if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
       } else {
@@ -1143,8 +1143,8 @@
     }
 
     double all_other_time_ms = pause_time_ms -
-      (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time()
-      + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time());
+      (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) +
+          phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination));
 
     double young_other_time_ms = 0.0;
     if (young_cset_region_length() > 0) {
@@ -1185,8 +1185,8 @@
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(),
-                               phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms);
+  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
+                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);
 
   _collectionSetChooser->verify();
 }
@@ -2189,19 +2189,19 @@
     _other.add(pause_time_ms - phase_times->accounted_time_ms());
     _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
     _parallel.add(phase_times->cur_collection_par_time_ms());
-    _ext_root_scan.add(phase_times->average_last_ext_root_scan_time());
-    _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms());
-    _update_rs.add(phase_times->average_last_update_rs_time());
-    _scan_rs.add(phase_times->average_last_scan_rs_time());
-    _obj_copy.add(phase_times->average_last_obj_copy_time());
-    _termination.add(phase_times->average_last_termination_time());
+    _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan));
+    _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering));
+    _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS));
+    _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS));
+    _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy));
+    _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination));
 
-    double parallel_known_time = phase_times->average_last_ext_root_scan_time() +
-      phase_times->average_last_satb_filtering_times_ms() +
-      phase_times->average_last_update_rs_time() +
-      phase_times->average_last_scan_rs_time() +
-      phase_times->average_last_obj_copy_time() +
-      + phase_times->average_last_termination_time();
+    double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) +
+      phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) +
+      phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) +
+      phase_times->average_time_ms(G1GCPhaseTimes::Termination);
 
     double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
     _parallel_other.add(parallel_other_time);
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -22,12 +22,13 @@
  *
  */
 
-
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"
 
 // Helper class for avoiding interleaved logging
 class LineBuffer: public StackObj {
@@ -70,184 +71,243 @@
     va_end(ap);
   }
 
+  void print_cr() {
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;
+  }
+
   void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
     va_list ap;
     va_start(ap, format);
     vappend(format, ap);
     va_end(ap);
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
+    print_cr();
   }
 };
 
-PRAGMA_DIAG_PUSH
-PRAGMA_FORMAT_NONLITERAL_IGNORED
 template <class T>
-void WorkerDataArray<T>::print(int level, const char* title) {
-  if (_length == 1) {
-    // No need for min, max, average and sum for only one worker
-    LineBuffer buf(level);
-    buf.append("[%s:  ", title);
-    buf.append(_print_format, _data[0]);
-    buf.append_and_print_cr("]");
-    return;
+class WorkerDataArray  : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
+  T*          _data;
+  uint        _length;
+  const char* _title;
+  bool        _print_sum;
+  int         _log_level;
+  uint        _indent_level;
+  bool        _enabled;
+
+  WorkerDataArray<size_t>* _thread_work_items;
+
+  NOT_PRODUCT(T uninitialized();)
+
+  // We are caching the sum and average to only have to calculate them once.
+  // This is not done in an MT-safe way. It is intended to allow single
+  // threaded code to call sum() and average() multiple times in any order
+  // without having to worry about the cost.
+  bool   _has_new_data;
+  T      _sum;
+  T      _min;
+  T      _max;
+  double _average;
+
+ public:
+  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
+    _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
+    _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
+    assert(length > 0, "Must have some workers to store data for");
+    _length = length;
+    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+  }
+
+  ~WorkerDataArray() {
+    FREE_C_HEAP_ARRAY(T, _data, mtGC);
+  }
+
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
+    _thread_work_items = thread_work_items;
+  }
+
+  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }
+
+  void set(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
+    _data[worker_i] = value;
+    _has_new_data = true;
+  }
+
+  void set_thread_work_item(uint worker_i, size_t value) {
+    assert(_thread_work_items != NULL, "No sub count");
+    _thread_work_items->set(worker_i, value);
   }
 
-  T min = _data[0];
-  T max = _data[0];
-  T sum = 0;
+  T get(uint worker_i) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
+    return _data[worker_i];
+  }
+
+  void add(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
+    _data[worker_i] += value;
+    _has_new_data = true;
+  }
 
-  LineBuffer buf(level);
-  buf.append("[%s:", title);
-  for (uint i = 0; i < _length; ++i) {
-    T val = _data[i];
-    min = MIN2(val, min);
-    max = MAX2(val, max);
-    sum += val;
-    if (G1Log::finest()) {
-      buf.append("  ");
-      buf.append(_print_format, val);
-    }
+  double average(){
+    calculate_totals();
+    return _average;
+  }
+
+  T sum() {
+    calculate_totals();
+    return _sum;
+  }
+
+  T minimum() {
+    calculate_totals();
+    return _min;
   }
 
-  if (G1Log::finest()) {
-    buf.append_and_print_cr("%s", "");
+  T maximum() {
+    calculate_totals();
+    return _max;
   }
 
-  double avg = (double)sum / (double)_length;
-  buf.append(" Min: ");
-  buf.append(_print_format, min);
-  buf.append(", Avg: ");
-  buf.append("%.1lf", avg); // Always print average as a double
-  buf.append(", Max: ");
-  buf.append(_print_format, max);
-  buf.append(", Diff: ");
-  buf.append(_print_format, max - min);
-  if (_print_sum) {
-    // for things like the start and end times the sum is not
-    // that relevant
-    buf.append(", Sum: ");
-    buf.append(_print_format, sum);
+  void reset() PRODUCT_RETURN;
+  void verify() PRODUCT_RETURN;
+
+  void set_enabled(bool enabled) { _enabled = enabled; }
+
+  int log_level() { return _log_level;  }
+
+ private:
+
+  void calculate_totals(){
+    if (!_has_new_data) {
+      return;
+    }
+
+    _sum = (T)0;
+    _min = _data[0];
+    _max = _min;
+    for (uint i = 0; i < _length; ++i) {
+      T val = _data[i];
+      _sum += val;
+      _min = MIN2(_min, val);
+      _max = MAX2(_max, val);
+    }
+    _average = (double)_sum / (double)_length;
+    _has_new_data = false;
   }
-  buf.append_and_print_cr("]");
-}
-PRAGMA_DIAG_POP
+};
+
 
 #ifndef PRODUCT
 
-template <> const int WorkerDataArray<int>::_uninitialized = -1;
-template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
-template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
+template <>
+size_t WorkerDataArray<size_t>::uninitialized() {
+  return (size_t)-1;
+}
+
+template <>
+double WorkerDataArray<double>::uninitialized() {
+  return -1.0;
+}
 
 template <class T>
 void WorkerDataArray<T>::reset() {
   for (uint i = 0; i < _length; i++) {
-    _data[i] = (T)_uninitialized;
+    _data[i] = WorkerDataArray<T>::uninitialized();
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->reset();
   }
 }
 
 template <class T>
 void WorkerDataArray<T>::verify() {
+  if (!_enabled) {
+    return;
+  }
+
   for (uint i = 0; i < _length; i++) {
-    assert(_data[i] != _uninitialized,
-        err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf",
-            i, (double)_data[i], (double)_uninitialized));
+    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
+        err_msg("Invalid data for worker %u in '%s'", i, _title));
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->verify();
   }
 }
 
 #endif
 
 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
-  _max_gc_threads(max_gc_threads),
-  _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
-  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
-  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
-  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
+  _max_gc_threads(max_gc_threads)
 {
   assert(max_gc_threads > 0, "Must have some GC threads");
+
+  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2);
+
+  _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3);
+  _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
+
+  _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);
+
+  _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2);
+
+  _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3);
+  _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
 }
 
-void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
+void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) {
   assert(active_gc_threads > 0, "The number of threads must be > 0");
-  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
   _active_gc_threads = active_gc_threads;
 
-  _last_gc_worker_start_times_ms.reset();
-  _last_ext_root_scan_times_ms.reset();
-  _last_satb_filtering_times_ms.reset();
-  _last_update_rs_times_ms.reset();
-  _last_update_rs_processed_buffers.reset();
-  _last_scan_rs_times_ms.reset();
-  _last_strong_code_root_scan_times_ms.reset();
-  _last_obj_copy_times_ms.reset();
-  _last_termination_times_ms.reset();
-  _last_termination_attempts.reset();
-  _last_gc_worker_end_times_ms.reset();
-  _last_gc_worker_times_ms.reset();
-  _last_gc_worker_other_times_ms.reset();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->reset();
+  }
 
-  _last_redirty_logged_cards_time_ms.reset();
-  _last_redirty_logged_cards_processed_cards.reset();
+  _gc_par_phases[SATBFiltering]->set_enabled(mark_in_progress);
 
+  _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled());
+  _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled());
 }
 
 void G1GCPhaseTimes::note_gc_end() {
-  _last_gc_worker_start_times_ms.verify();
-  _last_ext_root_scan_times_ms.verify();
-  _last_satb_filtering_times_ms.verify();
-  _last_update_rs_times_ms.verify();
-  _last_update_rs_processed_buffers.verify();
-  _last_scan_rs_times_ms.verify();
-  _last_strong_code_root_scan_times_ms.verify();
-  _last_obj_copy_times_ms.verify();
-  _last_termination_times_ms.verify();
-  _last_termination_attempts.verify();
-  _last_gc_worker_end_times_ms.verify();
+  for (uint i = 0; i < _active_gc_threads; i++) {
+    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);
+    record_time_secs(GCWorkerTotal, i , worker_time);
 
-  for (uint i = 0; i < _active_gc_threads; i++) {
-    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
-    _last_gc_worker_times_ms.set(i, worker_time);
+    double worker_known_time =
+        _gc_par_phases[ExtRootScan]->get(i) +
+        _gc_par_phases[SATBFiltering]->get(i) +
+        _gc_par_phases[UpdateRS]->get(i) +
+        _gc_par_phases[ScanRS]->get(i) +
+        _gc_par_phases[CodeRoots]->get(i) +
+        _gc_par_phases[ObjCopy]->get(i) +
+        _gc_par_phases[Termination]->get(i);
 
-    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
-                               _last_satb_filtering_times_ms.get(i) +
-                               _last_update_rs_times_ms.get(i) +
-                               _last_scan_rs_times_ms.get(i) +
-                               _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_obj_copy_times_ms.get(i) +
-                               _last_termination_times_ms.get(i);
-
-    double worker_other_time = worker_time - worker_known_time;
-    _last_gc_worker_other_times_ms.set(i, worker_other_time);
+    record_time_secs(Other, i, worker_time - worker_known_time);
   }
 
-  _last_gc_worker_times_ms.verify();
-  _last_gc_worker_other_times_ms.verify();
-
-  _last_redirty_logged_cards_time_ms.verify();
-  _last_redirty_logged_cards_processed_cards.verify();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_start() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
-  _cur_string_dedup_table_fixup_worker_times_ms.reset();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_end() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
-  _cur_string_dedup_table_fixup_worker_times_ms.verify();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->verify();
+  }
 }
 
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
@@ -259,7 +319,7 @@
 }
 
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
-  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);
 }
 
 double G1GCPhaseTimes::accounted_time_ms() {
@@ -287,46 +347,172 @@
     return misc_time_ms;
 }
 
+// record the time a phase took in seconds
+void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->set(worker_i, secs);
+}
+
+// add a number of seconds to a phase
+void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->add(worker_i, secs);
+}
+
+void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
+  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
+}
+
+// return the average time for a phase in milliseconds
+double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->average() * 1000.0;
+}
+
+double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
+  return _gc_par_phases[phase]->get(worker_i) * 1000.0;
+}
+
+double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->sum() * 1000.0;
+}
+
+double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->minimum() * 1000.0;
+}
+
+double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->maximum() * 1000.0;
+}
+
+size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->get(worker_i);
+}
+
+size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->sum();
+}
+
+double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->average();
+}
+
+size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->minimum();
+}
+
+size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->maximum();
+}
+
+class G1GCParPhasePrinter : public StackObj {
+  G1GCPhaseTimes* _phase_times;
+ public:
+  G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {}
+
+  void print(G1GCPhaseTimes::GCParPhases phase_id) {
+    WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id];
+
+    if (phase->_log_level > G1Log::level() || !phase->_enabled) {
+      return;
+    }
+
+    if (phase->_length == 1) {
+      print_single_length(phase_id, phase);
+    } else {
+      print_multi_length(phase_id, phase);
+    }
+  }
+
+ private:
+
+  void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    // No need for min, max, average and sum for only one worker
+    LineBuffer buf(phase->_indent_level);
+    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));
+
+    if (phase->_thread_work_items != NULL) {
+      LineBuffer buf2(phase->_thread_work_items->_indent_level);
+      buf2.append_and_print_cr("[%s:  "SIZE_FORMAT"]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id));
+    }
+  }
+
+  void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    for (uint i = 0; i < phase->_length; ++i) {
+      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
+    }
+    buf.print_cr();
+  }
+
+  void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    for (uint i = 0; i < thread_work_items->_length; ++i) {
+      buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));
+    }
+    buf.print_cr();
+  }
+
+  void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    LineBuffer buf(thread_work_items->_indent_level);
+    buf.append("[%s:", thread_work_items->_title);
+
+    if (G1Log::finest()) {
+      print_count_values(buf, phase_id, thread_work_items);
+    }
+
+    assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title));
+
+    buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]",
+        _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id),
+        _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id));
+  }
+
+  void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    LineBuffer buf(phase->_indent_level);
+    buf.append("[%s:", phase->_title);
+
+    if (G1Log::finest()) {
+      print_time_values(buf, phase_id, phase);
+    }
+
+    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
+        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
+        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));
+
+    if (phase->_print_sum) {
+      // for things like the start and end times the sum is not
+      // that relevant
+      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
+    }
+
+    buf.append_and_print_cr("]");
+
+    if (phase->_thread_work_items != NULL) {
+      print_thread_work_items(phase_id, phase->_thread_work_items);
+    }
+  }
+};
+
 void G1GCPhaseTimes::print(double pause_time_sec) {
+  G1GCParPhasePrinter par_phase_printer(this);
+
   if (_root_region_scan_wait_time_ms > 0.0) {
     print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
   }
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
-    _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
-    _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(2, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(3, "Processed Buffers");
-    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
-    _last_termination_times_ms.print(2, "Termination (ms)");
-    if (G1Log::finest()) {
-      _last_termination_attempts.print(3, "Termination Attempts");
-    }
-    _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
-    _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
-    _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
-  } else {
-    _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(1, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(2, "Processed Buffers");
-    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
+
+  print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+  for (int i = 0; i <= GCMainParPhasesLast; i++) {
+    par_phase_printer.print((GCParPhases) i);
   }
+
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
   print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
   if (G1StringDedup::is_enabled()) {
     print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
-    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
-    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
+    for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) {
+      par_phase_printer.print((GCParPhases) i);
+    }
   }
   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
@@ -350,10 +536,7 @@
   print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
   print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
-  if (G1Log::finest()) {
-    _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
-    _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
-  }
+  par_phase_printer.print(RedirtyCards);
   if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
     print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
     if (G1Log::finest()) {
@@ -373,3 +556,17 @@
     print_stats(2, "Verify After", _cur_verify_after_time_ms);
   }
 }
+
+G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
+    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
+  if (_phase_times != NULL) {
+    _start_time = os::elapsedTime();
+  }
+}
+
+G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
+  if (_phase_times != NULL) {
+    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
+  }
+}
+
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -26,106 +26,46 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
 
 #include "memory/allocation.hpp"
-#include "gc_interface/gcCause.hpp"
-
-template <class T>
-class WorkerDataArray  : public CHeapObj<mtGC> {
-  T*          _data;
-  uint        _length;
-  const char* _print_format;
-  bool        _print_sum;
-
-  NOT_PRODUCT(static const T _uninitialized;)
-
-  // We are caching the sum and average to only have to calculate them once.
-  // This is not done in an MT-safe way. It is intended to allow single
-  // threaded code to call sum() and average() multiple times in any order
-  // without having to worry about the cost.
-  bool   _has_new_data;
-  T      _sum;
-  double _average;
-
- public:
-  WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
-  _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
-    assert(length > 0, "Must have some workers to store data for");
-    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
-  }
-
-  ~WorkerDataArray() {
-    FREE_C_HEAP_ARRAY(T, _data, mtGC);
-  }
-
-  void set(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
-    _data[worker_i] = value;
-    _has_new_data = true;
-  }
 
-  T get(uint worker_i) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    return _data[worker_i];
-  }
-
-  void add(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    _data[worker_i] += value;
-    _has_new_data = true;
-  }
-
-  double average(){
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _average;
-  }
+class LineBuffer;
 
-  T sum() {
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _sum;
-  }
-
-  void print(int level, const char* title);
-
-  void reset() PRODUCT_RETURN;
-  void verify() PRODUCT_RETURN;
-
- private:
-
-  void calculate_totals(){
-    _sum = (T)0;
-    for (uint i = 0; i < _length; ++i) {
-      _sum += _data[i];
-    }
-    _average = (double)_sum / (double)_length;
-    _has_new_data = false;
-  }
-};
+template <class T> class WorkerDataArray;
 
 class G1GCPhaseTimes : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
 
- private:
   uint _active_gc_threads;
   uint _max_gc_threads;
 
-  WorkerDataArray<double> _last_gc_worker_start_times_ms;
-  WorkerDataArray<double> _last_ext_root_scan_times_ms;
-  WorkerDataArray<double> _last_satb_filtering_times_ms;
-  WorkerDataArray<double> _last_update_rs_times_ms;
-  WorkerDataArray<int>    _last_update_rs_processed_buffers;
-  WorkerDataArray<double> _last_scan_rs_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_obj_copy_times_ms;
-  WorkerDataArray<double> _last_termination_times_ms;
-  WorkerDataArray<size_t> _last_termination_attempts;
-  WorkerDataArray<double> _last_gc_worker_end_times_ms;
-  WorkerDataArray<double> _last_gc_worker_times_ms;
-  WorkerDataArray<double> _last_gc_worker_other_times_ms;
+ public:
+  enum GCParPhases {
+    GCWorkerStart,
+    ExtRootScan,
+    SATBFiltering,
+    UpdateRS,
+    ScanRS,
+    CodeRoots,
+    ObjCopy,
+    Termination,
+    Other,
+    GCWorkerTotal,
+    GCWorkerEnd,
+    StringDedupQueueFixup,
+    StringDedupTableFixup,
+    RedirtyCards,
+    GCParPhasesSentinel
+  };
+
+ private:
+  // Markers for grouping the phases in the GCPhases enum above
+  static const int GCMainParPhasesLast = GCWorkerEnd;
+  static const int StringDedupPhasesFirst = StringDedupQueueFixup;
+  static const int StringDedupPhasesLast = StringDedupTableFixup;
+
+  WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+  WorkerDataArray<size_t>* _update_rs_processed_buffers;
+  WorkerDataArray<size_t>* _termination_attempts;
+  WorkerDataArray<size_t>* _redirtied_cards;
 
   double _cur_collection_par_time_ms;
   double _cur_collection_code_root_fixup_time_ms;
@@ -135,9 +75,7 @@
   double _cur_evac_fail_restore_remsets;
   double _cur_evac_fail_remove_self_forwards;
 
-  double                  _cur_string_dedup_fixup_time_ms;
-  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
-  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;
+  double _cur_string_dedup_fixup_time_ms;
 
   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -149,8 +87,6 @@
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;
 
-  WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
-  WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
   double _recorded_redirty_logged_cards_time_ms;
 
   double _recorded_young_free_cset_time_ms;
@@ -171,54 +107,34 @@
 
  public:
   G1GCPhaseTimes(uint max_gc_threads);
-  void note_gc_start(uint active_gc_threads);
+  void note_gc_start(uint active_gc_threads, bool mark_in_progress);
   void note_gc_end();
   void print(double pause_time_sec);
 
-  void record_gc_worker_start_time(uint worker_i, double ms) {
-    _last_gc_worker_start_times_ms.set(worker_i, ms);
-  }
-
-  void record_ext_root_scan_time(uint worker_i, double ms) {
-    _last_ext_root_scan_times_ms.set(worker_i, ms);
-  }
+  // record the time a phase took in seconds
+  void record_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_satb_filtering_time(uint worker_i, double ms) {
-    _last_satb_filtering_times_ms.set(worker_i, ms);
-  }
+  // add a number of seconds to a phase
+  void add_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_update_rs_time(uint worker_i, double ms) {
-    _last_update_rs_times_ms.set(worker_i, ms);
-  }
+  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
 
-  void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
-    _last_update_rs_processed_buffers.set(worker_i, processed_buffers);
-  }
+  // return the average time for a phase in milliseconds
+  double average_time_ms(GCParPhases phase);
 
-  void record_scan_rs_time(uint worker_i, double ms) {
-    _last_scan_rs_times_ms.set(worker_i, ms);
-  }
-
-  void record_strong_code_root_scan_time(uint worker_i, double ms) {
-    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
-  }
-
-  void record_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.set(worker_i, ms);
-  }
+  size_t sum_thread_work_items(GCParPhases phase);
 
-  void add_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.add(worker_i, ms);
-  }
+ private:
+  double get_time_ms(GCParPhases phase, uint worker_i);
+  double sum_time_ms(GCParPhases phase);
+  double min_time_ms(GCParPhases phase);
+  double max_time_ms(GCParPhases phase);
+  size_t get_thread_work_item(GCParPhases phase, uint worker_i);
+  double average_thread_work_items(GCParPhases phase);
+  size_t min_thread_work_items(GCParPhases phase);
+  size_t max_thread_work_items(GCParPhases phase);
 
-  void record_termination(uint worker_i, double ms, size_t attempts) {
-    _last_termination_times_ms.set(worker_i, ms);
-    _last_termination_attempts.set(worker_i, attempts);
-  }
-
-  void record_gc_worker_end_time(uint worker_i, double ms) {
-    _last_gc_worker_end_times_ms.set(worker_i, ms);
-  }
+ public:
 
   void record_clear_ct_time(double ms) {
     _cur_clear_ct_time_ms = ms;
@@ -248,21 +164,10 @@
     _cur_evac_fail_remove_self_forwards = ms;
   }
 
-  void note_string_dedup_fixup_start();
-  void note_string_dedup_fixup_end();
-
   void record_string_dedup_fixup_time(double ms) {
     _cur_string_dedup_fixup_time_ms = ms;
   }
 
-  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
-  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
   void record_ref_proc_time(double ms) {
     _cur_ref_proc_time_ms = ms;
   }
@@ -301,14 +206,6 @@
     _recorded_non_young_cset_choice_time_ms = time_ms;
   }
 
-  void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
-    _last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
-  }
-
-  void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
-    _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
-  }
-
   void record_redirty_logged_cards_time_ms(double time_ms) {
     _recorded_redirty_logged_cards_time_ms = time_ms;
   }
@@ -362,38 +259,16 @@
   double fast_reclaim_humongous_time_ms() {
     return _cur_fast_reclaim_humongous_time_ms;
   }
-
-  double average_last_update_rs_time() {
-    return _last_update_rs_times_ms.average();
-  }
-
-  int sum_last_update_rs_processed_buffers() {
-    return _last_update_rs_processed_buffers.sum();
-  }
-
-  double average_last_scan_rs_time(){
-    return _last_scan_rs_times_ms.average();
-  }
+};
 
-  double average_last_strong_code_root_scan_time(){
-    return _last_strong_code_root_scan_times_ms.average();
-  }
-
-  double average_last_obj_copy_time() {
-    return _last_obj_copy_times_ms.average();
-  }
-
-  double average_last_termination_time() {
-    return _last_termination_times_ms.average();
-  }
-
-  double average_last_ext_root_scan_time() {
-    return _last_ext_root_scan_times_ms.average();
-  }
-
-  double average_last_satb_filtering_times_ms() {
-    return _last_satb_filtering_times_ms.average();
-  }
+class G1GCParPhaseTimesTracker : public StackObj {
+  double _start_time;
+  G1GCPhaseTimes::GCParPhases _phase;
+  G1GCPhaseTimes* _phase_times;
+  uint _worker_id;
+public:
+  G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
+  ~G1GCParPhaseTimesTracker();
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
--- a/src/share/vm/gc_implementation/g1/g1Log.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1Log.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -28,6 +28,7 @@
 #include "memory/allocation.hpp"
 
 class G1Log : public AllStatic {
+ public:
   typedef enum {
     LevelNone,
     LevelFine,
@@ -35,6 +36,7 @@
     LevelFinest
   } LogLevel;
 
+ private:
   static LogLevel _level;
 
  public:
@@ -50,6 +52,10 @@
     return _level == LevelFinest;
   }
 
+  static LogLevel level() {
+    return _level;
+  }
+
   static void init();
 };
 
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -31,6 +31,7 @@
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
@@ -126,21 +127,22 @@
   GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace(" 1");
 
-  SharedHeap* sh = SharedHeap::heap();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
   // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
   MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
-  sh->process_strong_roots(true,   // activate StrongRootsScope
-                           SharedHeap::SO_None,
-                           &GenMarkSweep::follow_root_closure,
-                           &GenMarkSweep::follow_cld_closure,
-                           &follow_code_closure);
+  {
+    G1RootProcessor root_processor(g1h);
+    root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure,
+                                        &GenMarkSweep::follow_cld_closure,
+                                        &follow_code_closure);
+  }
 
   // Process reference objects found during marking
   ReferenceProcessor* rp = GenMarkSweep::ref_processor();
-  assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Sanity");
+  assert(rp == g1h->ref_processor_stw(), "Sanity");
 
   rp->setup_policy(clear_all_softrefs);
   const ReferenceProcessorStats& stats =
@@ -226,6 +228,12 @@
   }
 };
 
+class G1AlwaysTrueClosure: public BoolObjectClosure {
+public:
+  bool do_object_b(oop p) { return true; }
+};
+static G1AlwaysTrueClosure always_true;
+
 void G1MarkSweep::mark_sweep_phase3() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
@@ -233,24 +241,23 @@
   GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("3");
 
-  SharedHeap* sh = SharedHeap::heap();
-
   // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
   CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
-  sh->process_all_roots(true,  // activate StrongRootsScope
-                        SharedHeap::SO_AllCodeCache,
-                        &GenMarkSweep::adjust_pointer_closure,
-                        &GenMarkSweep::adjust_cld_closure,
-                        &adjust_code_closure);
+  {
+    G1RootProcessor root_processor(g1h);
+    root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure,
+                                     &GenMarkSweep::adjust_cld_closure,
+                                     &adjust_code_closure);
+  }
 
   assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
   g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
-  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
+  JNIHandles::weak_oops_do(&always_true, &GenMarkSweep::adjust_pointer_closure);
 
   if (G1StringDedup::is_enabled()) {
     G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -239,14 +239,14 @@
   G1CollectedHeap* _g1;
   G1RemSet* _g1_rem_set;
   HeapRegion* _from;
-  OopsInHeapRegionClosure* _push_ref_cl;
+  G1ParPushHeapRSClosure* _push_ref_cl;
   bool _record_refs_into_cset;
   uint _worker_i;
 
 public:
   G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
                                 G1RemSet* rs,
-                                OopsInHeapRegionClosure* push_ref_cl,
+                                G1ParPushHeapRSClosure* push_ref_cl,
                                 bool record_refs_into_cset,
                                 uint worker_i = 0);
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -78,9 +78,8 @@
     _cards_scanned(NULL), _total_cards_scanned(0),
     _prev_period_summary()
 {
-  _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC);
+  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
   for (uint i = 0; i < n_workers(); i++) {
     _cset_rs_update_cl[i] = NULL;
   }
@@ -90,11 +89,10 @@
 }
 
 G1RemSet::~G1RemSet() {
-  delete _seq_task;
   for (uint i = 0; i < n_workers(); i++) {
     assert(_cset_rs_update_cl[i] == NULL, "it should be");
   }
-  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl, mtGC);
+  FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl, mtGC);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -108,7 +106,7 @@
   size_t _cards_done, _cards;
   G1CollectedHeap* _g1h;
 
-  OopsInHeapRegionClosure* _oc;
+  G1ParPushHeapRSClosure* _oc;
   CodeBlobClosure* _code_root_cl;
 
   G1BlockOffsetSharedArray* _bot_shared;
@@ -120,7 +118,7 @@
   bool   _try_claimed;
 
 public:
-  ScanRSClosure(OopsInHeapRegionClosure* oc,
+  ScanRSClosure(G1ParPushHeapRSClosure* oc,
                 CodeBlobClosure* code_root_cl,
                 uint worker_i) :
     _oc(oc),
@@ -142,8 +140,7 @@
   void scanCard(size_t index, HeapRegion *r) {
     // Stack allocate the DirtyCardToOopClosure instance
     HeapRegionDCTOC cl(_g1h, r, _oc,
-                       CardTableModRefBS::Precise,
-                       HeapRegionDCTOC::IntoCSFilterKind);
+                       CardTableModRefBS::Precise);
 
     // Set the "from" region in the closure.
     _oc->set_region(r);
@@ -240,7 +237,7 @@
   size_t cards_looked_up() { return _cards;}
 };
 
-void G1RemSet::scanRS(OopsInHeapRegionClosure* oc,
+void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
                       CodeBlobClosure* code_root_cl,
                       uint worker_i) {
   double rs_time_start = os::elapsedTime();
@@ -258,9 +255,8 @@
   assert(_cards_scanned != NULL, "invariant");
   _cards_scanned[worker_i] = scanRScl.cards_done();
 
-  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
-                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
 }
 
 // Closure used for updating RSets and recording references that
@@ -297,29 +293,18 @@
 };
 
 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  double start = os::elapsedTime();
+  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   // Apply the given closure to all remaining log entries.
   RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
 
   _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
-
-  // Now there should be no dirty cards.
-  if (G1RSLogCheckCardTable) {
-    CountNonCleanMemRegionClosure cl(_g1);
-    _ct_bs->mod_card_iterate(&cl);
-    // XXX This isn't true any more: keeping cards of young regions
-    // marked dirty broke it.  Need some reasonable fix.
-    guarantee(cl.n() == 0, "Card table should be clean.");
-  }
-
-  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }
 
 void G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
 
-void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
                                            CodeBlobClosure* code_root_cl,
                                            uint worker_i) {
 #if CARD_REPEAT_HISTO
@@ -344,23 +329,8 @@
 
   assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
 
-  // The two flags below were introduced temporarily to serialize
-  // the updating and scanning of remembered sets. There are some
-  // race conditions when these two operations are done in parallel
-  // and they are causing failures. When we resolve said race
-  // conditions, we'll revert back to parallel remembered set
-  // updating and scanning. See CRs 6677707 and 6677708.
-  if (G1UseParallelRSetUpdating || (worker_i == 0)) {
-    updateRS(&into_cset_dcq, worker_i);
-  } else {
-    _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0);
-    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
-  }
-  if (G1UseParallelRSetScanning || (worker_i == 0)) {
-    scanRS(oc, code_root_cl, worker_i);
-  } else {
-    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
-  }
+  updateRS(&into_cset_dcq, worker_i);
+  scanRS(oc, code_root_cl, worker_i);
 
   // We now clear the cached values of _cset_rs_update_cl for this worker
   _cset_rs_update_cl[worker_i] = NULL;
@@ -461,7 +431,7 @@
 G1UpdateRSOrPushRefOopClosure::
 G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
                               G1RemSet* rs,
-                              OopsInHeapRegionClosure* push_ref_cl,
+                              G1ParPushHeapRSClosure* push_ref_cl,
                               bool record_refs_into_cset,
                               uint worker_i) :
   _g1(g1h), _g1_rem_set(rs), _from(NULL),
@@ -562,7 +532,7 @@
   ct_freq_note_card(_ct_bs->index_for(start));
 #endif
 
-  OopsInHeapRegionClosure* oops_in_heap_closure = NULL;
+  G1ParPushHeapRSClosure* oops_in_heap_closure = NULL;
   if (check_for_refs_into_cset) {
     // ConcurrentG1RefineThreads have worker numbers larger than what
     // _cset_rs_update_cl[] is set up to handle. But those threads should
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -33,6 +33,7 @@
 class G1CollectedHeap;
 class CardTableModRefBarrierSet;
 class ConcurrentG1Refine;
+class G1ParPushHeapRSClosure;
 
 // A G1RemSet in which each heap region has a rem set that records the
 // external heap references into it.  Uses a mod ref bs to track updates,
@@ -58,7 +59,6 @@
   };
 
   CardTableModRefBS*     _ct_bs;
-  SubTasksDone*          _seq_task;
   G1CollectorPolicy*     _g1p;
 
   ConcurrentG1Refine*    _cg1r;
@@ -68,7 +68,7 @@
 
   // Used for caching the closure that is responsible for scanning
   // references into the collection set.
-  OopsInHeapRegionClosure** _cset_rs_update_cl;
+  G1ParPushHeapRSClosure** _cset_rs_update_cl;
 
   // Print the given summary info
   virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL);
@@ -95,7 +95,7 @@
   // partitioning the work to be done. It should be the same as
   // the "i" passed to the calling thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+  void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
                                    CodeBlobClosure* code_root_cl,
                                    uint worker_i);
 
@@ -107,7 +107,7 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  void scanRS(OopsInHeapRegionClosure* oc,
+  void scanRS(G1ParPushHeapRSClosure* oc,
               CodeBlobClosure* code_root_cl,
               uint worker_i);
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/codeCache.hpp"
+#include "gc_implementation/g1/bufferingOopClosure.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/fprofiler.hpp"
+#include "runtime/mutex.hpp"
+#include "services/management.hpp"
+
+class G1CodeBlobClosure : public CodeBlobClosure {
+  class HeapRegionGatheringOopClosure : public OopClosure {
+    G1CollectedHeap* _g1h;
+    OopClosure* _work;
+    nmethod* _nm;
+
+    template <typename T>
+    void do_oop_work(T* p) {
+      _work->do_oop(p);
+      T oop_or_narrowoop = oopDesc::load_heap_oop(p);
+      if (!oopDesc::is_null(oop_or_narrowoop)) {
+        oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
+        HeapRegion* hr = _g1h->heap_region_containing_raw(o);
+        assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
+        hr->add_strong_code_root(_nm);
+      }
+    }
+
+  public:
+    HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
+
+    void do_oop(oop* o) {
+      do_oop_work(o);
+    }
+
+    void do_oop(narrowOop* o) {
+      do_oop_work(o);
+    }
+
+    void set_nm(nmethod* nm) {
+      _nm = nm;
+    }
+  };
+
+  HeapRegionGatheringOopClosure _oc;
+public:
+  G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      if (!nm->test_set_oops_do_mark()) {
+        _oc.set_nm(nm);
+        nm->oops_do(&_oc);
+        nm->fix_oop_relocations();
+      }
+    }
+  }
+};
+
+
+void G1RootProcessor::worker_has_discovered_all_strong_classes() {
+  uint n_workers = _g1h->n_par_threads();
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
+  if (new_value == n_workers) {
+    // This thread is last. Notify the others.
+    MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
+    _lock.notify_all();
+  }
+}
+
+void G1RootProcessor::wait_until_all_strong_classes_discovered() {
+  uint n_workers = _g1h->n_par_threads();
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  if ((uint)_n_workers_discovered_strong_classes != n_workers) {
+    MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
+    while ((uint)_n_workers_discovered_strong_classes != n_workers) {
+      _lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
+    }
+  }
+}
+
+G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
+    _g1h(g1h),
+    _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
+    _srs(g1h),
+    _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false),
+    _n_workers_discovered_strong_classes(0) {}
+
+void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots,
+                                     OopClosure* scan_non_heap_weak_roots,
+                                     CLDClosure* scan_strong_clds,
+                                     CLDClosure* scan_weak_clds,
+                                     bool trace_metadata,
+                                     uint worker_i) {
+  // First scan the shared roots.
+  double ext_roots_start = os::elapsedTime();
+
+  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
+  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
+
+  OopClosure* const weak_roots = &buf_scan_non_heap_weak_roots;
+  OopClosure* const strong_roots = &buf_scan_non_heap_roots;
+
+  // CodeBlobClosures are not interoperable with BufferingOopClosures
+  G1CodeBlobClosure root_code_blobs(scan_non_heap_roots);
+
+  process_java_roots(strong_roots,
+                     trace_metadata ? scan_strong_clds : NULL,
+                     scan_strong_clds,
+                     trace_metadata ? NULL : scan_weak_clds,
+                     &root_code_blobs);
+
+  // This is the point where this worker thread will not find more strong CLDs/nmethods.
+  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
+  if (trace_metadata) {
+    worker_has_discovered_all_strong_classes();
+  }
+
+  process_vm_roots(strong_roots, weak_roots);
+
+  // Now the CM ref_processor roots.
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_refProcessor_oops_do)) {
+    // We need to treat the discovered reference lists of the
+    // concurrent mark ref processor as roots and keep entries
+    // (which are added by the marking threads) on them live
+    // until they can be processed at the end of marking.
+    _g1h->ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
+  }
+
+  if (trace_metadata) {
+    // Barrier to make sure all workers passed
+    // the strong CLD and strong nmethods phases.
+    wait_until_all_strong_classes_discovered();
+
+    // Now take the complement of the strong CLDs.
+    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
+  }
+
+  // Finish up any enqueued closure apps (attributed as object copy time).
+  buf_scan_non_heap_roots.done();
+  buf_scan_non_heap_weak_roots.done();
+
+  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+      + buf_scan_non_heap_weak_roots.closure_app_seconds();
+
+  G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
+  phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
+
+  double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
+
+  phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
+
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_strong_roots(OopClosure* oops,
+                                           CLDClosure* clds,
+                                           CodeBlobClosure* blobs) {
+
+  process_java_roots(oops, clds, clds, NULL, blobs);
+  process_vm_roots(oops, NULL);
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_all_roots(OopClosure* oops,
+                                        CLDClosure* clds,
+                                        CodeBlobClosure* blobs) {
+
+  process_java_roots(oops, NULL, clds, clds, NULL);
+  process_vm_roots(oops, oops);
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_CodeCache_oops_do)) {
+    CodeCache::blobs_do(blobs);
+  }
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
+                                         CLDClosure* thread_stack_clds,
+                                         CLDClosure* strong_clds,
+                                         CLDClosure* weak_clds,
+                                         CodeBlobClosure* strong_code) {
+  assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set");
+  // Iterating over the CLDG and the Threads are done early to allow us to
+  // first process the strong CLDs and nmethods and then, after a barrier,
+  // let the thread process the weak CLDs and nmethods.
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds);
+  }
+
+  Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code);
+}
+
+void G1RootProcessor::process_vm_roots(OopClosure* strong_roots,
+                                       OopClosure* weak_roots) {
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Universe_oops_do)) {
+    Universe::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_JNIHandles_oops_do)) {
+    JNIHandles::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks-> is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) {
+    ObjectSynchronizer::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) {
+    FlatProfiler::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Management_oops_do)) {
+    Management::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_jvmti_oops_do)) {
+    JvmtiExport::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) {
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+  }
+
+  // All threads execute the following. A specific chunk of buckets
+  // from the StringTable are the individual tasks.
+  if (weak_roots != NULL) {
+    StringTable::possibly_parallel_oops_do(weak_roots);
+  }
+}
+
+void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
+                                           OopClosure* scan_non_heap_weak_roots,
+                                           uint worker_i) {
+  // Now scan the complement of the collection set.
+  G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
+
+  _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
+}
+
+void G1RootProcessor::set_num_workers(int active_workers) {
+  _process_strong_tasks->set_n_threads(active_workers);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/mutex.hpp"
+
+class CLDClosure;
+class CodeBlobClosure;
+class G1CollectedHeap;
+class G1ParPushHeapRSClosure;
+class Monitor;
+class OopClosure;
+class SubTasksDone;
+
+// Scoped object to assist in applying oop, CLD and code blob closures to
+// root locations. Handles claiming of different root scanning tasks
+// and takes care of global state for root scanning via a StrongRootsScope.
+// In the parallel case there is a shared G1RootProcessor object where all
+// worker thread call the process_roots methods.
+class G1RootProcessor : public StackObj {
+  G1CollectedHeap* _g1h;
+  SubTasksDone* _process_strong_tasks;
+  SharedHeap::StrongRootsScope _srs;
+
+  // Used to implement the Thread work barrier.
+  Monitor _lock;
+  volatile jint _n_workers_discovered_strong_classes;
+
+  enum G1H_process_roots_tasks {
+    G1RP_PS_Universe_oops_do,
+    G1RP_PS_JNIHandles_oops_do,
+    G1RP_PS_ObjectSynchronizer_oops_do,
+    G1RP_PS_FlatProfiler_oops_do,
+    G1RP_PS_Management_oops_do,
+    G1RP_PS_SystemDictionary_oops_do,
+    G1RP_PS_ClassLoaderDataGraph_oops_do,
+    G1RP_PS_jvmti_oops_do,
+    G1RP_PS_CodeCache_oops_do,
+    G1RP_PS_filter_satb_buffers,
+    G1RP_PS_refProcessor_oops_do,
+    // Leave this one last.
+    G1RP_PS_NumElements
+  };
+
+  void worker_has_discovered_all_strong_classes();
+  void wait_until_all_strong_classes_discovered();
+
+  void process_java_roots(OopClosure* scan_non_heap_roots,
+                          CLDClosure* thread_stack_clds,
+                          CLDClosure* scan_strong_clds,
+                          CLDClosure* scan_weak_clds,
+                          CodeBlobClosure* scan_strong_code);
+
+  void process_vm_roots(OopClosure* scan_non_heap_roots,
+                        OopClosure* scan_non_heap_weak_roots);
+
+public:
+  G1RootProcessor(G1CollectedHeap* g1h);
+
+  // Apply closures to the strongly and weakly reachable roots in the system
+  // in a single pass.
+  // Record and report timing measurements for sub phases using the worker_i
+  void evacuate_roots(OopClosure* scan_non_heap_roots,
+                      OopClosure* scan_non_heap_weak_roots,
+                      CLDClosure* scan_strong_clds,
+                      CLDClosure* scan_weak_clds,
+                      bool trace_metadata,
+                      uint worker_i);
+
+  // Apply oops, clds and blobs to all strongly reachable roots in the system
+  void process_strong_roots(OopClosure* oops,
+                            CLDClosure* clds,
+                            CodeBlobClosure* blobs);
+
+  // Apply oops, clds and blobs to strongly and weakly reachable roots in the system
+  void process_all_roots(OopClosure* oops,
+                         CLDClosure* clds,
+                         CodeBlobClosure* blobs);
+
+  // Apply scan_rs to all locations in the union of the remembered sets for all
+  // regions in the collection set
+  // (having done "set_region" to indicate the region in which the root resides),
+  void scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
+                            OopClosure* scan_non_heap_weak_roots,
+                            uint worker_i);
+
+  // Inform the root processor about the number of worker threads
+  void set_num_workers(int active_workers);
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -105,7 +105,7 @@
 
 void G1StringDedup::oops_do(OopClosure* keep_alive) {
   assert(is_enabled(), "String deduplication not enabled");
-  unlink_or_oops_do(NULL, keep_alive);
+  unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */);
 }
 
 void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
@@ -122,37 +122,35 @@
 class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
 private:
   G1StringDedupUnlinkOrOopsDoClosure _cl;
+  G1GCPhaseTimes* _phase_times;
 
 public:
   G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
                                   OopClosure* keep_alive,
-                                  bool allow_resize_and_rehash) :
+                                  bool allow_resize_and_rehash,
+                                  G1GCPhaseTimes* phase_times) :
     AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
-    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
-  }
+    _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { }
 
   virtual void work(uint worker_id) {
-    double queue_fixup_start = os::elapsedTime();
-    G1StringDedupQueue::unlink_or_oops_do(&_cl);
-
-    double table_fixup_start = os::elapsedTime();
-    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
-
-    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
-    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
-    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
-    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id);
+      G1StringDedupQueue::unlink_or_oops_do(&_cl);
+    }
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id);
+      G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
+    }
   }
 };
 
-void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
+void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive,
+                                      OopClosure* keep_alive,
+                                      bool allow_resize_and_rehash,
+                                      G1GCPhaseTimes* phase_times) {
   assert(is_enabled(), "String deduplication not enabled");
-  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-  g1p->phase_times()->note_string_dedup_fixup_start();
-  double fixup_start = os::elapsedTime();
 
-  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
+  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
     g1h->set_par_threads();
@@ -161,10 +159,6 @@
   } else {
     task.work(0);
   }
-
-  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
-  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
-  g1p->phase_times()->note_string_dedup_fixup_end();
 }
 
 void G1StringDedup::threads_do(ThreadClosure* tc) {
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -90,6 +90,7 @@
 class ThreadClosure;
 class outputStream;
 class G1StringDedupTable;
+class G1GCPhaseTimes;
 
 //
 // Main interface for interacting with string deduplication.
@@ -130,7 +131,7 @@
   static void oops_do(OopClosure* keep_alive);
   static void unlink(BoolObjectClosure* is_alive);
   static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
-                                bool allow_resize_and_rehash = true);
+                                bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL);
 
   static void threads_do(ThreadClosure* tc);
   static void print_worker_threads_on(outputStream* st);
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -217,14 +217,6 @@
   product(uintx, G1HeapRegionSize, 0,                                       \
           "Size of the G1 regions.")                                        \
                                                                             \
-  experimental(bool, G1UseParallelRSetUpdating, true,                       \
-          "Enables the parallelization of remembered set updating "         \
-          "during evacuation pauses")                                       \
-                                                                            \
-  experimental(bool, G1UseParallelRSetScanning, true,                       \
-          "Enables the parallelization of remembered set scanning "         \
-          "during evacuation pauses")                                       \
-                                                                            \
   product(uintx, G1ConcRefinementThreads, 0,                                \
           "If non-0 is the number of parallel rem set update threads, "     \
           "otherwise the value is determined ergonomically.")               \
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -47,93 +47,55 @@
 size_t HeapRegion::CardsPerRegion    = 0;
 
 HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
-                                 HeapRegion* hr, ExtendedOopClosure* cl,
-                                 CardTableModRefBS::PrecisionStyle precision,
-                                 FilterKind fk) :
+                                 HeapRegion* hr,
+                                 G1ParPushHeapRSClosure* cl,
+                                 CardTableModRefBS::PrecisionStyle precision) :
   DirtyCardToOopClosure(hr, cl, precision, NULL),
-  _hr(hr), _fk(fk), _g1(g1) { }
+  _hr(hr), _rs_scan(cl), _g1(g1) { }
 
 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
                                                    OopClosure* oc) :
   _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }
 
-template<class ClosureType>
-HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
-                               HeapRegion* hr,
-                               HeapWord* cur, HeapWord* top) {
-  oop cur_oop = oop(cur);
-  size_t oop_size = hr->block_size(cur);
-  HeapWord* next_obj = cur + oop_size;
-  while (next_obj < top) {
-    // Keep filtering the remembered set.
-    if (!g1h->is_obj_dead(cur_oop, hr)) {
-      // Bottom lies entirely below top, so we can call the
-      // non-memRegion version of oop_iterate below.
-      cur_oop->oop_iterate(cl);
-    }
-    cur = next_obj;
-    cur_oop = oop(cur);
-    oop_size = hr->block_size(cur);
-    next_obj = cur + oop_size;
-  }
-  return cur;
-}
-
 void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
                                       HeapWord* bottom,
                                       HeapWord* top) {
   G1CollectedHeap* g1h = _g1;
   size_t oop_size;
-  ExtendedOopClosure* cl2 = NULL;
-
-  FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
-  FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
-
-  switch (_fk) {
-  case NoFilterKind:          cl2 = _cl; break;
-  case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
-  case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
-  default:                    ShouldNotReachHere();
-  }
+  HeapWord* cur = bottom;
 
   // Start filtering what we add to the remembered set. If the object is
   // not considered dead, either because it is marked (in the mark bitmap)
   // or it was allocated after marking finished, then we add it. Otherwise
   // we can safely ignore the object.
-  if (!g1h->is_obj_dead(oop(bottom), _hr)) {
-    oop_size = oop(bottom)->oop_iterate(cl2, mr);
+  if (!g1h->is_obj_dead(oop(cur), _hr)) {
+    oop_size = oop(cur)->oop_iterate(_rs_scan, mr);
   } else {
-    oop_size = _hr->block_size(bottom);
+    oop_size = _hr->block_size(cur);
   }
 
-  bottom += oop_size;
-
-  if (bottom < top) {
-    // We replicate the loop below for several kinds of possible filters.
-    switch (_fk) {
-    case NoFilterKind:
-      bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top);
-      break;
+  cur += oop_size;
 
-    case IntoCSFilterKind: {
-      FilterIntoCSClosure filt(this, g1h, _cl);
-      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
-      break;
-    }
-
-    case OutOfRegionFilterKind: {
-      FilterOutOfRegionClosure filt(_hr, _cl);
-      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
-      break;
-    }
-
-    default:
-      ShouldNotReachHere();
+  if (cur < top) {
+    oop cur_oop = oop(cur);
+    oop_size = _hr->block_size(cur);
+    HeapWord* next_obj = cur + oop_size;
+    while (next_obj < top) {
+      // Keep filtering the remembered set.
+      if (!g1h->is_obj_dead(cur_oop, _hr)) {
+        // Bottom lies entirely below top, so we can call the
+        // non-memRegion version of oop_iterate below.
+        cur_oop->oop_iterate(_rs_scan);
+      }
+      cur = next_obj;
+      cur_oop = oop(cur);
+      oop_size = _hr->block_size(cur);
+      next_obj = cur + oop_size;
     }
 
     // Last object. Need to do dead-obj filtering here too.
-    if (!g1h->is_obj_dead(oop(bottom), _hr)) {
-      oop(bottom)->oop_iterate(cl2, mr);
+    if (!g1h->is_obj_dead(oop(cur), _hr)) {
+      oop(cur)->oop_iterate(_rs_scan, mr);
     }
   }
 }
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -67,17 +67,9 @@
 // sets.
 
 class HeapRegionDCTOC : public DirtyCardToOopClosure {
-public:
-  // Specification of possible DirtyCardToOopClosure filtering.
-  enum FilterKind {
-    NoFilterKind,
-    IntoCSFilterKind,
-    OutOfRegionFilterKind
-  };
-
-protected:
+private:
   HeapRegion* _hr;
-  FilterKind _fk;
+  G1ParPushHeapRSClosure* _rs_scan;
   G1CollectedHeap* _g1;
 
   // Walk the given memory region from bottom to (actual) top
@@ -90,9 +82,9 @@
 
 public:
   HeapRegionDCTOC(G1CollectedHeap* g1,
-                  HeapRegion* hr, ExtendedOopClosure* cl,
-                  CardTableModRefBS::PrecisionStyle precision,
-                  FilterKind fk);
+                  HeapRegion* hr,
+                  G1ParPushHeapRSClosure* cl,
+                  CardTableModRefBS::PrecisionStyle precision);
 };
 
 // The complicating factor is that BlockOffsetTable diverged
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -622,7 +622,7 @@
                          true,  // Process younger gens, if any,
                                 // as strong roots.
                          false, // no scope; this is parallel code
-                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
                          &par_scan_state.older_gen_closure(),
--- a/src/share/vm/memory/defNewGeneration.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -629,7 +629,7 @@
                          true,  // Process younger gens, if any,
                                 // as strong roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &fsc_with_no_gc_barrier,
                          &fsc_with_gc_barrier,
--- a/src/share/vm/memory/genCollectedHeap.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -26,6 +26,7 @@
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
@@ -49,6 +50,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/vmThread.hpp"
+#include "services/management.hpp"
 #include "services/memoryService.hpp"
 #include "utilities/vmError.hpp"
 #include "utilities/workgroup.hpp"
@@ -63,7 +65,15 @@
 
 // The set of potentially parallel tasks in root scanning.
 enum GCH_strong_roots_tasks {
-  // We probably want to parallelize both of these internally, but for now...
+  GCH_PS_Universe_oops_do,
+  GCH_PS_JNIHandles_oops_do,
+  GCH_PS_ObjectSynchronizer_oops_do,
+  GCH_PS_FlatProfiler_oops_do,
+  GCH_PS_Management_oops_do,
+  GCH_PS_SystemDictionary_oops_do,
+  GCH_PS_ClassLoaderDataGraph_oops_do,
+  GCH_PS_jvmti_oops_do,
+  GCH_PS_CodeCache_oops_do,
   GCH_PS_younger_gens,
   // Leave this one last.
   GCH_PS_NumElements
@@ -72,13 +82,9 @@
 GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
   SharedHeap(policy),
   _gen_policy(policy),
-  _gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)),
+  _process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
   _full_collections_completed(0)
 {
-  if (_gen_process_roots_tasks == NULL ||
-      !_gen_process_roots_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }
   assert(policy != NULL, "Sanity check");
 }
 
@@ -590,29 +596,137 @@
 
 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_roots_tasks->set_n_threads(t);
+  set_n_termination(t);
+}
+
+void GenCollectedHeap::set_n_termination(uint t) {
+  _process_strong_tasks->set_n_threads(t);
 }
 
-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* weak_roots,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure,
-                  CLDClosure* weak_cld_closure,
-                  CodeBlobClosure* code_closure) {
+#ifdef ASSERT
+class AssertNonScavengableClosure: public OopClosure {
+public:
+  virtual void do_oop(oop* p) {
+    assert(!Universe::heap()->is_in_partial_collection(*p),
+      "Referent should not be scavengable.");  }
+  virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); }
+};
+static AssertNonScavengableClosure assert_is_non_scavengable_closure;
+#endif
+
+void GenCollectedHeap::process_roots(bool activate_scope,
+                                     ScanningOption so,
+                                     OopClosure* strong_roots,
+                                     OopClosure* weak_roots,
+                                     CLDClosure* strong_cld_closure,
+                                     CLDClosure* weak_cld_closure,
+                                     CodeBlobClosure* code_roots) {
+  StrongRootsScope srs(this, activate_scope);
 
   // General roots.
-  SharedHeap::process_roots(activate_scope, so,
-                            not_older_gens, weak_roots,
-                            cld_closure, weak_cld_closure,
-                            code_closure);
+  assert(_strong_roots_parity != 0, "must have called prologue code");
+  assert(code_roots != NULL, "code root closure should always be set");
+  // _n_termination for _process_strong_tasks should be set up stream
+  // in a method not running in a GC worker.  Otherwise the GC worker
+  // could be trying to change the termination condition while the task
+  // is executing in another GC worker.
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  }
+
+  // Some CLDs contained in the thread frames should be considered strong.
+  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
+  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
+  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
+  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+
+  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
+    Universe::oops_do(strong_roots);
+  }
+  // Global (strong) JNI handles
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_JNIHandles_oops_do)) {
+    JNIHandles::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ObjectSynchronizer_oops_do)) {
+    ObjectSynchronizer::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_FlatProfiler_oops_do)) {
+    FlatProfiler::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Management_oops_do)) {
+    Management::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_jvmti_oops_do)) {
+    JvmtiExport::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_SystemDictionary_oops_do)) {
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+  }
+
+  // All threads execute the following. A specific chunk of buckets
+  // from the StringTable are the individual tasks.
+  if (weak_roots != NULL) {
+    if (CollectedHeap::use_parallel_gc_threads()) {
+      StringTable::possibly_parallel_oops_do(weak_roots);
+    } else {
+      StringTable::oops_do(weak_roots);
+    }
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_CodeCache_oops_do)) {
+    if (so & SO_ScavengeCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // We only visit parts of the CodeCache when scavenging.
+      CodeCache::scavenge_root_nmethods_do(code_roots);
+    }
+    if (so & SO_AllCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // CMSCollector uses this to do intermediate-strength collections.
+      // We scan the entire code cache, since CodeCache::do_unloading is not called.
+      CodeCache::blobs_do(code_roots);
+    }
+    // Verify that the code cache contents are not subject to
+    // movement by a scavenging collection.
+    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
+    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
+  }
+
+}
+
+void GenCollectedHeap::gen_process_roots(int level,
+                                         bool younger_gens_as_roots,
+                                         bool activate_scope,
+                                         ScanningOption so,
+                                         bool only_strong_roots,
+                                         OopsInGenClosure* not_older_gens,
+                                         OopsInGenClosure* older_gens,
+                                         CLDClosure* cld_closure) {
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens;
+  CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
+
+  process_roots(activate_scope, so,
+                not_older_gens, weak_roots,
+                cld_closure, weak_cld_closure,
+                &mark_code_closure);
 
   if (younger_gens_as_roots) {
-    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -628,43 +742,18 @@
     older_gens->reset_generation();
   }
 
-  _gen_process_roots_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed();
 }
 
-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  bool only_strong_roots,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure) {
 
-  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
-
-  bool is_moving_collection = false;
-  if (level == 0 || is_adjust_phase) {
-    // young collections are always moving
-    is_moving_collection = true;
-  }
-
-  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
-  CodeBlobClosure* code_closure = &mark_code_closure;
-
-  gen_process_roots(level,
-                    younger_gens_as_roots,
-                    activate_scope, so,
-                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
-                    older_gens,
-                    cld_closure, only_strong_roots ? NULL : cld_closure,
-                    code_closure);
-
-}
+class AlwaysTrueClosure: public BoolObjectClosure {
+public:
+  bool do_object_b(oop p) { return true; }
+};
+static AlwaysTrueClosure always_true;
 
 void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
-  SharedHeap::process_weak_roots(root_closure);
-  // "Local" "weak" refs
+  JNIHandles::weak_oops_do(&always_true, root_closure);
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
   }
--- a/src/share/vm/memory/genCollectedHeap.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -79,8 +79,7 @@
 
   // Data structure for claiming the (potentially) parallel tasks in
   // (gen-specific) roots processing.
-  SubTasksDone* _gen_process_roots_tasks;
-  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
+  SubTasksDone* _process_strong_tasks;
 
   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -390,6 +389,7 @@
   static GenCollectedHeap* heap();
 
   void set_par_threads(uint t);
+  void set_n_termination(uint t);
 
   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
@@ -403,11 +403,25 @@
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
+  enum ScanningOption {
+    SO_None                =  0x0,
+    SO_AllCodeCache        =  0x8,
+    SO_ScavengeCodeCache   = 0x10
+  };
+
  private:
+  void process_roots(bool activate_scope,
+                     ScanningOption so,
+                     OopClosure* strong_roots,
+                     OopClosure* weak_roots,
+                     CLDClosure* strong_cld_closure,
+                     CLDClosure* weak_cld_closure,
+                     CodeBlobClosure* code_roots);
+
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* weak_roots,
                          OopsInGenClosure* older_gens,
@@ -422,7 +436,7 @@
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* older_gens,
--- a/src/share/vm/memory/genMarkSweep.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/genMarkSweep.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -210,8 +210,8 @@
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_None,
-                         GenCollectedHeap::StrongRootsOnly,
+                         GenCollectedHeap::SO_None,
+                         ClassUnloading,
                          &follow_root_closure,
                          &follow_root_closure,
                          &follow_cld_closure);
@@ -295,7 +295,7 @@
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::SO_AllCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &adjust_pointer_closure,
                          &adjust_pointer_closure,
--- a/src/share/vm/memory/sharedHeap.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/sharedHeap.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -32,7 +32,6 @@
 #include "runtime/atomic.inline.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/java.hpp"
-#include "services/management.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/workgroup.hpp"
 
@@ -40,33 +39,13 @@
 
 SharedHeap* SharedHeap::_sh;
 
-// The set of potentially parallel tasks in root scanning.
-enum SH_process_roots_tasks {
-  SH_PS_Universe_oops_do,
-  SH_PS_JNIHandles_oops_do,
-  SH_PS_ObjectSynchronizer_oops_do,
-  SH_PS_FlatProfiler_oops_do,
-  SH_PS_Management_oops_do,
-  SH_PS_SystemDictionary_oops_do,
-  SH_PS_ClassLoaderDataGraph_oops_do,
-  SH_PS_jvmti_oops_do,
-  SH_PS_CodeCache_oops_do,
-  // Leave this one last.
-  SH_PS_NumElements
-};
-
 SharedHeap::SharedHeap(CollectorPolicy* policy_) :
   CollectedHeap(),
   _collector_policy(policy_),
   _rem_set(NULL),
-  _strong_roots_scope(NULL),
   _strong_roots_parity(0),
-  _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
   _workers(NULL)
 {
-  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }
   _sh = this;  // ch is static, should be set only once.
   if ((UseParNewGC ||
       (UseConcMarkSweepGC && (CMSParallelInitialMarkEnabled ||
@@ -84,14 +63,6 @@
   }
 }
 
-int SharedHeap::n_termination() {
-  return _process_strong_tasks->n_threads();
-}
-
-void SharedHeap::set_n_termination(int t) {
-  _process_strong_tasks->set_n_threads(t);
-}
-
 bool SharedHeap::heap_lock_held_for_gc() {
   Thread* t = Thread::current();
   return    Heap_lock->owned_by_self()
@@ -102,31 +73,6 @@
 void SharedHeap::set_par_threads(uint t) {
   assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
   _n_par_threads = t;
-  _process_strong_tasks->set_n_threads(t);
-}
-
-#ifdef ASSERT
-class AssertNonScavengableClosure: public OopClosure {
-public:
-  virtual void do_oop(oop* p) {
-    assert(!Universe::heap()->is_in_partial_collection(*p),
-      "Referent should not be scavengable.");  }
-  virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); }
-};
-static AssertNonScavengableClosure assert_is_non_scavengable_closure;
-#endif
-
-SharedHeap::StrongRootsScope* SharedHeap::active_strong_roots_scope() const {
-  return _strong_roots_scope;
-}
-void SharedHeap::register_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
-  assert(_strong_roots_scope == NULL, "Should only have one StrongRootsScope active");
-  assert(scope != NULL, "Illegal argument");
-  _strong_roots_scope = scope;
-}
-void SharedHeap::unregister_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
-  assert(_strong_roots_scope == scope, "Wrong scope unregistered");
-  _strong_roots_scope = NULL;
 }
 
 void SharedHeap::change_strong_roots_parity() {
@@ -140,174 +86,15 @@
 }
 
 SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* heap, bool activate)
-  : MarkScope(activate), _sh(heap), _n_workers_done_with_threads(0)
+  : MarkScope(activate), _sh(heap)
 {
   if (_active) {
-    _sh->register_strong_roots_scope(this);
     _sh->change_strong_roots_parity();
     // Zero the claimed high water mark in the StringTable
     StringTable::clear_parallel_claimed_index();
   }
 }
 
-SharedHeap::StrongRootsScope::~StrongRootsScope() {
-  if (_active) {
-    _sh->unregister_strong_roots_scope(this);
-  }
-}
-
-Monitor* SharedHeap::StrongRootsScope::_lock = new Monitor(Mutex::leaf, "StrongRootsScope lock", false);
-
-void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers) {
-  // The Thread work barrier is only needed by G1 Class Unloading.
-  // No need to use the barrier if this is single-threaded code.
-  if (UseG1GC && ClassUnloadingWithConcurrentMark && n_workers > 0) {
-    uint new_value = (uint)Atomic::add(1, &_n_workers_done_with_threads);
-    if (new_value == n_workers) {
-      // This thread is last. Notify the others.
-      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-      _lock->notify_all();
-    }
-  }
-}
-
-void SharedHeap::StrongRootsScope::wait_until_all_workers_done_with_threads(uint n_workers) {
-  assert(UseG1GC,                          "Currently only used by G1");
-  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
-
-  // No need to use the barrier if this is single-threaded code.
-  if (n_workers > 0 && (uint)_n_workers_done_with_threads != n_workers) {
-    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-    while ((uint)_n_workers_done_with_threads != n_workers) {
-      _lock->wait(Mutex::_no_safepoint_check_flag, 0, false);
-    }
-  }
-}
-
-void SharedHeap::process_roots(bool activate_scope,
-                               ScanningOption so,
-                               OopClosure* strong_roots,
-                               OopClosure* weak_roots,
-                               CLDClosure* strong_cld_closure,
-                               CLDClosure* weak_cld_closure,
-                               CodeBlobClosure* code_roots) {
-  StrongRootsScope srs(this, activate_scope);
-
-  // General roots.
-  assert(_strong_roots_parity != 0, "must have called prologue code");
-  assert(code_roots != NULL, "code root closure should always be set");
-  // _n_termination for _process_strong_tasks should be set up stream
-  // in a method not running in a GC worker.  Otherwise the GC worker
-  // could be trying to change the termination condition while the task
-  // is executing in another GC worker.
-
-  // Iterating over the CLDG and the Threads are done early to allow G1 to
-  // first process the strong CLDs and nmethods and then, after a barrier,
-  // let the thread process the weak CLDs and nmethods.
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
-    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
-  }
-
-  // Some CLDs contained in the thread frames should be considered strong.
-  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
-  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
-  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
-  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
-
-  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
-
-  // This is the point where this worker thread will not find more strong CLDs/nmethods.
-  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
-  active_strong_roots_scope()->mark_worker_done_with_threads(n_par_threads());
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
-    Universe::oops_do(strong_roots);
-  }
-  // Global (strong) JNI handles
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_JNIHandles_oops_do))
-    JNIHandles::oops_do(strong_roots);
-
-  if (!_process_strong_tasks-> is_task_claimed(SH_PS_ObjectSynchronizer_oops_do))
-    ObjectSynchronizer::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_FlatProfiler_oops_do))
-    FlatProfiler::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_Management_oops_do))
-    Management::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_jvmti_oops_do))
-    JvmtiExport::oops_do(strong_roots);
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
-    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
-  }
-
-  // All threads execute the following. A specific chunk of buckets
-  // from the StringTable are the individual tasks.
-  if (weak_roots != NULL) {
-    if (CollectedHeap::use_parallel_gc_threads()) {
-      StringTable::possibly_parallel_oops_do(weak_roots);
-    } else {
-      StringTable::oops_do(weak_roots);
-    }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_CodeCache_oops_do)) {
-    if (so & SO_ScavengeCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
-
-      // We only visit parts of the CodeCache when scavenging.
-      CodeCache::scavenge_root_nmethods_do(code_roots);
-    }
-    if (so & SO_AllCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
-
-      // CMSCollector uses this to do intermediate-strength collections.
-      // We scan the entire code cache, since CodeCache::do_unloading is not called.
-      CodeCache::blobs_do(code_roots);
-    }
-    // Verify that the code cache contents are not subject to
-    // movement by a scavenging collection.
-    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
-    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
-  }
-
-  _process_strong_tasks->all_tasks_completed();
-}
-
-void SharedHeap::process_all_roots(bool activate_scope,
-                                   ScanningOption so,
-                                   OopClosure* roots,
-                                   CLDClosure* cld_closure,
-                                   CodeBlobClosure* code_closure) {
-  process_roots(activate_scope, so,
-                roots, roots,
-                cld_closure, cld_closure,
-                code_closure);
-}
-
-void SharedHeap::process_strong_roots(bool activate_scope,
-                                      ScanningOption so,
-                                      OopClosure* roots,
-                                      CLDClosure* cld_closure,
-                                      CodeBlobClosure* code_closure) {
-  process_roots(activate_scope, so,
-                roots, NULL,
-                cld_closure, NULL,
-                code_closure);
-}
-
-
-class AlwaysTrueClosure: public BoolObjectClosure {
-public:
-  bool do_object_b(oop p) { return true; }
-};
-static AlwaysTrueClosure always_true;
-
-void SharedHeap::process_weak_roots(OopClosure* root_closure) {
-  // Global (weak) JNI handles
-  JNIHandles::weak_oops_do(&always_true, root_closure);
-}
-
 void SharedHeap::set_barrier_set(BarrierSet* bs) {
   _barrier_set = bs;
   // Cached barrier set for fast access in oops
--- a/src/share/vm/memory/sharedHeap.hpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/memory/sharedHeap.hpp	Fri Jun 19 08:06:17 2015 -0700
@@ -61,18 +61,18 @@
 //    counts the number of tasks that have been done and then reset
 //    the SubTasksDone so that it can be used again.  When the number of
 //    tasks is set to the number of GC workers, then _n_threads must
-//    be set to the number of active GC workers. G1CollectedHeap,
-//    HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone.
-//    This seems too many.
+//    be set to the number of active GC workers. G1RootProcessor and
+//    GenCollectedHeap have SubTasksDone.
 //    3) SequentialSubTasksDone has an _n_threads that is used in
 //    a way similar to SubTasksDone and has the same dependency on the
 //    number of active GC workers.  CompactibleFreeListSpace and Space
 //    have SequentialSubTasksDone's.
-// Example of using SubTasksDone and SequentialSubTasksDone
-// G1CollectedHeap::g1_process_roots()
-//  to SharedHeap::process_roots() and uses
-//  SubTasksDone* _process_strong_tasks to claim tasks.
-//  process_roots() calls
+//
+// Examples of using SubTasksDone and SequentialSubTasksDone:
+//  G1RootProcessor and GenCollectedHeap::process_roots() use
+//  SubTasksDone* _process_strong_tasks to claim tasks for workers
+//
+//  GenCollectedHeap::gen_process_roots() calls
 //      rem_set()->younger_refs_iterate()
 //  to scan the card table and which eventually calls down into
 //  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
@@ -104,10 +104,6 @@
   friend class VM_GC_Operation;
   friend class VM_CGC_Operation;
 
-private:
-  // For claiming strong_roots tasks.
-  SubTasksDone* _process_strong_tasks;
-
 protected:
   // There should be only a single instance of "SharedHeap" in a program.
   // This is enforced with the protected constructor below, which will also
@@ -144,7 +140,6 @@
   static SharedHeap* heap() { return _sh; }
 
   void set_barrier_set(BarrierSet* bs);
-  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
 
   // Does operations required after initialization has been done.
   virtual void post_initialize();
@@ -201,69 +196,19 @@
   // strong_roots_prologue calls change_strong_roots_parity, if
   // parallel tasks are enabled.
   class StrongRootsScope : public MarkingCodeBlobClosure::MarkScope {
-    // Used to implement the Thread work barrier.
-    static Monitor* _lock;
-
     SharedHeap*   _sh;
-    volatile jint _n_workers_done_with_threads;
 
    public:
     StrongRootsScope(SharedHeap* heap, bool activate = true);
-    ~StrongRootsScope();
-
-    // Mark that this thread is done with the Threads work.
-    void mark_worker_done_with_threads(uint n_workers);
-    // Wait until all n_workers are done with the Threads work.
-    void wait_until_all_workers_done_with_threads(uint n_workers);
   };
   friend class StrongRootsScope;
 
-  // The current active StrongRootScope
-  StrongRootsScope* _strong_roots_scope;
-
-  StrongRootsScope* active_strong_roots_scope() const;
-
  private:
-  void register_strong_roots_scope(StrongRootsScope* scope);
-  void unregister_strong_roots_scope(StrongRootsScope* scope);
   void change_strong_roots_parity();
 
  public:
-  enum ScanningOption {
-    SO_None                =  0x0,
-    SO_AllCodeCache        =  0x8,
-    SO_ScavengeCodeCache   = 0x10
-  };
-
   FlexibleWorkGang* workers() const { return _workers; }
 
-  // Invoke the "do_oop" method the closure "roots" on all root locations.
-  // The "so" argument determines which roots the closure is applied to:
-  // "SO_None" does none;
-  // "SO_AllCodeCache" applies the closure to all elements of the CodeCache.
-  // "SO_ScavengeCodeCache" applies the closure to elements on the scavenge root list in the CodeCache.
-  void process_roots(bool activate_scope,
-                     ScanningOption so,
-                     OopClosure* strong_roots,
-                     OopClosure* weak_roots,
-                     CLDClosure* strong_cld_closure,
-                     CLDClosure* weak_cld_closure,
-                     CodeBlobClosure* code_roots);
-  void process_all_roots(bool activate_scope,
-                         ScanningOption so,
-                         OopClosure* roots,
-                         CLDClosure* cld_closure,
-                         CodeBlobClosure* code_roots);
-  void process_strong_roots(bool activate_scope,
-                            ScanningOption so,
-                            OopClosure* roots,
-                            CLDClosure* cld_closure,
-                            CodeBlobClosure* code_roots);
-
-
-  // Apply "root_closure" to the JNI weak roots..
-  void process_weak_roots(OopClosure* root_closure);
-
   // The functions below are helper functions that a subclass of
   // "SharedHeap" can use in the implementation of its virtual
   // functions.
@@ -278,9 +223,6 @@
   // (such as process roots) subsequently.
   virtual void set_par_threads(uint t);
 
-  int n_termination();
-  void set_n_termination(int t);
-
   //
   // New methods from CollectedHeap
   //
@@ -292,8 +234,4 @@
                              size_t capacity);
 };
 
-inline SharedHeap::ScanningOption operator|(SharedHeap::ScanningOption so0, SharedHeap::ScanningOption so1) {
-  return static_cast<SharedHeap::ScanningOption>(static_cast<int>(so0) | static_cast<int>(so1));
-}
-
 #endif // SHARE_VM_MEMORY_SHAREDHEAP_HPP
--- a/src/share/vm/runtime/arguments.cpp	Mon Jun 08 14:12:27 2015 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Fri Jun 19 08:06:17 2015 -0700
@@ -1372,6 +1372,12 @@
   if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) {
     CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight);
   }
+
+  if (!ClassUnloading) {
+    FLAG_SET_CMDLINE(bool, CMSClassUnloadingEnabled, false);
+    FLAG_SET_CMDLINE(bool, ExplicitGCInvokesConcurrentAndUnloadsClasses, false);
+  }
+
   if (PrintGCDetails && Verbose) {
     tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
       (unsigned int) (MarkStackSize / K), (uint) (MarkStackSizeMax / K));