changeset 9683:52bbd44b2b7d

Merge
author roland
date Thu, 27 Aug 2015 18:51:22 +0200
parents 514fccb1007c (current diff) bdd657221274 (diff)
children 22400a67babe
files src/share/vm/gc/g1/g1CollectorPolicy_ext.hpp
diffstat 46 files changed, 1202 insertions(+), 451 deletions(-)
--- a/src/os/aix/vm/perfMemory_aix.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/os/aix/vm/perfMemory_aix.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2012, 2013 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -454,13 +454,27 @@
     *saved_cwd_fd = result;
   }
 
-  // Set the current directory to dirname by using the fd of the directory.
+  // Set the current directory to dirname by using the fd of the directory and
+  // handle errors; otherwise shared memory files will be created in the cwd.
   result = fchdir(fd);
-
-  return dirp;
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not change to directory %s", dirname);
+    }
+    if (*saved_cwd_fd != -1) {
+      ::close(*saved_cwd_fd);
+      *saved_cwd_fd = -1;
+    }
+    // Close the directory.
+    os::closedir(dirp);
+    return NULL;
+  } else {
+    return dirp;
+  }
 }
 
 // Close the directory and restore the current working directory.
+//
 static void close_directory_secure_cwd(DIR* dirp, int saved_cwd_fd) {
 
   int result;
--- a/src/os/bsd/vm/perfMemory_bsd.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -375,10 +375,23 @@
     *saved_cwd_fd = result;
   }
 
-  // Set the current directory to dirname by using the fd of the directory.
+  // Set the current directory to dirname by using the fd of the directory and
+  // handle errors; otherwise shared memory files will be created in the cwd.
   result = fchdir(fd);
-
-  return dirp;
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not change to directory %s", dirname);
+    }
+    if (*saved_cwd_fd != -1) {
+      ::close(*saved_cwd_fd);
+      *saved_cwd_fd = -1;
+    }
+    // Close the directory.
+    os::closedir(dirp);
+    return NULL;
+  } else {
+    return dirp;
+  }
 }
 
 // Close the directory and restore the current working directory.
--- a/src/os/linux/vm/perfMemory_linux.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/os/linux/vm/perfMemory_linux.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -374,10 +374,23 @@
     *saved_cwd_fd = result;
   }
 
-  // Set the current directory to dirname by using the fd of the directory.
+  // Set the current directory to dirname by using the fd of the directory and
+  // handle errors; otherwise shared memory files will be created in the cwd.
   result = fchdir(fd);
-
-  return dirp;
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not change to directory %s", dirname);
+    }
+    if (*saved_cwd_fd != -1) {
+      ::close(*saved_cwd_fd);
+      *saved_cwd_fd = -1;
+    }
+    // Close the directory.
+    os::closedir(dirp);
+    return NULL;
+  } else {
+    return dirp;
+  }
 }
 
 // Close the directory and restore the current working directory.
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -377,10 +377,23 @@
     *saved_cwd_fd = result;
   }
 
-  // Set the current directory to dirname by using the fd of the directory.
+  // Set the current directory to dirname by using the fd of the directory and
+  // handle errors; otherwise shared memory files will be created in the cwd.
   result = fchdir(fd);
-
-  return dirp;
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not change to directory %s", dirname);
+    }
+    if (*saved_cwd_fd != -1) {
+      ::close(*saved_cwd_fd);
+      *saved_cwd_fd = -1;
+    }
+    // Close the directory.
+    os::closedir(dirp);
+    return NULL;
+  } else {
+    return dirp;
+  }
 }
 
 // Close the directory and restore the current working directory.
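
The same fchdir error-handling fix is applied verbatim to the AIX, BSD, Linux, and Solaris perfMemory files above. A minimal standalone sketch of the corrected protocol (plain POSIX C++, not HotSpot code; the real functions use os::closedir, OS_ERR, and warning(), and perform additional security checks on the directory):

#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>

// Remember the current working directory in *saved_cwd_fd, then fchdir()
// into dirname. On fchdir() failure, release the saved fd and the DIR*
// and return NULL, instead of silently staying in the old cwd (which is
// what allowed shared memory files to be created in the wrong directory).
static DIR* open_directory_and_chdir(const char* dirname, int* saved_cwd_fd) {
  DIR* dirp = opendir(dirname);
  if (dirp == NULL) {
    return NULL;
  }
  *saved_cwd_fd = open(".", O_RDONLY);
  if (fchdir(dirfd(dirp)) == -1) {
    if (*saved_cwd_fd != -1) {
      close(*saved_cwd_fd);
      *saved_cwd_fd = -1;
    }
    closedir(dirp);
    return NULL;
  }
  return dirp;
}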
--- a/src/share/vm/gc/cms/parNewGeneration.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/cms/parNewGeneration.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -25,7 +25,7 @@
 #include "precompiled.hpp"
 #include "gc/cms/compactibleFreeListSpace.hpp"
 #include "gc/cms/concurrentMarkSweepGeneration.hpp"
-#include "gc/cms/parNewGeneration.hpp"
+#include "gc/cms/parNewGeneration.inline.hpp"
 #include "gc/cms/parOopClosures.inline.hpp"
 #include "gc/serial/defNewGeneration.inline.hpp"
 #include "gc/shared/adaptiveSizePolicy.hpp"
@@ -248,8 +248,7 @@
         }
       }
       if (buf_space != NULL) {
-        plab->set_word_size(buf_size);
-        plab->set_buf(buf_space);
+        plab->set_buf(buf_space, buf_size);
         record_survivor_plab(buf_space, buf_size);
         obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
         // Note that we cannot compare buf_size < word_sz below
--- a/src/share/vm/gc/cms/parNewGeneration.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/cms/parNewGeneration.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -169,11 +169,7 @@
   // Allocate a to-space block of size "sz", or else return NULL.
   HeapWord* alloc_in_to_space_slow(size_t word_sz);
 
-  HeapWord* alloc_in_to_space(size_t word_sz) {
-    HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
-    if (obj != NULL) return obj;
-    else return alloc_in_to_space_slow(word_sz);
-  }
+  inline HeapWord* alloc_in_to_space(size_t word_sz);
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc/cms/parNewGeneration.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP
+#define SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP
+
+#include "gc/cms/parNewGeneration.hpp"
+#include "gc/shared/plab.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+inline HeapWord* ParScanThreadState::alloc_in_to_space(size_t word_sz) {
+  HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  if (obj != NULL) return obj;
+  else return alloc_in_to_space_slow(word_sz);
+}
+#endif // SHARE_VM_GC_CMS_PARNEWGENERATION_INLINE_HPP
--- a/src/share/vm/gc/g1/g1AllocRegion.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1AllocRegion.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -46,10 +46,11 @@
   _dummy_region = dummy_region;
 }
 
-void G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region,
-                                            bool bot_updates) {
+size_t G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region,
+                                              bool bot_updates) {
   assert(alloc_region != NULL && alloc_region != _dummy_region,
          "pre-condition");
+  size_t result = 0;
 
   // Other threads might still be trying to allocate using a CAS out
   // of the region we are trying to retire, as they can do so without
@@ -73,6 +74,7 @@
       // If the allocation was successful we should fill in the space.
       CollectedHeap::fill_with_object(dummy, free_word_size);
       alloc_region->set_pre_dummy_top(dummy);
+      result += free_word_size * HeapWordSize;
       break;
     }
 
@@ -81,13 +83,18 @@
     // allocation and they fill up the region. In that case, we can
     // just get out of the loop.
   }
+  result += alloc_region->free();
+
   assert(alloc_region->free() / HeapWordSize < min_word_size_to_fill,
          "post-condition");
+  return result;
 }
 
-void G1AllocRegion::retire(bool fill_up) {
+size_t G1AllocRegion::retire(bool fill_up) {
   assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
 
+  size_t result = 0;
+
   trace("retiring");
   HeapRegion* alloc_region = _alloc_region;
   if (alloc_region != _dummy_region) {
@@ -98,7 +105,7 @@
            ar_ext_msg(this, "the alloc region should never be empty"));
 
     if (fill_up) {
-      fill_up_remaining_space(alloc_region, _bot_updates);
+      result = fill_up_remaining_space(alloc_region, _bot_updates);
     }
 
     assert(alloc_region->used() >= _used_bytes_before,
@@ -109,6 +116,8 @@
     _alloc_region = _dummy_region;
   }
   trace("retired");
+
+  return result;
 }
 
 HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size,
@@ -196,11 +205,11 @@
 }
 
 #if G1_ALLOC_REGION_TRACING
-void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) {
+void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_word_size, size_t actual_word_size, HeapWord* result) {
   // All the calls to trace that set either just the size or the size
   // and the result are considered part of level 2 tracing and are
   // skipped during level 1 tracing.
-  if ((word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
+  if ((actual_word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
     const size_t buffer_length = 128;
     char hr_buffer[buffer_length];
     char rest_buffer[buffer_length];
@@ -217,10 +226,10 @@
 
     if (G1_ALLOC_REGION_TRACING > 1) {
       if (result != NULL) {
-        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT " " PTR_FORMAT,
-                     word_size, result);
-      } else if (word_size != 0) {
-        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT, word_size);
+        jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT " actual " SIZE_FORMAT " " PTR_FORMAT,
+                     min_word_size, desired_word_size, actual_word_size, result);
+      } else if (min_word_size != 0) {
+        jio_snprintf(rest_buffer, buffer_length, "min " SIZE_FORMAT " desired " SIZE_FORMAT, min_word_size, desired_word_size);
       } else {
         jio_snprintf(rest_buffer, buffer_length, "");
       }
@@ -251,26 +260,25 @@
   _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
 }
 
-HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size,
-                                                       bool force) {
+HeapRegion* G1GCAllocRegion::allocate_new_region(size_t word_size,
+                                                 bool force) {
   assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young);
+  return _g1h->new_gc_alloc_region(word_size, count(), _purpose);
 }
 
-void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region,
-                                          size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young);
+void G1GCAllocRegion::retire_region(HeapRegion* alloc_region,
+                                    size_t allocated_bytes) {
+  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, _purpose);
 }
 
-HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size,
-                                                  bool force) {
-  assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old);
-}
-
-void OldGCAllocRegion::retire_region(HeapRegion* alloc_region,
-                                     size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old);
+size_t G1GCAllocRegion::retire(bool fill_up) {
+  HeapRegion* retired = get();
+  size_t end_waste = G1AllocRegion::retire(fill_up);
+  // Do not count retirement of the dummy allocation region.
+  if (retired != NULL) {
+    _stats->add_region_end_waste(end_waste / HeapWordSize);
+  }
+  return end_waste;
 }
 
 HeapRegion* OldGCAllocRegion::release() {
--- a/src/share/vm/gc/g1/g1AllocRegion.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1AllocRegion.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -26,6 +26,8 @@
 #define SHARE_VM_GC_G1_G1ALLOCREGION_HPP
 
 #include "gc/g1/heapRegion.hpp"
+#include "gc/g1/g1EvacStats.hpp"
+#include "gc/g1/g1InCSetState.hpp"
 
 class G1CollectedHeap;
 
@@ -102,16 +104,22 @@
   static inline HeapWord* par_allocate(HeapRegion* alloc_region,
                                        size_t word_size,
                                        bool bot_updates);
+  // Perform an MT-safe allocation out of the given region, with the given
+  // minimum and desired size. Returns the actual size allocated (between
+  // minimum and desired size) in actual_word_size if the allocation has been
+  // successful.
+  static inline HeapWord* par_allocate(HeapRegion* alloc_region,
+                                       size_t min_word_size,
+                                       size_t desired_word_size,
+                                       size_t* actual_word_size,
+                                       bool bot_updates);
 
   // Ensure that the region passed as a parameter has been filled up
  // so that no one else can allocate out of it any more.
-  static void fill_up_remaining_space(HeapRegion* alloc_region,
-                                      bool bot_updates);
-
-  // Retire the active allocating region. If fill_up is true then make
-  // sure that the region is full before we retire it so that noone
-  // else can allocate out of it.
-  void retire(bool fill_up);
+  // Returns the number of bytes that have been wasted by filling up
+  // the space.
+  static size_t fill_up_remaining_space(HeapRegion* alloc_region,
+                                        bool bot_updates);
 
   // After a region is allocated by alloc_new_region, this
   // method is used to set it as the active alloc_region
@@ -126,6 +134,12 @@
   void fill_in_ext_msg(ar_ext_msg* msg, const char* message);
 
 protected:
+  // Retire the active allocating region. If fill_up is true then make
+  // sure that the region is full before we retire it so that no one
+  // else can allocate out of it.
+  // Returns the number of bytes that have been filled up during retirement.
+  virtual size_t retire(bool fill_up);
+
   // For convenience as subclasses use it.
   static G1CollectedHeap* _g1h;
 
@@ -154,7 +168,18 @@
   // First-level allocation: Should be called without holding a
   // lock. It will try to allocate lock-free out of the active region,
   // or return NULL if it was unable to.
-  inline HeapWord* attempt_allocation(size_t word_size, bool bot_updates);
+  inline HeapWord* attempt_allocation(size_t word_size,
+                                      bool bot_updates);
+  // Perform an allocation out of the current allocation region, with the given
+  // minimum and desired size. Returns the actual size allocated (between
+  // minimum and desired size) in actual_word_size if the allocation has been
+  // successful.
+  // Should be called without holding a lock. It will try to allocate lock-free
+  // out of the active region, or return NULL if it was unable to.
+  inline HeapWord* attempt_allocation(size_t min_word_size,
+                                      size_t desired_word_size,
+                                      size_t* actual_word_size,
+                                      bool bot_updates);
 
   // Second-level allocation: Should be called while holding a
   // lock. It will try to first allocate lock-free out of the active
@@ -164,6 +189,14 @@
   // it conform to its locking protocol.
   inline HeapWord* attempt_allocation_locked(size_t word_size,
                                              bool bot_updates);
+  // Same as attempt_allocation_locked(size_t, bool), but allowing specification
+  // of the minimum word size of the block in min_word_size and the maximum word
+  // size of the allocation in desired_word_size. The actual size of the block is
+  // returned in actual_word_size.
+  inline HeapWord* attempt_allocation_locked(size_t min_word_size,
+                                             size_t desired_word_size,
+                                             size_t* actual_word_size,
+                                             bool bot_updates);
 
   // Should be called to allocate a new region even if the max of this
   // type of regions has been reached. Should only be called if other
@@ -186,9 +219,17 @@
   virtual HeapRegion* release();
 
 #if G1_ALLOC_REGION_TRACING
-  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL);
+  void trace(const char* str,
+             size_t min_word_size = 0,
+             size_t desired_word_size = 0,
+             size_t actual_word_size = 0,
+             HeapWord* result = NULL);
 #else // G1_ALLOC_REGION_TRACING
-  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL) { }
+  void trace(const char* str,
+             size_t min_word_size = 0,
+             size_t desired_word_size = 0,
+             size_t actual_word_size = 0,
+             HeapWord* result = NULL) { }
 #endif // G1_ALLOC_REGION_TRACING
 };
 
@@ -201,22 +242,33 @@
     : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
 };
 
-class SurvivorGCAllocRegion : public G1AllocRegion {
+// Common base class for allocation regions used during GC.
+class G1GCAllocRegion : public G1AllocRegion {
 protected:
+  G1EvacStats* _stats;
+  InCSetState::in_cset_state_t _purpose;
+
   virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
   virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes);
+
+  virtual size_t retire(bool fill_up);
 public:
-  SurvivorGCAllocRegion()
-  : G1AllocRegion("Survivor GC Alloc Region", false /* bot_updates */) { }
+  G1GCAllocRegion(const char* name, bool bot_updates, G1EvacStats* stats, InCSetState::in_cset_state_t purpose)
+  : G1AllocRegion(name, bot_updates), _stats(stats), _purpose(purpose) {
+    assert(stats != NULL, "Must pass non-NULL PLAB statistics");
+  }
 };
 
-class OldGCAllocRegion : public G1AllocRegion {
-protected:
-  virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
-  virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes);
+class SurvivorGCAllocRegion : public G1GCAllocRegion {
 public:
-  OldGCAllocRegion()
-  : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { }
+  SurvivorGCAllocRegion(G1EvacStats* stats)
+  : G1GCAllocRegion("Survivor GC Alloc Region", false /* bot_updates */, stats, InCSetState::Young) { }
+};
+
+class OldGCAllocRegion : public G1GCAllocRegion {
+public:
+  OldGCAllocRegion(G1EvacStats* stats)
+  : G1GCAllocRegion("Old GC Alloc Region", true /* bot_updates */, stats, InCSetState::Old) { }
 
   // This specialization of release() makes sure that the last card that has
   // been allocated into has been completely filled by a dummy object.  This
--- a/src/share/vm/gc/g1/g1AllocRegion.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1AllocRegion.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -40,52 +40,74 @@
   }
 }
 
+inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region, size_t word_size, bool bot_updates) {
+  size_t temp;
+  return par_allocate(alloc_region, word_size, word_size, &temp, bot_updates);
+}
+
 inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region,
-                                             size_t word_size,
+                                             size_t min_word_size,
+                                             size_t desired_word_size,
+                                             size_t* actual_word_size,
                                              bool bot_updates) {
   assert(alloc_region != NULL, err_msg("pre-condition"));
   assert(!alloc_region->is_empty(), err_msg("pre-condition"));
 
   if (!bot_updates) {
-    return alloc_region->par_allocate_no_bot_updates(word_size);
+    return alloc_region->par_allocate_no_bot_updates(min_word_size, desired_word_size, actual_word_size);
   } else {
-    return alloc_region->par_allocate(word_size);
+    return alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
   }
 }
 
-inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size,
+inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size, bool bot_updates) {
+  size_t temp;
+  return attempt_allocation(word_size, word_size, &temp, bot_updates);
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size,
+                                                   size_t desired_word_size,
+                                                   size_t* actual_word_size,
                                                    bool bot_updates) {
   assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
 
   HeapRegion* alloc_region = _alloc_region;
   assert(alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
 
-  HeapWord* result = par_allocate(alloc_region, word_size, bot_updates);
+  HeapWord* result = par_allocate(alloc_region, min_word_size, desired_word_size, actual_word_size, bot_updates);
   if (result != NULL) {
-    trace("alloc", word_size, result);
+    trace("alloc", min_word_size, desired_word_size, *actual_word_size, result);
     return result;
   }
-  trace("alloc failed", word_size);
+  trace("alloc failed", min_word_size, desired_word_size);
   return NULL;
 }
 
-inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size,
+inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size, bool bot_updates) {
+  size_t temp;
+  return attempt_allocation_locked(word_size, word_size, &temp, bot_updates);
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t min_word_size,
+                                                          size_t desired_word_size,
+                                                          size_t* actual_word_size,
                                                           bool bot_updates) {
   // First we have to redo the allocation, assuming we're holding the
   // appropriate lock, in case another thread changed the region while
   // we were waiting to get the lock.
-  HeapWord* result = attempt_allocation(word_size, bot_updates);
+  HeapWord* result = attempt_allocation(min_word_size, desired_word_size, actual_word_size, bot_updates);
   if (result != NULL) {
     return result;
   }
 
   retire(true /* fill_up */);
-  result = new_alloc_region_and_allocate(word_size, false /* force */);
+  result = new_alloc_region_and_allocate(desired_word_size, false /* force */);
   if (result != NULL) {
-    trace("alloc locked (second attempt)", word_size, result);
+    *actual_word_size = desired_word_size;
+    trace("alloc locked (second attempt)", min_word_size, desired_word_size, *actual_word_size, result);
     return result;
   }
-  trace("alloc locked failed", word_size);
+  trace("alloc locked failed", min_word_size, desired_word_size);
   return NULL;
 }
 
@@ -94,13 +116,13 @@
   assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
   assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
 
-  trace("forcing alloc");
+  trace("forcing alloc", word_size, word_size);
   HeapWord* result = new_alloc_region_and_allocate(word_size, true /* force */);
   if (result != NULL) {
-    trace("alloc forced", word_size, result);
+    trace("alloc forced", word_size, word_size, word_size, result);
     return result;
   }
-  trace("alloc forced failed", word_size);
+  trace("alloc forced failed", word_size, word_size);
   return NULL;
 }
 
--- a/src/share/vm/gc/g1/g1Allocator.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1Allocator.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -24,12 +24,20 @@
 
 #include "precompiled.hpp"
 #include "gc/g1/g1Allocator.inline.hpp"
+#include "gc/g1/g1AllocRegion.inline.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorPolicy.hpp"
 #include "gc/g1/g1MarkSweep.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
 #include "gc/g1/heapRegionSet.inline.hpp"
 
+G1DefaultAllocator::G1DefaultAllocator(G1CollectedHeap* heap) :
+  G1Allocator(heap),
+  _retained_old_gc_alloc_region(NULL),
+  _survivor_gc_alloc_region(heap->alloc_buffer_stats(InCSetState::Young)),
+  _old_gc_alloc_region(heap->alloc_buffer_stats(InCSetState::Old)) {
+}
+
 void G1DefaultAllocator::init_mutator_alloc_region() {
   assert(_mutator_alloc_region.get() == NULL, "pre-condition");
   _mutator_alloc_region.init();
@@ -79,6 +87,8 @@
 void G1DefaultAllocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
   assert_at_safepoint(true /* should_be_vm_thread */);
 
+  G1Allocator::init_gc_alloc_regions(evacuation_info);
+
   _survivor_gc_alloc_region.init();
   _old_gc_alloc_region.init();
   reuse_retained_old_region(evacuation_info,
@@ -101,10 +111,8 @@
     _retained_old_gc_alloc_region->record_retained_region();
   }
 
-  if (ResizePLAB) {
-    _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz();
-    _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz();
-  }
+  _g1h->alloc_buffer_stats(InCSetState::Young)->adjust_desired_plab_sz();
+  _g1h->alloc_buffer_stats(InCSetState::Old)->adjust_desired_plab_sz();
 }
 
 void G1DefaultAllocator::abandon_gc_alloc_regions() {
@@ -136,78 +144,159 @@
 HeapWord* G1Allocator::par_allocate_during_gc(InCSetState dest,
                                               size_t word_size,
                                               AllocationContext_t context) {
+  size_t temp = 0;
+  HeapWord* result = par_allocate_during_gc(dest, word_size, word_size, &temp, context);
+  assert(result == NULL || temp == word_size,
+         err_msg("Requested " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT,
+                 word_size, temp, p2i(result)));
+  return result;
+}
+
+HeapWord* G1Allocator::par_allocate_during_gc(InCSetState dest,
+                                              size_t min_word_size,
+                                              size_t desired_word_size,
+                                              size_t* actual_word_size,
+                                              AllocationContext_t context) {
   switch (dest.value()) {
     case InCSetState::Young:
-      return survivor_attempt_allocation(word_size, context);
+      return survivor_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context);
     case InCSetState::Old:
-      return old_attempt_allocation(word_size, context);
+      return old_attempt_allocation(min_word_size, desired_word_size, actual_word_size, context);
     default:
       ShouldNotReachHere();
       return NULL; // Keep some compilers happy
   }
 }
 
-HeapWord* G1Allocator::survivor_attempt_allocation(size_t word_size,
+bool G1Allocator::survivor_is_full(AllocationContext_t context) const {
+  return _survivor_is_full;
+}
+
+bool G1Allocator::old_is_full(AllocationContext_t context) const {
+  return _old_is_full;
+}
+
+void G1Allocator::set_survivor_full(AllocationContext_t context) {
+  _survivor_is_full = true;
+}
+
+void G1Allocator::set_old_full(AllocationContext_t context) {
+  _old_is_full = true;
+}
+
+HeapWord* G1Allocator::survivor_attempt_allocation(size_t min_word_size,
+                                                   size_t desired_word_size,
+                                                   size_t* actual_word_size,
                                                    AllocationContext_t context) {
-  assert(!_g1h->is_humongous(word_size),
+  assert(!_g1h->is_humongous(desired_word_size),
          "we should not be seeing humongous-size allocations in this path");
 
-  HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(word_size,
+  HeapWord* result = survivor_gc_alloc_region(context)->attempt_allocation(min_word_size,
+                                                                           desired_word_size,
+                                                                           actual_word_size,
                                                                            false /* bot_updates */);
-  if (result == NULL) {
+  if (result == NULL && !survivor_is_full(context)) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    result = survivor_gc_alloc_region(context)->attempt_allocation_locked(word_size,
+    result = survivor_gc_alloc_region(context)->attempt_allocation_locked(min_word_size,
+                                                                          desired_word_size,
+                                                                          actual_word_size,
                                                                           false /* bot_updates */);
+    if (result == NULL) {
+      set_survivor_full(context);
+    }
   }
   if (result != NULL) {
-    _g1h->dirty_young_block(result, word_size);
+    _g1h->dirty_young_block(result, *actual_word_size);
   }
   return result;
 }
 
-HeapWord* G1Allocator::old_attempt_allocation(size_t word_size,
+HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size,
+                                              size_t desired_word_size,
+                                              size_t* actual_word_size,
                                               AllocationContext_t context) {
-  assert(!_g1h->is_humongous(word_size),
+  assert(!_g1h->is_humongous(desired_word_size),
          "we should not be seeing humongous-size allocations in this path");
 
-  HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(word_size,
+  HeapWord* result = old_gc_alloc_region(context)->attempt_allocation(min_word_size,
+                                                                      desired_word_size,
+                                                                      actual_word_size,
                                                                       true /* bot_updates */);
-  if (result == NULL) {
+  if (result == NULL && !old_is_full(context)) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    result = old_gc_alloc_region(context)->attempt_allocation_locked(word_size,
+    result = old_gc_alloc_region(context)->attempt_allocation_locked(min_word_size,
+                                                                     desired_word_size,
+                                                                     actual_word_size,
                                                                      true /* bot_updates */);
+    if (result == NULL) {
+      set_old_full(context);
+    }
   }
   return result;
 }
 
+void G1Allocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
+  _survivor_is_full = false;
+  _old_is_full = false;
+}
+
 G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) :
   _g1h(G1CollectedHeap::heap()),
   _allocator(allocator),
   _survivor_alignment_bytes(calc_survivor_alignment_bytes()) {
+  for (size_t i = 0; i < ARRAY_SIZE(_direct_allocated); i++) {
+    _direct_allocated[i] = 0;
+  }
+}
+
+bool G1PLABAllocator::may_throw_away_buffer(size_t const allocation_word_sz, size_t const buffer_size) const {
+  return (allocation_word_sz * 100 < buffer_size * ParallelGCBufferWastePct);
 }
 
 HeapWord* G1PLABAllocator::allocate_direct_or_new_plab(InCSetState dest,
                                                        size_t word_sz,
-                                                       AllocationContext_t context) {
-  size_t gclab_word_size = _g1h->desired_plab_sz(dest);
-  if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
+                                                       AllocationContext_t context,
+                                                       bool* plab_refill_failed) {
+  size_t plab_word_size = G1CollectedHeap::heap()->desired_plab_sz(dest);
+  size_t required_in_plab = PLAB::size_required_for_allocation(word_sz);
+
+  // Only get a new PLAB if the allocation fits and it would not waste more than
+  // ParallelGCBufferWastePct in the existing buffer.
+  if ((required_in_plab <= plab_word_size) &&
+    may_throw_away_buffer(required_in_plab, plab_word_size)) {
+
     G1PLAB* alloc_buf = alloc_buffer(dest, context);
     alloc_buf->retire();
 
-    HeapWord* buf = _allocator->par_allocate_during_gc(dest, gclab_word_size, context);
-    if (buf == NULL) {
-      return NULL; // Let caller handle allocation failure.
+    size_t actual_plab_size = 0;
+    HeapWord* buf = _allocator->par_allocate_during_gc(dest,
+                                                       required_in_plab,
+                                                       plab_word_size,
+                                                       &actual_plab_size,
+                                                       context);
+
+    assert(buf == NULL || ((actual_plab_size >= required_in_plab) && (actual_plab_size <= plab_word_size)),
+           err_msg("Requested at minimum " SIZE_FORMAT ", desired " SIZE_FORMAT " words, but got " SIZE_FORMAT " at " PTR_FORMAT,
+                   required_in_plab, plab_word_size, actual_plab_size, p2i(buf)));
+
+    if (buf != NULL) {
+      alloc_buf->set_buf(buf, actual_plab_size);
+
+      HeapWord* const obj = alloc_buf->allocate(word_sz);
+      assert(obj != NULL, err_msg("PLAB should have been big enough, tried to allocate "
+                                  SIZE_FORMAT " requiring " SIZE_FORMAT " PLAB size " SIZE_FORMAT,
+                                  word_sz, required_in_plab, plab_word_size));
+      return obj;
     }
     // Otherwise.
-    alloc_buf->set_word_size(gclab_word_size);
-    alloc_buf->set_buf(buf);
-
-    HeapWord* const obj = alloc_buf->allocate(word_sz);
-    assert(obj != NULL, "buffer was definitely big enough...");
-    return obj;
-  } else {
-    return _allocator->par_allocate_during_gc(dest, word_sz, context);
+    *plab_refill_failed = true;
   }
+  // Try direct allocation.
+  HeapWord* result = _allocator->par_allocate_during_gc(dest, word_sz, context);
+  if (result != NULL) {
+    _direct_allocated[dest.value()] += word_sz;
+  }
+  return result;
 }
 
 void G1PLABAllocator::undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
@@ -225,11 +314,14 @@
   _alloc_buffers[InCSetState::Old]  = &_tenured_alloc_buffer;
 }
 
-void G1DefaultPLABAllocator::retire_alloc_buffers() {
+void G1DefaultPLABAllocator::flush_and_retire_stats() {
   for (uint state = 0; state < InCSetState::Num; state++) {
     G1PLAB* const buf = _alloc_buffers[state];
     if (buf != NULL) {
-      buf->flush_and_retire_stats(_g1h->alloc_buffer_stats(state));
+      G1EvacStats* stats = _g1h->alloc_buffer_stats(state);
+      buf->flush_and_retire_stats(stats);
+      stats->add_direct_allocated(_direct_allocated[state]);
+      _direct_allocated[state] = 0;
     }
   }
 }
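
The refill heuristic behind may_throw_away_buffer reduces to a single comparison: a new PLAB is taken only when the remainder of the current one is cheap to throw away. Restated standalone (a sketch; in HotSpot the percentage comes from ParallelGCBufferWastePct, which defaults to 10):

#include <cstddef>

// The request must be smaller than waste_pct percent of a full buffer.
// E.g. with buffer_size = 4096 words and waste_pct = 10, requests of up
// to 409 words may retire the old PLAB and refill it; anything larger
// falls through to a direct allocation in the region instead.
static bool may_throw_away_buffer(size_t allocation_word_sz,
                                  size_t buffer_size,
                                  size_t waste_pct) {
  return allocation_word_sz * 100 < buffer_size * waste_pct;
}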
--- a/src/share/vm/gc/g1/g1Allocator.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1Allocator.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -38,23 +38,36 @@
 // Also keeps track of retained regions across GCs.
 class G1Allocator : public CHeapObj<mtGC> {
   friend class VMStructs;
+private:
+  bool _survivor_is_full;
+  bool _old_is_full;
 protected:
   G1CollectedHeap* _g1h;
 
   virtual MutatorAllocRegion* mutator_alloc_region(AllocationContext_t context) = 0;
 
+  virtual bool survivor_is_full(AllocationContext_t context) const;
+  virtual bool old_is_full(AllocationContext_t context) const;
+
+  virtual void set_survivor_full(AllocationContext_t context);
+  virtual void set_old_full(AllocationContext_t context);
+
   // Accessors to the allocation regions.
   virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) = 0;
   virtual OldGCAllocRegion* old_gc_alloc_region(AllocationContext_t context) = 0;
 
   // Allocation attempt during GC for a survivor object / PLAB.
-  inline HeapWord* survivor_attempt_allocation(size_t word_size,
+  inline HeapWord* survivor_attempt_allocation(size_t min_word_size,
+                                               size_t desired_word_size,
+                                               size_t* actual_word_size,
                                                AllocationContext_t context);
   // Allocation attempt during GC for an old object / PLAB.
-  inline HeapWord* old_attempt_allocation(size_t word_size,
+  inline HeapWord* old_attempt_allocation(size_t min_word_size,
+                                          size_t desired_word_size,
+                                          size_t* actual_word_size,
                                           AllocationContext_t context);
 public:
-  G1Allocator(G1CollectedHeap* heap) : _g1h(heap) { }
+  G1Allocator(G1CollectedHeap* heap) : _g1h(heap), _survivor_is_full(false), _old_is_full(false) { }
   virtual ~G1Allocator() { }
 
   static G1Allocator* create_allocator(G1CollectedHeap* g1h);
@@ -66,7 +79,7 @@
   virtual void init_mutator_alloc_region() = 0;
   virtual void release_mutator_alloc_region() = 0;
 
-  virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
+  virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
   virtual void release_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0;
   virtual void abandon_gc_alloc_regions() = 0;
 
@@ -93,6 +106,12 @@
                                    size_t word_size,
                                    AllocationContext_t context);
 
+  HeapWord* par_allocate_during_gc(InCSetState dest,
+                                   size_t min_word_size,
+                                   size_t desired_word_size,
+                                   size_t* actual_word_size,
+                                   AllocationContext_t context);
+
   virtual size_t used_in_alloc_regions() = 0;
 };
 
@@ -114,7 +133,7 @@
 
   HeapRegion* _retained_old_gc_alloc_region;
 public:
-  G1DefaultAllocator(G1CollectedHeap* heap) : G1Allocator(heap), _retained_old_gc_alloc_region(NULL) { }
+  G1DefaultAllocator(G1CollectedHeap* heap);
 
   virtual void init_mutator_alloc_region();
   virtual void release_mutator_alloc_region();
@@ -163,8 +182,12 @@
     guarantee(_retired, "Allocation buffer has not been retired");
   }
 
-  virtual void set_buf(HeapWord* buf) {
-    PLAB::set_buf(buf);
+  // The amount of space in words wasted within the PLAB including
+  // waste due to refills and alignment.
+  size_t wasted() const { return _wasted; }
+
+  virtual void set_buf(HeapWord* buf, size_t word_size) {
+    PLAB::set_buf(buf, word_size);
     _retired = false;
   }
 
@@ -198,7 +221,10 @@
   // architectures have a special compare against zero instructions.
   const uint _survivor_alignment_bytes;
 
-  virtual void retire_alloc_buffers() = 0;
+  // Number of words allocated directly (not counting PLAB allocation).
+  size_t _direct_allocated[InCSetState::Num];
+
+  virtual void flush_and_retire_stats() = 0;
   virtual G1PLAB* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0;
 
   // Calculate the survivor space object alignment in bytes. Returns that or 0 if
@@ -215,6 +241,11 @@
     }
   }
 
+  HeapWord* allocate_new_plab(InCSetState dest,
+                              size_t word_sz,
+                              AllocationContext_t context);
+
+  bool may_throw_away_buffer(size_t const allocation_word_sz, size_t const buffer_size) const;
 public:
   G1PLABAllocator(G1Allocator* allocator);
   virtual ~G1PLABAllocator() { }
@@ -225,31 +256,28 @@
 
   // Allocate word_sz words in dest, either directly into the regions or by
   // allocating a new PLAB. Returns the address of the allocated memory, NULL if
-  // not successful.
+  // not successful. plab_refill_failed indicates whether an attempt to refill
+  // the PLAB failed.
   HeapWord* allocate_direct_or_new_plab(InCSetState dest,
                                         size_t word_sz,
-                                        AllocationContext_t context);
+                                        AllocationContext_t context,
+                                        bool* plab_refill_failed);
 
   // Allocate word_sz words in the PLAB of dest.  Returns the address of the
   // allocated memory, NULL if not successful.
-  HeapWord* plab_allocate(InCSetState dest,
-                          size_t word_sz,
-                          AllocationContext_t context) {
-    G1PLAB* buffer = alloc_buffer(dest, context);
-    if (_survivor_alignment_bytes == 0 || !dest.is_young()) {
-      return buffer->allocate(word_sz);
-    } else {
-      return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes);
-    }
-  }
+  inline HeapWord* plab_allocate(InCSetState dest,
+                                 size_t word_sz,
+                                 AllocationContext_t context);
 
-  HeapWord* allocate(InCSetState dest, size_t word_sz,
-                     AllocationContext_t context) {
+  HeapWord* allocate(InCSetState dest,
+                     size_t word_sz,
+                     AllocationContext_t context,
+                     bool* refill_failed) {
     HeapWord* const obj = plab_allocate(dest, word_sz, context);
     if (obj != NULL) {
       return obj;
     }
-    return allocate_direct_or_new_plab(dest, word_sz, context);
+    return allocate_direct_or_new_plab(dest, word_sz, context, refill_failed);
   }
 
   void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context);
@@ -273,7 +301,7 @@
     return _alloc_buffers[dest.value()];
   }
 
-  virtual void retire_alloc_buffers();
+  virtual void flush_and_retire_stats();
 
   virtual void waste(size_t& wasted, size_t& undo_wasted);
 };
--- a/src/share/vm/gc/g1/g1Allocator.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1Allocator.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -27,6 +27,7 @@
 
 #include "gc/g1/g1Allocator.hpp"
 #include "gc/g1/g1AllocRegion.inline.hpp"
+#include "gc/shared/plab.inline.hpp"
 
 HeapWord* G1Allocator::attempt_allocation(size_t word_size, AllocationContext_t context) {
   return mutator_alloc_region(context)->attempt_allocation(word_size, false /* bot_updates */);
@@ -43,4 +44,15 @@
   return mutator_alloc_region(context)->attempt_allocation_force(word_size, false /* bot_updates */);
 }
 
+inline HeapWord* G1PLABAllocator::plab_allocate(InCSetState dest,
+                                                size_t word_sz,
+                                                AllocationContext_t context) {
+  G1PLAB* buffer = alloc_buffer(dest, context);
+  if (_survivor_alignment_bytes == 0 || !dest.is_young()) {
+    return buffer->allocate(word_sz);
+  } else {
+    return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes);
+  }
+}
+
 #endif // SHARE_VM_GC_G1_G1ALLOCATOR_HPP
--- a/src/share/vm/gc/g1/g1Allocator_ext.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1Allocator_ext.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "gc/g1/g1Allocator.hpp"
+#include "gc/g1/g1Allocator.inline.hpp"
 #include "gc/g1/g1CollectedHeap.hpp"
 
 G1Allocator* G1Allocator::create_allocator(G1CollectedHeap* g1h) {
--- a/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -26,7 +26,7 @@
 #define SHARE_VM_GC_G1_G1BLOCKOFFSETTABLE_INLINE_HPP
 
 #include "gc/g1/g1BlockOffsetTable.hpp"
-#include "gc/g1/heapRegion.inline.hpp"
+#include "gc/g1/heapRegion.hpp"
 #include "gc/shared/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
--- a/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1944,8 +1944,8 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _summary_bytes_used(0),
-  _survivor_plab_stats(YoungPLABSize, PLABWeight),
-  _old_plab_stats(OldPLABSize, PLABWeight),
+  _survivor_evac_stats(YoungPLABSize, PLABWeight),
+  _old_evac_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
@@ -3504,6 +3504,13 @@
   return G1HeapSummary(heap_summary, used(), eden_used_bytes, eden_capacity_bytes, survivor_used_bytes);
 }
 
+G1EvacSummary G1CollectedHeap::create_g1_evac_summary(G1EvacStats* stats) {
+  return G1EvacSummary(stats->allocated(), stats->wasted(), stats->undo_wasted(),
+                       stats->unused(), stats->used(), stats->region_end_waste(),
+                       stats->regions_filled(), stats->direct_allocated(),
+                       stats->failure_used(), stats->failure_waste());
+}
+
 void G1CollectedHeap::trace_heap(GCWhen::Type when, const GCTracer* gc_tracer) {
   const G1HeapSummary& heap_summary = create_g1_heap_summary();
   gc_tracer->report_gc_heap_summary(when, heap_summary);
@@ -3753,8 +3760,7 @@
   cl.flush_rem_set_entries();
 }
 
-void
-G1CollectedHeap::setup_surviving_young_words() {
+void G1CollectedHeap::setup_surviving_young_words() {
   assert(_surviving_young_words == NULL, "pre-condition");
   uint array_length = g1_policy()->young_cset_region_length();
   _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC);
@@ -3770,17 +3776,15 @@
 #endif // !ASSERT
 }
 
-void
-G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
-  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+void G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
+  assert_at_safepoint(true);
   uint array_length = g1_policy()->young_cset_region_length();
   for (uint i = 0; i < array_length; ++i) {
     _surviving_young_words[i] += surv_young_words[i];
   }
 }
 
-void
-G1CollectedHeap::cleanup_surviving_young_words() {
+void G1CollectedHeap::cleanup_surviving_young_words() {
   guarantee( _surviving_young_words != NULL, "pre-condition" );
   FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
   _surviving_young_words = NULL;
@@ -4375,6 +4379,13 @@
 }
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
+private:
+  double _start_term;
+  double _term_time;
+  size_t _term_attempts;
+
+  void start_term_time() { _term_attempts++; _start_term = os::elapsedTime(); }
+  void end_term_time() { _term_time += os::elapsedTime() - _start_term; }
 protected:
   G1CollectedHeap*              _g1h;
   G1ParScanThreadState*         _par_scan_state;
@@ -4391,19 +4402,23 @@
                                 RefToScanQueueSet* queues,
                                 ParallelTaskTerminator* terminator)
     : _g1h(g1h), _par_scan_state(par_scan_state),
-      _queues(queues), _terminator(terminator) {}
+      _queues(queues), _terminator(terminator),
+      _start_term(0.0), _term_time(0.0), _term_attempts(0) {}
 
   void do_void();
 
+  double term_time() const { return _term_time; }
+  size_t term_attempts() const { return _term_attempts; }
+
 private:
   inline bool offer_termination();
 };
 
 bool G1ParEvacuateFollowersClosure::offer_termination() {
   G1ParScanThreadState* const pss = par_scan_state();
-  pss->start_term_time();
+  start_term_time();
   const bool res = terminator()->offer_termination();
-  pss->end_term_time();
+  end_term_time();
   return res;
 }
 
@@ -4444,15 +4459,17 @@
 class G1ParTask : public AbstractGangTask {
 protected:
   G1CollectedHeap*       _g1h;
-  RefToScanQueueSet      *_queues;
+  G1ParScanThreadState** _pss;
+  RefToScanQueueSet*     _queues;
   G1RootProcessor*       _root_processor;
   ParallelTaskTerminator _terminator;
   uint _n_workers;
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
+  G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
+      _pss(per_thread_states),
       _queues(task_queues),
       _root_processor(root_processor),
       _terminator(n_workers, _queues),
@@ -4499,7 +4516,8 @@
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
-    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
+    double start_sec = os::elapsedTime();
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, start_sec);
 
     {
       ResourceMark rm;
@@ -4507,23 +4525,24 @@
 
       ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-      G1ParScanThreadState            pss(_g1h, worker_id, rp);
+      G1ParScanThreadState*           pss = _pss[worker_id];
+      pss->set_ref_processor(rp);
 
       bool only_young = _g1h->collector_state()->gcs_are_young();
 
       // Non-IM young GC.
-      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkNone>                                scan_only_cld_cl(&scan_only_root_cl,
                                                                                only_young, // Only process dirty klasses.
                                                                                false);     // No need to claim CLDs.
       // IM young GC.
       //    Strong roots closures.
-      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkFromRoot>                            scan_mark_cld_cl(&scan_mark_root_cl,
                                                                                false, // Process all klasses.
                                                                                true); // Need to claim CLDs.
       //    Weak roots closures.
-      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkPromotedFromRoot>                    scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
                                                                                     false, // Process all klasses.
                                                                                     true); // Need to claim CLDs.
@@ -4554,8 +4573,7 @@
         weak_cld_cl    = &scan_only_cld_cl;
       }
 
-      pss.start_strong_roots();
-
+      double start_strong_roots_sec = os::elapsedTime();
       _root_processor->evacuate_roots(strong_root_cl,
                                       weak_root_cl,
                                       strong_cld_cl,
@@ -4563,32 +4581,45 @@
                                       trace_metadata,
                                       worker_id);
 
-      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
+      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
       _root_processor->scan_remembered_sets(&push_heap_rs_cl,
                                             weak_root_cl,
                                             worker_id);
-      pss.end_strong_roots();
-
+      double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
+
+      double term_sec = 0.0;
+      size_t evac_term_attempts = 0;
       {
         double start = os::elapsedTime();
-        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+        G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, &_terminator);
         evac.do_void();
+
+        evac_term_attempts = evac.term_attempts();
+        term_sec = evac.term_time();
         double elapsed_sec = os::elapsedTime() - start;
-        double term_sec = pss.term_time();
         _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec);
         _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec);
-        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts());
+        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, evac_term_attempts);
       }
-      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
-      _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+
+      assert(pss->queue_is_empty(), "should be empty");
 
       if (PrintTerminationStats) {
         MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-        pss.print_termination_stats();
+        size_t lab_waste;
+        size_t lab_undo_waste;
+        pss->waste(lab_waste, lab_undo_waste);
+        _g1h->print_termination_stats(gclog_or_tty,
+                                      worker_id,
+                                      (os::elapsedTime() - start_sec) * 1000.0,   /* elapsed time */
+                                      strong_roots_sec * 1000.0,                  /* strong roots time */
+                                      term_sec * 1000.0,                          /* evac term time */
+                                      evac_term_attempts,                         /* evac term attempts */
+                                      lab_waste,                                  /* alloc buffer waste */
+                                      lab_undo_waste                              /* undo waste */
+                                      );
       }
 
-      assert(pss.queue_is_empty(), "should be empty");
-
       // Close the inner scope so that the ResourceMark and HandleMark
       // destructors are executed here and are included as part of the
       // "GC Worker Time".
@@ -4597,6 +4628,31 @@
   }
 };
 
+void G1CollectedHeap::print_termination_stats_hdr(outputStream* const st) {
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- -------termination------- ------waste (KiB)------");
+  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts  total   alloc    undo");
+  st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------");
+}
+
+void G1CollectedHeap::print_termination_stats(outputStream* const st,
+                                              uint worker_id,
+                                              double elapsed_ms,
+                                              double strong_roots_ms,
+                                              double term_ms,
+                                              size_t term_attempts,
+                                              size_t alloc_buffer_waste,
+                                              size_t undo_waste) const {
+  st->print_cr("%3d %9.2f %9.2f %6.2f "
+               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
+               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
+               worker_id, elapsed_ms, strong_roots_ms, strong_roots_ms * 100 / elapsed_ms,
+               term_ms, term_ms * 100 / elapsed_ms, term_attempts,
+               (alloc_buffer_waste + undo_waste) * HeapWordSize / K,
+               alloc_buffer_waste * HeapWordSize / K,
+               undo_waste * HeapWordSize / K);
+}
+
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -5125,17 +5181,20 @@
 
 class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
-  G1CollectedHeap*   _g1h;
-  RefToScanQueueSet* _queues;
-  WorkGang*          _workers;
-  uint               _active_workers;
+  G1CollectedHeap*        _g1h;
+  G1ParScanThreadState**  _pss;
+  RefToScanQueueSet*      _queues;
+  WorkGang*               _workers;
+  uint                    _active_workers;
 
 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
+                           G1ParScanThreadState** per_thread_states,
                            WorkGang* workers,
                            RefToScanQueueSet *task_queues,
                            uint n_workers) :
     _g1h(g1h),
+    _pss(per_thread_states),
     _queues(task_queues),
     _workers(workers),
     _active_workers(n_workers)
@@ -5154,17 +5213,20 @@
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
-  RefToScanQueueSet *_task_queues;
+  G1ParScanThreadState** _pss;
+  RefToScanQueueSet* _task_queues;
   ParallelTaskTerminator* _terminator;
 
 public:
   G1STWRefProcTaskProxy(ProcessTask& proc_task,
-                     G1CollectedHeap* g1h,
-                     RefToScanQueueSet *task_queues,
-                     ParallelTaskTerminator* terminator) :
+                        G1CollectedHeap* g1h,
+                        G1ParScanThreadState** per_thread_states,
+                        RefToScanQueueSet *task_queues,
+                        ParallelTaskTerminator* terminator) :
     AbstractGangTask("Process reference objects in parallel"),
     _proc_task(proc_task),
     _g1h(g1h),
+    _pss(per_thread_states),
     _task_queues(task_queues),
     _terminator(terminator)
   {}
@@ -5176,11 +5238,12 @@
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-
-    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-
-    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanThreadState*           pss = _pss[worker_id];
+    pss->set_ref_processor(NULL);
+
+    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, pss, NULL);
+
+    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
@@ -5190,10 +5253,10 @@
     }
 
     // Keep alive closure.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss);
 
     // Complete GC closure
-    G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
+    G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _task_queues, _terminator);
 
     // Call the reference processing task's work routine.
     _proc_task.work(worker_id, is_alive, keep_alive, drain_queue);
@@ -5212,7 +5275,7 @@
   assert(_workers != NULL, "Need parallel worker threads.");
 
   ParallelTaskTerminator terminator(_active_workers, _queues);
-  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator);
+  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator);
 
   _workers->run_task(&proc_task_proxy);
 }
@@ -5254,15 +5317,17 @@
 
 class G1ParPreserveCMReferentsTask: public AbstractGangTask {
 protected:
-  G1CollectedHeap* _g1h;
-  RefToScanQueueSet      *_queues;
+  G1CollectedHeap*       _g1h;
+  G1ParScanThreadState** _pss;
+  RefToScanQueueSet*     _queues;
   ParallelTaskTerminator _terminator;
   uint _n_workers;
 
 public:
-  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, uint workers, RefToScanQueueSet *task_queues) :
+  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, uint workers, RefToScanQueueSet *task_queues) :
     AbstractGangTask("ParPreserveCMReferents"),
     _g1h(g1h),
+    _pss(per_thread_states),
     _queues(task_queues),
     _terminator(workers, _queues),
     _n_workers(workers)
@@ -5272,12 +5337,13 @@
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-    assert(pss.queue_is_empty(), "both queue and overflow should be empty");
-
-    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-
-    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanThreadState*          pss = _pss[worker_id];
+    pss->set_ref_processor(NULL);
+    assert(pss->queue_is_empty(), "both queue and overflow should be empty");
+
+    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, pss, NULL);
+
+    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
@@ -5291,7 +5357,7 @@
 
     // Copying keep alive closure. Applied to referent objects that need
     // to be copied.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss);
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
@@ -5324,15 +5390,15 @@
     }
 
     // Drain the queue - which may cause stealing
-    G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator);
+    G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _queues, &_terminator);
     drain_queue.do_void();
     // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure
-    assert(pss.queue_is_empty(), "should be");
+    assert(pss->queue_is_empty(), "should be");
   }
 };
 
 // Weak Reference processing during an evacuation pause (part 1).
-void G1CollectedHeap::process_discovered_references() {
+void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** per_thread_states) {
   double ref_proc_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5362,6 +5428,7 @@
   uint no_of_gc_workers = workers()->active_workers();
 
   G1ParPreserveCMReferentsTask keep_cm_referents(this,
+                                                 per_thread_states,
                                                  no_of_gc_workers,
                                                  _task_queues);
 
@@ -5376,16 +5443,17 @@
   // JNI refs.
 
   // Use only a single queue for this PSS.
-  G1ParScanThreadState            pss(this, 0, NULL);
-  assert(pss.queue_is_empty(), "pre-condition");
+  G1ParScanThreadState*           pss = per_thread_states[0];
+  pss->set_ref_processor(NULL);
+  assert(pss->queue_is_empty(), "pre-condition");
 
   // We do not embed a reference processor in the copying/scanning
   // closures while we're actually processing the discovered
   // reference objects.
 
-  G1ParScanExtRootClosure        only_copy_non_heap_cl(this, &pss, NULL);
-
-  G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL);
+  G1ParScanExtRootClosure        only_copy_non_heap_cl(this, pss, NULL);
+
+  G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss, NULL);
 
   OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
@@ -5395,10 +5463,10 @@
   }
 
   // Keep alive closure.
-  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss);
+  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, pss);
 
   // Serial Complete GC closure
-  G1STWDrainQueueClosure drain_queue(this, &pss);
+  G1STWDrainQueueClosure drain_queue(this, pss);
 
   // Setup the soft refs policy...
   rp->setup_policy(false);
@@ -5417,7 +5485,7 @@
     assert(rp->num_q() == no_of_gc_workers, "sanity");
     assert(no_of_gc_workers <= rp->max_num_q(), "sanity");
 
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers);
+    G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, no_of_gc_workers);
     stats = rp->process_discovered_references(&is_alive,
                                               &keep_alive,
                                               &drain_queue,
@@ -5429,14 +5497,14 @@
   _gc_tracer_stw->report_gc_reference_stats(stats);
 
   // We have completed copying any necessary live referent objects.
-  assert(pss.queue_is_empty(), "both queue and overflow should be empty");
+  assert(pss->queue_is_empty(), "both queue and overflow should be empty");
 
   double ref_proc_time = os::elapsedTime() - ref_proc_start;
   g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
-void G1CollectedHeap::enqueue_discovered_references() {
+void G1CollectedHeap::enqueue_discovered_references(G1ParScanThreadState** per_thread_states) {
   double ref_enq_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5455,7 +5523,7 @@
     assert(rp->num_q() == n_workers, "sanity");
     assert(n_workers <= rp->max_num_q(), "sanity");
 
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers);
+    G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, n_workers);
     rp->enqueue_discovered_references(&par_task_executor);
   }
 
@@ -5491,9 +5559,14 @@
   double start_par_time_sec = os::elapsedTime();
   double end_par_time_sec;
 
+  G1ParScanThreadState** per_thread_states = NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC);
+  for (uint i = 0; i < n_workers; i++) {
+    per_thread_states[i] = new_par_scan_state(i);
+  }
+
   {
     G1RootProcessor root_processor(this, n_workers);
-    G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers);
+    G1ParTask g1_par_task(this, per_thread_states, _task_queues, &root_processor, n_workers);
     // InitialMark needs claim bits to keep track of the marked-through CLDs.
     if (collector_state()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
@@ -5501,7 +5574,7 @@
 
     // The individual threads will set their evac-failure closures.
     if (PrintTerminationStats) {
-      G1ParScanThreadState::print_termination_stats_hdr();
+      print_termination_stats_hdr(gclog_or_tty);
     }
 
     workers()->run_task(&g1_par_task);
@@ -5528,7 +5601,7 @@
   // as we may have to copy some 'reachable' referent
   // objects (and their reachable sub-graphs) that were
   // not copied during the pause.
-  process_discovered_references();
+  process_discovered_references(per_thread_states);
 
   if (G1StringDedup::is_enabled()) {
     double fixup_start = os::elapsedTime();
@@ -5544,6 +5617,14 @@
   _allocator->release_gc_alloc_regions(evacuation_info);
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
+  for (uint i = 0; i < n_workers; i++) {
+    G1ParScanThreadState* pss = per_thread_states[i];
+    delete pss;
+  }
+  FREE_C_HEAP_ARRAY(G1ParScanThreadState*, per_thread_states);
+
+  record_obj_copy_mem_stats();
+
   // Reset and re-enable the hot card cache.
   // Note the counts for the cards in the regions in the
   // collection set are reset when the collection set is freed.
@@ -5568,12 +5649,17 @@
   // will log these updates (and dirty their associated
   // cards). We need these updates logged to update any
   // RSets.
-  enqueue_discovered_references();
+  enqueue_discovered_references(per_thread_states);
 
   redirty_logged_cards();
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
+void G1CollectedHeap::record_obj_copy_mem_stats() {
+  _gc_tracer_stw->report_evacuation_statistics(create_g1_evac_summary(&_survivor_evac_stats),
+                                               create_g1_evac_summary(&_old_evac_stats));
+}
+
 void G1CollectedHeap::free_region(HeapRegion* hr,
                                   FreeRegionList* free_list,
                                   bool par,
@@ -5972,6 +6058,11 @@
       cur->set_evacuation_failed(false);
       // The region is now considered to be old.
       cur->set_old();
+      // Do some allocation statistics accounting. Regions that failed evacuation
+      // are always made old, so there is no need to update anything in the young
+      // gen statistics, but we need to update old gen statistics.
+      size_t used_words = cur->marked_bytes() / HeapWordSize;
+      _old_evac_stats.add_failure_used_and_waste(used_words, HeapRegion::GrainWords - used_words);
       _old_set.add(cur);
       evacuation_info.increment_collectionset_used_after(cur->used());
     }
@@ -6217,6 +6308,10 @@
   }
 }
 
+bool G1CollectedHeap::is_old_gc_alloc_region(HeapRegion* hr) {
+  return _allocator->is_retained_old_region(hr);
+}
+
 void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) {
   _young_list->push_region(hr);
 }
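// A minimal sketch (not part of the patch above; the function name and call
// sequence are illustrative only) of the per-thread scan state lifecycle this
// file now follows: states are heap-allocated once per pause, shared by root
// scanning and reference processing, and deleted afterwards, which flushes
// their statistics via the G1ParScanThreadState destructor.
static void example_pause_skeleton(G1CollectedHeap* g1h, uint n_workers) {
  G1ParScanThreadState** states =
    NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC);
  for (uint i = 0; i < n_workers; i++) {
    states[i] = g1h->new_par_scan_state(i); // factory added in g1CollectedHeap_ext.cpp
  }
  // ... run G1ParTask, then process_discovered_references(states) ...
  for (uint i = 0; i < n_workers; i++) {
    delete states[i];                       // flushes PLAB and age-table stats
  }
  FREE_C_HEAP_ARRAY(G1ParScanThreadState*, states);
}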
--- a/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -28,12 +28,12 @@
 #include "gc/g1/concurrentMark.hpp"
 #include "gc/g1/evacuationInfo.hpp"
 #include "gc/g1/g1AllocationContext.hpp"
-#include "gc/g1/g1Allocator.hpp"
 #include "gc/g1/g1BiasedArray.hpp"
 #include "gc/g1/g1CollectorState.hpp"
 #include "gc/g1/g1HRPrinter.hpp"
 #include "gc/g1/g1InCSetState.hpp"
 #include "gc/g1/g1MonitoringSupport.hpp"
+#include "gc/g1/g1EvacStats.hpp"
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/g1YCTypes.hpp"
 #include "gc/g1/hSpaceCounters.hpp"
@@ -41,6 +41,7 @@
 #include "gc/g1/heapRegionSet.hpp"
 #include "gc/shared/barrierSet.hpp"
 #include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/plab.hpp"
 #include "memory/memRegion.hpp"
 #include "utilities/stack.hpp"
 
@@ -54,6 +55,7 @@
 class HRRSCleanupTask;
 class GenerationSpec;
 class OopsInHeapRegionClosure;
+class G1ParScanThreadState;
 class G1KlassScanClosure;
 class G1ParScanThreadState;
 class ObjectClosure;
@@ -76,6 +78,8 @@
 class nmethod;
 class Ticks;
 class WorkGang;
+class G1Allocator;
+class G1ArchiveAllocator;
 
 typedef OverflowTaskQueue<StarTask, mtGC>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
@@ -184,8 +188,7 @@
   friend class VM_G1IncCollectionPause;
   friend class VMStructs;
   friend class MutatorAllocRegion;
-  friend class SurvivorGCAllocRegion;
-  friend class OldGCAllocRegion;
+  friend class G1GCAllocRegion;
 
   // Closures used in implementation.
   friend class G1ParScanThreadState;
@@ -245,7 +248,7 @@
   // The sequence of all heap regions in the heap.
   HeapRegionManager _hrm;
 
-  // Handles non-humongous allocations in the G1CollectedHeap.
+  // Manages all allocations within regions except humongous object allocations.
   G1Allocator* _allocator;
 
   // Outside of GC pauses, the number of bytes used in all regions other
@@ -263,11 +266,11 @@
   // Statistics for each allocation context
   AllocationContextStats _allocation_context_stats;
 
-  // PLAB sizing policy for survivors.
-  PLABStats _survivor_plab_stats;
+  // GC allocation statistics policy for survivors.
+  G1EvacStats _survivor_evac_stats;
 
-  // PLAB sizing policy for tenured objects.
-  PLABStats _old_plab_stats;
+  // GC allocation statistics policy for tenured objects.
+  G1EvacStats _old_evac_stats;
 
   // It specifies whether we should attempt to expand the heap after a
   // region allocation failure. If heap expansion fails we set this to
@@ -581,11 +584,11 @@
 
   // Process any reference objects discovered during
   // an incremental evacuation pause.
-  void process_discovered_references();
+  void process_discovered_references(G1ParScanThreadState** per_thread_states);
 
   // Enqueue any remaining discovered references
   // after processing.
-  void enqueue_discovered_references();
+  void enqueue_discovered_references(G1ParScanThreadState** per_thread_states);
 
 public:
   WorkGang* workers() const { return _workers; }
@@ -606,7 +609,7 @@
   bool expand(size_t expand_bytes);
 
   // Returns the PLAB statistics for a given destination.
-  inline PLABStats* alloc_buffer_stats(InCSetState dest);
+  inline G1EvacStats* alloc_buffer_stats(InCSetState dest);
 
   // Determines PLAB size for a given destination.
   inline size_t desired_plab_sz(InCSetState dest);
@@ -680,6 +683,9 @@
   // Allocates a new heap region instance.
   HeapRegion* new_heap_region(uint hrs_index, MemRegion mr);
 
+  // Allocates a new per thread par scan state for the given thread id.
+  G1ParScanThreadState* new_par_scan_state(uint worker_id);
+
   // Allocate the highest free region in the reserved heap. This will commit
   // regions as necessary.
   HeapRegion* alloc_highest_free_region();
@@ -789,6 +795,20 @@
   // Actually do the work of evacuating the collection set.
   void evacuate_collection_set(EvacuationInfo& evacuation_info);
 
+  // Print the header for the per-thread termination statistics.
+  static void print_termination_stats_hdr(outputStream* const st);
+  // Print actual per-thread termination statistics.
+  void print_termination_stats(outputStream* const st,
+                               uint worker_id,
+                               double elapsed_ms,
+                               double strong_roots_ms,
+                               double term_ms,
+                               size_t term_attempts,
+                               size_t alloc_buffer_waste,
+                               size_t undo_waste) const;
+  // Update object copying statistics.
+  void record_obj_copy_mem_stats();
+
   // The g1 remembered set of the heap.
   G1RemSet* _g1_rem_set;
 
@@ -1195,9 +1215,7 @@
 
   // Determine whether the given region is one that we are using as an
   // old GC alloc region.
-  bool is_old_gc_alloc_region(HeapRegion* hr) {
-    return _allocator->is_retained_old_region(hr);
-  }
+  bool is_old_gc_alloc_region(HeapRegion* hr);
 
   // Perform a collection of the heap; intended for use in implementing
   // "System.gc".  This probably implies as full a collection as the
@@ -1566,6 +1584,7 @@
                         const VerifyOption vo) const;
 
   G1HeapSummary create_g1_heap_summary();
+  G1EvacSummary create_g1_evac_summary(G1EvacStats* stats);
 
   // Printing
 
--- a/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -35,12 +35,12 @@
 #include "gc/shared/taskqueue.hpp"
 #include "runtime/orderAccess.inline.hpp"
 
-PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) {
+G1EvacStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) {
   switch (dest.value()) {
     case InCSetState::Young:
-      return &_survivor_plab_stats;
+      return &_survivor_evac_stats;
     case InCSetState::Old:
-      return &_old_plab_stats;
+      return &_old_evac_stats;
     default:
       ShouldNotReachHere();
       return NULL; // Keep some compilers happy
--- a/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc/g1/g1CollectedHeap.hpp"
+#include "gc/g1/g1ParScanThreadState.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
 
 bool G1CollectedHeap::copy_allocation_context_stats(const jint* contexts,
@@ -37,3 +38,7 @@
                                              MemRegion mr) {
   return new HeapRegion(hrs_index, bot_shared(), mr);
 }
+
+G1ParScanThreadState* G1CollectedHeap::new_par_scan_state(uint worker_id) {
+  return new G1ParScanThreadState(this, worker_id);
+}
--- a/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -26,8 +26,8 @@
 #define SHARE_VM_GC_G1_G1COLLECTORPOLICY_HPP
 
 #include "gc/g1/collectionSetChooser.hpp"
-#include "gc/g1/g1Allocator.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1InCSetState.hpp"
 #include "gc/g1/g1MMUTracker.hpp"
 #include "gc/shared/collectorPolicy.hpp"
 
--- a/src/share/vm/gc/g1/g1CollectorPolicy_ext.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP
-#define SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP
-
-#include "gc/g1/g1CollectorPolicy.hpp"
-
-class G1CollectorPolicyExt : public G1CollectorPolicy { };
-
-#endif // SHARE_VM_GC_G1_G1COLLECTORPOLICY_EXT_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc/g1/g1EvacStats.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/g1/g1EvacStats.hpp"
+#include "gc/shared/gcId.hpp"
+#include "trace/tracing.hpp"
+
+void G1EvacStats::adjust_desired_plab_sz() {
+  if (PrintPLAB) {
+    gclog_or_tty->print(" (allocated = " SIZE_FORMAT " wasted = " SIZE_FORMAT " "
+                        "unused = " SIZE_FORMAT " used = " SIZE_FORMAT " "
+                        "undo_waste = " SIZE_FORMAT " region_end_waste = " SIZE_FORMAT " "
+                        "regions filled = %u direct_allocated = " SIZE_FORMAT " "
+                        "failure_used = " SIZE_FORMAT " failure_waste = " SIZE_FORMAT ") ",
+                        _allocated, _wasted, _unused, used(), _undo_wasted, _region_end_waste,
+                        _regions_filled, _direct_allocated, _failure_used, _failure_waste);
+  }
+
+  if (ResizePLAB) {
+
+    assert(is_object_aligned(max_size()) && min_size() <= max_size(),
+           "PLAB clipping computation may be incorrect");
+
+    if (_allocated == 0) {
+      assert((_unused == 0),
+             err_msg("Inconsistency in PLAB stats: "
+                     "_allocated: " SIZE_FORMAT ", "
+                     "_wasted: " SIZE_FORMAT ", "
+                     "_region_end_waste: " SIZE_FORMAT ", "
+                     "_unused: " SIZE_FORMAT ", "
+                     "_used: " SIZE_FORMAT,
+                     _allocated, _wasted, _region_end_waste, _unused, used()));
+      _allocated = 1;
+    }
+    // We account region end waste fully to PLAB allocation. This is not completely fair,
+    // but is a conservative assumption because PLABs may be sized flexibly while we
+    // cannot adjust direct allocations.
+    // In some cases wasted_frac may become larger than 1, but that just reflects region
+    // end waste being charged against _allocated without ever being part of it.
+    double wasted_frac    = (double)(_unused + _wasted + _region_end_waste) / (double)_allocated;
+    size_t target_refills = (size_t)((wasted_frac * TargetSurvivorRatio) / TargetPLABWastePct);
+    if (target_refills == 0) {
+      target_refills = 1;
+    }
+    size_t cur_plab_sz = used() / target_refills;
+    // Take historical weighted average
+    _filter.sample(cur_plab_sz);
+    // Clip from above and below, and align to object boundary
+    size_t plab_sz;
+    plab_sz = MAX2(min_size(), (size_t)_filter.average());
+    plab_sz = MIN2(max_size(), plab_sz);
+    plab_sz = align_object_size(plab_sz);
+    // Latch the result
+    _desired_net_plab_sz = plab_sz;
+    if (PrintPLAB) {
+      gclog_or_tty->print_cr(" (plab_sz = " SIZE_FORMAT " desired_plab_sz = " SIZE_FORMAT ") ", cur_plab_sz, plab_sz);
+    }
+  }
+  // Clear accumulators for next round.
+  reset();
+}
+
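// A worked example of the sizing math above, with illustrative numbers and
// HotSpot's default TargetSurvivorRatio (50) and TargetPLABWastePct (10):
// if _allocated = 10000 words, _unused = 500, _wasted = 300 and
// _region_end_waste = 200, then used() = 10000 - (300 + 500) = 9200 and
//   wasted_frac    = (500 + 300 + 200) / 10000 = 0.10
//   target_refills = (0.10 * 50) / 10 = 0.5, truncated to 0, clamped to 1
//   cur_plab_sz    = 9200 / 1 = 9200 words
// which is then smoothed by _filter, clipped to [min_size(), max_size()] and
// object-aligned before it becomes _desired_net_plab_sz.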
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc/g1/g1EvacStats.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1EVACSTATS_HPP
+#define SHARE_VM_GC_G1_G1EVACSTATS_HPP
+
+#include "gc/shared/plab.hpp"
+#include "runtime/atomic.hpp"
+
+// Records various memory allocation statistics gathered during evacuation.
+class G1EvacStats : public PLABStats {
+ private:
+  size_t _region_end_waste; // Number of words wasted due to skipping to the next region.
+  uint   _regions_filled;   // Number of regions filled completely.
+  size_t _direct_allocated; // Number of words allocated directly into the regions.
+
+  // Number of words in live objects remaining in regions that ultimately suffered an
+  // evacuation failure. This is used when those regions are made old regions.
+  size_t _failure_used;
+  // Number of words wasted in regions which failed evacuation. This is the sum of space
+  // for objects successfully copied out of the regions (now dead space) plus waste at the
+  // end of regions.
+  size_t _failure_waste;
+
+  virtual void reset() {
+    PLABStats::reset();
+    _region_end_waste = 0;
+    _regions_filled = 0;
+    _direct_allocated = 0;
+    _failure_used = 0;
+    _failure_waste = 0;
+  }
+
+ public:
+  G1EvacStats(size_t desired_plab_sz_, unsigned wt) : PLABStats(desired_plab_sz_, wt),
+    _region_end_waste(0), _regions_filled(0), _direct_allocated(0),
+    _failure_used(0), _failure_waste(0) {
+  }
+
+  virtual void adjust_desired_plab_sz();
+
+  size_t allocated() const { return _allocated; }
+  size_t wasted() const { return _wasted; }
+  size_t unused() const { return _unused; }
+  size_t used() const { return allocated() - (wasted() + unused()); }
+  size_t undo_wasted() const { return _undo_wasted; }
+
+  uint regions_filled() const { return _regions_filled; }
+  size_t region_end_waste() const { return _region_end_waste; }
+  size_t direct_allocated() const { return _direct_allocated; }
+
+  // Amount of space in heapwords used in the failing regions when an evacuation failure happens.
+  size_t failure_used() const { return _failure_used; }
+  // Amount of space in heapwords wasted (unused) in the failing regions when an evacuation failure happens.
+  size_t failure_waste() const { return _failure_waste; }
+
+  void add_direct_allocated(size_t value) {
+    Atomic::add_ptr(value, &_direct_allocated);
+  }
+
+  void add_region_end_waste(size_t value) {
+    Atomic::add_ptr(value, &_region_end_waste);
+    Atomic::add_ptr(1, &_regions_filled);
+  }
+
+  void add_failure_used_and_waste(size_t used, size_t waste) {
+    Atomic::add_ptr(used, &_failure_used);
+    Atomic::add_ptr(waste, &_failure_waste);
+  }
+};
+
+#endif // SHARE_VM_GC_G1_G1EVACSTATS_HPP
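// A usage sketch (hypothetical; "stats" stands for the heap's per-destination
// G1EvacStats instance): the adders above use Atomic::add_ptr so multiple GC
// workers can record into the same instance without a lock, e.g. when a
// worker retires a region whose tail could not fit the next object:
//
//   stats->add_region_end_waste(pointer_delta(region->end(), region->top()));
//
// while another worker concurrently calls add_direct_allocated(word_sz). The
// plain accessors are meant for single-threaded use after the pause, e.g.
// from adjust_desired_plab_sz().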
--- a/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1OopClosures.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -32,7 +32,11 @@
 
 G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state) :
   G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL),
-  _cm(_g1->concurrent_mark()) {}
+  _cm(_g1->concurrent_mark()) { }
+
+G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1) :
+  G1ParClosureSuper(g1), _scanned_klass(NULL),
+  _cm(_g1->concurrent_mark()) { }
 
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1) :
   _g1(g1), _par_scan_state(NULL), _worker_id(UINT_MAX) { }
--- a/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1OopClosures.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -76,15 +76,13 @@
 
 class G1ParScanClosure : public G1ParClosureSuper {
 public:
-  G1ParScanClosure(G1CollectedHeap* g1, ReferenceProcessor* rp) :
-    G1ParClosureSuper(g1) {
-    assert(_ref_processor == NULL, "sanity");
-    _ref_processor = rp;
-  }
+  G1ParScanClosure(G1CollectedHeap* g1) : G1ParClosureSuper(g1) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
+
+  void set_ref_processor(ReferenceProcessor* ref_processor) { _ref_processor = ref_processor; }
 };
 
 // Add back base class for metadata
@@ -104,6 +102,7 @@
   void mark_forwarded_object(oop from_obj, oop to_obj);
  public:
   G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state);
+  G1ParCopyHelper(G1CollectedHeap* g1);
 
   void set_scanned_klass(Klass* k) { _scanned_klass = k; }
   template <class T> void do_klass_barrier(T* p, oop new_obj);
@@ -132,6 +131,10 @@
     assert(_ref_processor == NULL, "sanity");
   }
 
+  G1ParCopyClosure(G1CollectedHeap* g1) : G1ParCopyHelper(g1) {
+    assert(_ref_processor == NULL, "sanity");
+  }
+
   template <class T> void do_oop_nv(T* p) { do_oop_work(p); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
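// Why the ReferenceProcessor moved from the G1ParScanClosure constructor to a
// setter: the closure now lives inside a G1ParScanThreadState that is reused
// across pause phases needing different discovery behavior. A hypothetical
// call sequence (the actual call sites are in g1CollectedHeap.cpp):
//
//   pss->set_ref_processor(g1h->ref_processor_stw()); // during evacuation
//   ...
//   pss->set_ref_processor(NULL); // while processing discovered references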
--- a/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/g1/g1Allocator.inline.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1OopClosures.inline.hpp"
 #include "gc/g1/g1ParScanThreadState.inline.hpp"
@@ -31,17 +32,19 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, ReferenceProcessor* rp)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id)
   : _g1h(g1h),
     _refs(g1h->task_queue(worker_id)),
     _dcq(&g1h->dirty_card_queue_set()),
     _ct_bs(g1h->g1_barrier_set()),
     _g1_rem(g1h->g1_rem_set()),
-    _hash_seed(17), _worker_id(worker_id),
-    _term_attempts(0),
+    _hash_seed(17),
+    _worker_id(worker_id),
     _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()),
-    _age_table(false), _scanner(g1h, rp),
-    _strong_roots_time(0), _term_time(0) {
+    _age_table(false),
+    _scanner(g1h),
+    _old_gen_is_full(false)
+{
   _scanner.set_par_scan_thread_state(this);
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
@@ -66,38 +69,20 @@
   // need to be moved to the next space.
   _dest[InCSetState::Young]        = InCSetState::Old;
   _dest[InCSetState::Old]          = InCSetState::Old;
-
-  _start = os::elapsedTime();
 }
 
 G1ParScanThreadState::~G1ParScanThreadState() {
-  _plab_allocator->retire_alloc_buffers();
+  // Update allocation statistics.
+  _plab_allocator->flush_and_retire_stats();
   delete _plab_allocator;
+  _g1h->g1_policy()->record_thread_age_table(&_age_table);
+  // Update heap statistics.
+  _g1h->update_surviving_young_words(_surviving_young_words);
   FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
 }
 
-void G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st) {
-  st->print_raw_cr("GC Termination Stats");
-  st->print_raw_cr("     elapsed  --strong roots-- -------termination------- ------waste (KiB)------");
-  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts  total   alloc    undo");
-  st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------");
-}
-
-void G1ParScanThreadState::print_termination_stats(outputStream* const st) const {
-  const double elapsed_ms = elapsed_time() * 1000.0;
-  const double s_roots_ms = strong_roots_time() * 1000.0;
-  const double term_ms    = term_time() * 1000.0;
-  size_t alloc_buffer_waste = 0;
-  size_t undo_waste = 0;
-  _plab_allocator->waste(alloc_buffer_waste, undo_waste);
-  st->print_cr("%3u %9.2f %9.2f %6.2f "
-               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
-               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
-               _worker_id, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
-               term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
-               (alloc_buffer_waste + undo_waste) * HeapWordSize / K,
-               alloc_buffer_waste * HeapWordSize / K,
-               undo_waste * HeapWordSize / K);
+void G1ParScanThreadState::waste(size_t& wasted, size_t& undo_wasted) {
+  _plab_allocator->waste(wasted, undo_wasted);
 }
 
 #ifdef ASSERT
@@ -152,26 +137,38 @@
 HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state,
                                                       InCSetState* dest,
                                                       size_t word_sz,
-                                                      AllocationContext_t const context) {
+                                                      AllocationContext_t const context,
+                                                      bool previous_plab_refill_failed) {
   assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value()));
   assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
 
   // Right now we only have two types of regions (young / old) so
   // let's keep the logic here simple. We can generalize it when necessary.
   if (dest->is_young()) {
+    bool plab_refill_in_old_failed = false;
     HeapWord* const obj_ptr = _plab_allocator->allocate(InCSetState::Old,
                                                         word_sz,
-                                                        context);
-    if (obj_ptr == NULL) {
-      return NULL;
-    }
+                                                        context,
+                                                        &plab_refill_in_old_failed);
     // Make sure that we won't attempt to copy any other objects out
     // of a survivor region (given that apparently we cannot allocate
-    // any new ones) to avoid coming into this slow path.
-    _tenuring_threshold = 0;
-    dest->set_old();
+    // any new ones) to avoid coming into this slow path again and again.
+    // Only consider failed PLAB refill here: failed inline allocations are
+    // typically large, so not indicative of remaining space.
+    if (previous_plab_refill_failed) {
+      _tenuring_threshold = 0;
+    }
+
+    if (obj_ptr != NULL) {
+      dest->set_old();
+    } else {
+      // We just failed to allocate in old gen. The same idea as explained above
+      // for making survivor gen unavailable for allocation applies to old gen.
+      _old_gen_is_full = plab_refill_in_old_failed;
+    }
     return obj_ptr;
   } else {
+    _old_gen_is_full = previous_plab_refill_failed;
     assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
     // no other space to try.
     return NULL;
@@ -202,14 +199,20 @@
 
   uint age = 0;
   InCSetState dest_state = next_state(state, old_mark, age);
+  // The second clause is to prevent premature evacuation failure in case there
+  // is still space in survivor, but old gen is full.
+  if (_old_gen_is_full && dest_state.is_old()) {
+    return handle_evacuation_failure_par(old, old_mark);
+  }
   HeapWord* obj_ptr = _plab_allocator->plab_allocate(dest_state, word_sz, context);
 
   // PLAB allocations should succeed most of the time, so we'll
   // normally check against NULL once and that's it.
   if (obj_ptr == NULL) {
-    obj_ptr = _plab_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context);
+    bool plab_refill_failed = false;
+    obj_ptr = _plab_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context, &plab_refill_failed);
     if (obj_ptr == NULL) {
-      obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context);
+      obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context, plab_refill_failed);
       if (obj_ptr == NULL) {
         // This will either forward-to-self, or detect that someone else has
         // installed a forwarding pointer.
@@ -253,7 +256,7 @@
       } else {
         obj->set_mark(old_mark->set_age(age));
       }
-      age_table()->add(age, word_sz);
+      _age_table.add(age, word_sz);
     } else {
       obj->set_mark(old_mark);
     }
@@ -271,8 +274,7 @@
                                              obj);
     }
 
-    size_t* const surv_young_words = surviving_young_words();
-    surv_young_words[young_index] += word_sz;
+    _surviving_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       // We keep track of the next start index in the length field of
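// The allocation fallback chain implemented by copy_to_survivor_space() above,
// in outline (descriptive summary, not additional source):
//   1. plab_allocate(dest_state, ...)        fast path in the current PLAB
//   2. allocate_direct_or_new_plab(...)      refill the PLAB or allocate the
//                                            object directly, reporting
//                                            plab_refill_failed on the side
//   3. allocate_in_next_plab(...)            retry in the next "generation"
//                                            (young -> old); a failed refill
//                                            zeroes _tenuring_threshold or sets
//                                            _old_gen_is_full so later copies
//                                            skip this slow path
//   4. handle_evacuation_failure_par(...)    forward-to-self as a last resort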
--- a/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -35,16 +35,17 @@
 #include "memory/allocation.hpp"
 #include "oops/oop.hpp"
 
+class G1PLABAllocator;
 class HeapRegion;
 class outputStream;
 
-class G1ParScanThreadState : public StackObj {
+class G1ParScanThreadState : public CHeapObj<mtGC> {
  private:
   G1CollectedHeap* _g1h;
   RefToScanQueue*  _refs;
   DirtyCardQueue   _dcq;
   G1SATBCardTableModRefBS* _ct_bs;
-  G1RemSet* _g1_rem;
+  G1RemSet*         _g1_rem;
 
   G1PLABAllocator*  _plab_allocator;
 
@@ -57,20 +58,16 @@
   int  _hash_seed;
   uint _worker_id;
 
-  size_t _term_attempts;
-
-  double _start;
-  double _start_strong_roots;
-  double _strong_roots_time;
-  double _start_term;
-  double _term_time;
-
   // Map from young-age-index (0 == not young, 1 is youngest) to
   // surviving words. base is what we get back from the malloc call
   size_t* _surviving_young_words_base;
   // this points into the array, as we use the first few entries for padding
   size_t* _surviving_young_words;
 
+  // Indicates whether the last generation (old) has no more space available for
+  // allocation.
+  bool _old_gen_is_full;
+
 #define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
 
   DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
@@ -85,10 +82,10 @@
   }
 
  public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, ReferenceProcessor* rp);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id);
   ~G1ParScanThreadState();
 
-  ageTable*         age_table()       { return &_age_table;       }
+  void set_ref_processor(ReferenceProcessor* rp) { _scanner.set_ref_processor(rp); }
 
 #ifdef ASSERT
   bool queue_is_empty() const { return _refs->is_empty(); }
@@ -114,40 +111,14 @@
 
   uint worker_id() { return _worker_id; }
 
-  size_t term_attempts() const  { return _term_attempts; }
-  void note_term_attempt() { _term_attempts++; }
-
-  void start_strong_roots() {
-    _start_strong_roots = os::elapsedTime();
-  }
-  void end_strong_roots() {
-    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
-  }
-  double strong_roots_time() const { return _strong_roots_time; }
-
-  void start_term_time() {
-    note_term_attempt();
-    _start_term = os::elapsedTime();
-  }
-  void end_term_time() {
-    _term_time += (os::elapsedTime() - _start_term);
-  }
-  double term_time() const { return _term_time; }
-
-  double elapsed_time() const {
-    return os::elapsedTime() - _start;
-  }
-
-  // Print the header for the per-thread termination statistics.
-  static void print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
-
-  // Print actual per-thread termination statistics.
-  void print_termination_stats(outputStream* const st = gclog_or_tty) const;
+  // Returns the current amount of waste due to alignment or to objects not fitting
+  // within LABs, together with the undo waste.
+  virtual void waste(size_t& wasted, size_t& undo_wasted);
 
   size_t* surviving_young_words() {
-    // We add on to hide entry 0 which accumulates surviving words for
+    // We add one to hide entry 0 which accumulates surviving words for
     // age -1 regions (i.e. non-young ones)
-    return _surviving_young_words;
+    return _surviving_young_words + 1;
   }
 
  private:
@@ -190,12 +161,16 @@
 
   // Tries to allocate word_sz in the PLAB of the next "generation" after trying to
   // allocate into dest. State is the original (source) cset state for the object
-  // that is allocated for.
+  // that is allocated for. Previous_plab_refill_failed indicates whether a previous
+  // PLAB refill into "state" failed.
   // Returns a non-NULL pointer if successful, and updates dest if required.
+  // Also determines whether we should continue to try to allocate into the various
+  // generations or just end trying to allocate.
   HeapWord* allocate_in_next_plab(InCSetState const state,
                                   InCSetState* dest,
                                   size_t word_sz,
-                                  AllocationContext_t const context);
+                                  AllocationContext_t const context,
+                                  bool previous_plab_refill_failed);
 
   inline InCSetState next_state(InCSetState const state, markOop const m, uint& age);
  public:
--- a/src/share/vm/gc/g1/heapRegion.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/heapRegion.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -109,7 +109,7 @@
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
 class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
-  HeapWord* _top;
+  HeapWord* volatile _top;
   HeapWord* volatile _scan_top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
@@ -134,10 +134,18 @@
   // Reset the G1OffsetTableContigSpace.
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
 
-  HeapWord** top_addr() { return &_top; }
-  // Allocation helpers (return NULL if full).
-  inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value);
-  inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
+  HeapWord* volatile* top_addr() { return &_top; }
+  // Try to allocate at least min_word_size and up to desired_word_size from this Space.
+  // Returns NULL if not possible, otherwise sets actual_word_size to the amount of
+  // space allocated.
+  // This version assumes that all allocation requests to this Space are properly
+  // synchronized.
+  inline HeapWord* allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+  // Try to allocate at least min_word_size and up to desired_word_size from this Space.
+  // Returns NULL if not possible, otherwise sets actual_word_size to the amount of
+  // space allocated.
+  // This version synchronizes with other calls to par_allocate_impl().
+  inline HeapWord* par_allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
 
  public:
   void reset_after_compaction() { set_top(compaction_top()); }
@@ -179,9 +187,14 @@
   HeapWord* block_start(const void* p);
   HeapWord* block_start_const(const void* p) const;
 
-  // Add offset table update.
+  // Allocation (return NULL if full).  Assumes the caller has established
+  // mutually exclusive access to the space.
+  HeapWord* allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+  // Allocation (return NULL if full).  Enforces mutual exclusion internally.
+  HeapWord* par_allocate(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
+
   virtual HeapWord* allocate(size_t word_size);
-  HeapWord* par_allocate(size_t word_size);
+  virtual HeapWord* par_allocate(size_t word_size);
 
   HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; }
 
@@ -351,8 +364,9 @@
   // Override for scan_and_forward support.
   void prepare_for_compaction(CompactPoint* cp);
 
-  inline HeapWord* par_allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* par_allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
   inline HeapWord* allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* allocate_no_bot_updates(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size);
 
   // If this region is a member of a HeapRegionManager, the index in that
   // sequence, otherwise -1.
--- a/src/share/vm/gc/g1/heapRegion.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/heapRegion.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -32,33 +32,39 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
 
-// This version requires locking.
-inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size,
-                                                HeapWord* const end_value) {
+inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t min_word_size,
+                                                         size_t desired_word_size,
+                                                         size_t* actual_size) {
   HeapWord* obj = top();
-  if (pointer_delta(end_value, obj) >= size) {
-    HeapWord* new_top = obj + size;
+  size_t available = pointer_delta(end(), obj);
+  size_t want_to_allocate = MIN2(available, desired_word_size);
+  if (want_to_allocate >= min_word_size) {
+    HeapWord* new_top = obj + want_to_allocate;
     set_top(new_top);
     assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+    *actual_size = want_to_allocate;
     return obj;
   } else {
     return NULL;
   }
 }
 
-// This version is lock-free.
-inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size,
-                                                    HeapWord* const end_value) {
+inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t min_word_size,
+                                                             size_t desired_word_size,
+                                                             size_t* actual_size) {
   do {
     HeapWord* obj = top();
-    if (pointer_delta(end_value, obj) >= size) {
-      HeapWord* new_top = obj + size;
+    size_t available = pointer_delta(end(), obj);
+    size_t want_to_allocate = MIN2(available, desired_word_size);
+    if (want_to_allocate >= min_word_size) {
+      HeapWord* new_top = obj + want_to_allocate;
       HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
       // result can be one of two:
       //  the old top value: the exchange succeeded
       //  otherwise: the new value of the top is returned.
       if (result == obj) {
         assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+        *actual_size = want_to_allocate;
         return obj;
       }
     } else {
@@ -67,20 +73,34 @@
   } while (true);
 }
 
-inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
-  HeapWord* res = allocate_impl(size, end());
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t min_word_size,
+                                                    size_t desired_word_size,
+                                                    size_t* actual_size) {
+  HeapWord* res = allocate_impl(min_word_size, desired_word_size, actual_size);
   if (res != NULL) {
-    _offsets.alloc_block(res, size);
+    _offsets.alloc_block(res, *actual_size);
   }
   return res;
 }
 
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t word_size) {
+  size_t temp;
+  return allocate(word_size, word_size, &temp);
+}
+
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t word_size) {
+  size_t temp;
+  return par_allocate(word_size, word_size, &temp);
+}
+
 // Because of the requirement of keeping "_offsets" up to date with the
 // allocations, we sequentialize these with a lock.  Therefore, best if
 // this is used for larger LAB allocations only.
-inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t min_word_size,
+                                                        size_t desired_word_size,
+                                                        size_t* actual_size) {
   MutexLocker x(&_par_alloc_lock);
-  return allocate(size);
+  return allocate(min_word_size, desired_word_size, actual_size);
 }
 
 inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
@@ -128,14 +148,23 @@
   return pointer_delta(next, addr);
 }
 
-inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
+inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t min_word_size,
+                                                         size_t desired_word_size,
+                                                         size_t* actual_word_size) {
   assert(is_young(), "we can only skip BOT updates on young regions");
-  return par_allocate_impl(word_size, end());
+  return par_allocate_impl(min_word_size, desired_word_size, actual_word_size);
 }
 
 inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) {
+  size_t temp;
+  return allocate_no_bot_updates(word_size, word_size, &temp);
+}
+
+inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t min_word_size,
+                                                     size_t desired_word_size,
+                                                     size_t* actual_word_size) {
   assert(is_young(), "we can only skip BOT updates on young regions");
-  return allocate_impl(word_size, end());
+  return allocate_impl(min_word_size, desired_word_size, actual_word_size);
 }
 
 inline void HeapRegion::note_start_of_marking() {
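// A caller-side sketch (hypothetical function; PLAB::size_required_for_allocation
// is the helper added to plab.hpp below) of the new min/desired/actual protocol:
// a refill asks for the full desired PLAB size but accepts anything that still
// fits the pending object.
static HeapWord* example_plab_refill(HeapRegion* r,
                                     size_t obj_word_sz,
                                     size_t desired_plab_sz) {
  size_t min_word_size = PLAB::size_required_for_allocation(obj_word_sz);
  size_t actual_word_size = 0;
  HeapWord* buf = r->par_allocate_no_bot_updates(min_word_size,
                                                 desired_plab_sz,
                                                 &actual_word_size);
  if (buf != NULL) {
    // The region returned between min_word_size and desired_plab_sz words;
    // the new PLAB must be sized to actual_word_size, not to the request.
  }
  return buf;
}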
--- a/src/share/vm/gc/g1/heapRegionManager.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/heapRegionManager.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -428,7 +428,7 @@
 
     uncommit_regions(idx_last_found + num_last_found - to_remove, to_remove);
 
-    cur -= num_last_found;
+    cur = idx_last_found;
     removed += to_remove;
   }
 
--- a/src/share/vm/gc/g1/vmStructs_g1.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/g1/vmStructs_g1.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -34,7 +34,7 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
-  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord*)            \
+  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord* volatile)   \
                                                                               \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
   nonstatic_field(G1HeapRegionTable, _length,           size_t)               \
--- a/src/share/vm/gc/shared/gcHeapSummary.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/gcHeapSummary.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -189,4 +189,44 @@
 
 };
 
+class G1EvacSummary : public StackObj {
+private:
+  size_t _allocated;          // Total allocated
+  size_t _wasted;             // of which wasted (internal fragmentation)
+  size_t _undo_wasted;        // of which wasted on undo (not used for PLAB size calculation)
+  size_t _unused;             // Unused in last buffer
+  size_t _used;
+
+  size_t _region_end_waste; // Number of words wasted due to skipping to the next region.
+  uint   _regions_filled;   // Number of regions filled completely.
+  size_t _direct_allocated; // Number of words allocated directly into the regions.
+
+  // Number of words in live objects remaining in regions that ultimately suffered an
+  // evacuation failure. This is used when those regions are made old regions.
+  size_t _failure_used;
+  // Number of words wasted in regions which failed evacuation. This is the sum of space
+  // for objects successfully copied out of the regions (now dead space) plus waste at the
+  // end of regions.
+  size_t _failure_waste;
+public:
+  G1EvacSummary(size_t allocated, size_t wasted, size_t undo_wasted, size_t unused,
+    size_t used, size_t region_end_waste, uint regions_filled, size_t direct_allocated,
+    size_t failure_used, size_t failure_waste) :
+    _allocated(allocated), _wasted(wasted), _undo_wasted(undo_wasted), _unused(unused),
+    _used(used),  _region_end_waste(region_end_waste), _regions_filled(regions_filled),
+    _direct_allocated(direct_allocated), _failure_used(failure_used), _failure_waste(failure_waste)
+  { }
+
+  size_t allocated() const { return _allocated; }
+  size_t wasted() const { return _wasted; }
+  size_t undo_wasted() const { return _undo_wasted; }
+  size_t unused() const { return _unused; }
+  size_t used() const { return _used; }
+  size_t region_end_waste() const { return _region_end_waste; }
+  uint regions_filled() const { return _regions_filled; }
+  size_t direct_allocated() const { return _direct_allocated; }
+  size_t failure_used() const { return _failure_used; }
+  size_t failure_waste() const { return _failure_waste; }
+};
+
 #endif // SHARE_VM_GC_SHARED_GCHEAPSUMMARY_HPP
--- a/src/share/vm/gc/shared/gcTrace.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/gcTrace.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -252,4 +252,12 @@
   send_evacuation_failed_event(ef_info);
   ef_info.reset();
 }
+
+void G1NewTracer::report_evacuation_statistics(const G1EvacSummary& young_summary, const G1EvacSummary& old_summary) const {
+  assert_set_gc_id();
+
+  send_young_evacuation_statistics(young_summary);
+  send_old_evacuation_statistics(old_summary);
+}
+
 #endif
--- a/src/share/vm/gc/shared/gcTrace.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/gcTrace.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -45,6 +45,7 @@
 class MetaspaceSummary;
 class PSHeapSummary;
 class G1HeapSummary;
+class G1EvacSummary;
 class ReferenceProcessorStats;
 class TimePartitions;
 class BoolObjectClosure;
@@ -257,10 +258,14 @@
   void report_evacuation_info(EvacuationInfo* info);
   void report_evacuation_failed(EvacuationFailedInfo& ef_info);
 
+  void report_evacuation_statistics(const G1EvacSummary& young_summary, const G1EvacSummary& old_summary) const;
  private:
   void send_g1_young_gc_event();
   void send_evacuation_info_event(EvacuationInfo* info);
   void send_evacuation_failed_event(const EvacuationFailedInfo& ef_info) const;
+
+  void send_young_evacuation_statistics(const G1EvacSummary& summary) const;
+  void send_old_evacuation_statistics(const G1EvacSummary& summary) const;
 };
 #endif
 
--- a/src/share/vm/gc/shared/gcTraceSend.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/gcTraceSend.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -234,6 +234,37 @@
     e.commit();
   }
 }
+
+static TraceStructG1EvacStats create_g1_evacstats(unsigned gcid, const G1EvacSummary& summary) {
+  TraceStructG1EvacStats s;
+  s.set_gcId(gcid);
+  s.set_allocated(summary.allocated() * HeapWordSize);
+  s.set_wasted(summary.wasted() * HeapWordSize);
+  s.set_used(summary.used() * HeapWordSize);
+  s.set_undoWaste(summary.undo_wasted() * HeapWordSize);
+  s.set_regionEndWaste(summary.region_end_waste() * HeapWordSize);
+  s.set_regionsRefilled(summary.regions_filled());
+  s.set_directAllocated(summary.direct_allocated() * HeapWordSize);
+  s.set_failureUsed(summary.failure_used() * HeapWordSize);
+  s.set_failureWaste(summary.failure_waste() * HeapWordSize);
+  return s;
+}
+
+void G1NewTracer::send_young_evacuation_statistics(const G1EvacSummary& summary) const {
+  EventGCG1EvacuationYoungStatistics surv_evt;
+  if (surv_evt.should_commit()) {
+    surv_evt.set_stats(create_g1_evacstats(_shared_gc_info.gc_id().id(), summary));
+    surv_evt.commit();
+  }
+}
+
+void G1NewTracer::send_old_evacuation_statistics(const G1EvacSummary& summary) const {
+  EventGCG1EvacuationOldStatistics old_evt;
+  if (old_evt.should_commit()) {
+    old_evt.set_stats(create_g1_evacstats(_shared_gc_info.gc_id().id(), summary));
+    old_evt.commit();
+  }
+}
 #endif
 
 static TraceStructVirtualSpace to_trace_struct(const VirtualSpaceSummary& summary) {
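
Note that create_g1_evacstats() scales every size by HeapWordSize: the
summaries count HeapWords while the trace struct's BYTES64 fields expect
bytes. A sketch of the conversion, with illustrative numbers:

    // HeapWords -> bytes; HeapWordSize is 8 on LP64 and 4 on 32-bit VMs.
    size_t words = summary.allocated();   // e.g. 1024 words
    size_t bytes = words * HeapWordSize;  // 8192 bytes on an LP64 VM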
--- a/src/share/vm/gc/shared/plab.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/plab.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/collectedHeap.hpp"
-#include "gc/shared/plab.hpp"
+#include "gc/shared/plab.inline.hpp"
 #include "gc/shared/threadLocalAllocBuffer.hpp"
 #include "oops/arrayOop.hpp"
 #include "oops/oop.inline.hpp"
--- a/src/share/vm/gc/shared/plab.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/plab.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -27,7 +27,6 @@
 
 #include "gc/shared/gcUtil.hpp"
 #include "memory/allocation.hpp"
-#include "runtime/atomic.hpp"
 #include "utilities/globalDefinitions.hpp"
 
 // Forward declarations.
@@ -75,6 +74,8 @@
   PLAB(size_t word_sz);
   virtual ~PLAB() {}
 
+  static size_t size_required_for_allocation(size_t word_size) { return word_size + AlignmentReserve; }
+
   // Minimum PLAB size.
   static size_t min_size();
   // Maximum PLAB size.
@@ -95,7 +96,7 @@
   }
 
   // Allocate the object aligned to "alignment_in_bytes".
-  HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
+  inline HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
 
   // Undo the last allocation in the buffer, which is required to be
   // the allocation "obj" of the given "word_sz".
@@ -108,13 +109,6 @@
   size_t waste() { return _wasted; }
   size_t undo_waste() { return _undo_wasted; }
 
-  // Should only be done if we are about to reset with a new buffer of the
-  // given size.
-  void set_word_size(size_t new_word_sz) {
-    assert(new_word_sz > AlignmentReserve, "Too small");
-    _word_sz = new_word_sz;
-  }
-
   // The number of words of unallocated space remaining in the buffer.
   size_t words_remaining() {
     assert(_end >= _top, "Negative buffer");
@@ -126,7 +120,10 @@
   }
 
   // Sets the space of the buffer to be [buf, buf + new_word_sz).
-  virtual void set_buf(HeapWord* buf) {
+  virtual void set_buf(HeapWord* buf, size_t new_word_sz) {
+    assert(new_word_sz > AlignmentReserve, "Too small");
+    _word_sz = new_word_sz;
+
     _bottom   = buf;
     _top      = _bottom;
     _hard_end = _bottom + word_sz();
@@ -149,7 +146,8 @@
 };
 
 // PLAB book-keeping.
-class PLABStats VALUE_OBJ_CLASS_SPEC {
+class PLABStats : public CHeapObj<mtGC> {
+ protected:
   size_t _allocated;          // Total allocated
   size_t _wasted;             // of which wasted (internal fragmentation)
   size_t _undo_wasted;        // of which wasted on undo (not used when calculating the PLAB size)
@@ -158,7 +156,7 @@
   AdaptiveWeightedAverage
          _filter;             // Integrator with decay
 
-  void reset() {
+  virtual void reset() {
     _allocated   = 0;
     _wasted      = 0;
     _undo_wasted = 0;
@@ -174,6 +172,8 @@
     _filter(wt)
   { }
 
+  virtual ~PLABStats() { }
+
   static const size_t min_size() {
     return PLAB::min_size();
   }
@@ -187,23 +187,15 @@
 
   // Updates the current desired PLAB size. Computes the new desired PLAB size with a
   // single GC worker thread, updates _desired_plab_sz and clears the sensor accumulators.
-  void adjust_desired_plab_sz();
+  virtual void adjust_desired_plab_sz();
 
-  void add_allocated(size_t v) {
-    Atomic::add_ptr(v, &_allocated);
-  }
+  inline void add_allocated(size_t v);
 
-  void add_unused(size_t v) {
-    Atomic::add_ptr(v, &_unused);
-  }
+  inline void add_unused(size_t v);
 
-  void add_wasted(size_t v) {
-    Atomic::add_ptr(v, &_wasted);
-  }
+  inline void add_wasted(size_t v);
 
-  void add_undo_wasted(size_t v) {
-    Atomic::add_ptr(v, &_undo_wasted);
-  }
+  inline void add_undo_wasted(size_t v);
 };
 
 #endif // SHARE_VM_GC_SHARED_PLAB_HPP
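
Turning PLABStats into a CHeapObj with virtual reset() and
adjust_desired_plab_sz() lets collectors heap-allocate specialized statistics
objects. A sketch of a hypothetical subclass (G1's real subclass is not part
of this hunk, and the constructor arguments assume the two-parameter
constructor whose tail is shown above):

    // Hypothetical specialization following the new virtual interface.
    class MyPLABStats : public PLABStats {
      size_t _extra;  // collector-specific sensor
     protected:
      virtual void reset() {
        PLABStats::reset();  // clear the shared accumulators
        _extra = 0;
      }
     public:
      MyPLABStats(size_t desired_plab_sz, unsigned wt)
        : PLABStats(desired_plab_sz, wt), _extra(0) { }
      virtual void adjust_desired_plab_sz() {
        // Fold _extra into the resizing decision here, then apply
        // the default policy.
        PLABStats::adjust_desired_plab_sz();
      }
    };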
--- a/src/share/vm/gc/shared/plab.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/gc/shared/plab.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -27,9 +27,10 @@
 
 #include "gc/shared/collectedHeap.inline.hpp"
 #include "gc/shared/plab.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/atomic.inline.hpp"
 
-HeapWord* PLAB::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
-
+inline HeapWord* PLAB::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
   HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes);
   if (res == NULL) {
     return NULL;
@@ -41,4 +42,20 @@
   return allocate(word_sz);
 }
 
+void PLABStats::add_allocated(size_t v) {
+  Atomic::add_ptr(v, &_allocated);
+}
+
+void PLABStats::add_unused(size_t v) {
+  Atomic::add_ptr(v, &_unused);
+}
+
+void PLABStats::add_wasted(size_t v) {
+  Atomic::add_ptr(v, &_wasted);
+}
+
+void PLABStats::add_undo_wasted(size_t v) {
+  Atomic::add_ptr(v, &_undo_wasted);
+}
+
 #endif // SHARE_VM_GC_SHARED_PLAB_INLINE_HPP
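
The add_* helpers moved here so that plab.hpp no longer drags in
runtime/atomic.hpp; they stay lock-free because many GC worker threads flush
their per-thread PLAB totals into one shared stats object. A sketch of that
usage pattern (allocated() is a hypothetical accessor; waste(), undo_waste()
and words_remaining() appear in plab.hpp above):

    // Each GC worker flushes its thread-local PLAB into the shared
    // statistics; Atomic::add_ptr makes the sums race-free.
    void flush_plab(PLAB* plab, PLABStats* stats) {
      stats->add_allocated(plab->allocated());   // hypothetical accessor
      stats->add_wasted(plab->waste());
      stats->add_undo_wasted(plab->undo_waste());
      stats->add_unused(plab->words_remaining());
    }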
--- a/src/share/vm/memory/universe.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/memory/universe.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -77,7 +77,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc/cms/cmsCollectorPolicy.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/g1CollectorPolicy_ext.hpp"
+#include "gc/g1/g1CollectorPolicy.hpp"
 #include "gc/parallel/parallelScavengeHeap.hpp"
 #include "gc/shared/adaptiveSizePolicy.hpp"
 #endif // INCLUDE_ALL_GCS
@@ -694,13 +694,29 @@
   return JNI_OK;
 }
 
-template <class Heap, class Policy>
-jint Universe::create_heap() {
+CollectedHeap* Universe::create_heap() {
   assert(_collectedHeap == NULL, "Heap already created");
-  Policy* policy = new Policy();
-  policy->initialize_all();
-  _collectedHeap = new Heap(policy);
-  return _collectedHeap->initialize();
+#if !INCLUDE_ALL_GCS
+  if (UseParallelGC) {
+    fatal("UseParallelGC not supported in this VM.");
+  } else if (UseG1GC) {
+    fatal("UseG1GC not supported in this VM.");
+  } else if (UseConcMarkSweepGC) {
+    fatal("UseConcMarkSweepGC not supported in this VM.");
+#else
+  if (UseParallelGC) {
+    return Universe::create_heap_with_policy<ParallelScavengeHeap, GenerationSizer>();
+  } else if (UseG1GC) {
+    return Universe::create_heap_with_policy<G1CollectedHeap, G1CollectorPolicy>();
+  } else if (UseConcMarkSweepGC) {
+    return Universe::create_heap_with_policy<GenCollectedHeap, ConcurrentMarkSweepPolicy>();
+#endif
+  } else if (UseSerialGC) {
+    return Universe::create_heap_with_policy<GenCollectedHeap, MarkSweepPolicy>();
+  }
+
+  ShouldNotReachHere();
+  return NULL;
 }
 
 // Choose the heap base address and oop encoding mode
@@ -714,27 +730,12 @@
 jint Universe::initialize_heap() {
   jint status = JNI_ERR;
 
-#if !INCLUDE_ALL_GCS
-  if (UseParallelGC) {
-    fatal("UseParallelGC not supported in this VM.");
-  } else if (UseG1GC) {
-    fatal("UseG1GC not supported in this VM.");
-  } else if (UseConcMarkSweepGC) {
-    fatal("UseConcMarkSweepGC not supported in this VM.");
-#else
-  if (UseParallelGC) {
-    status = Universe::create_heap<ParallelScavengeHeap, GenerationSizer>();
-  } else if (UseG1GC) {
-    status = Universe::create_heap<G1CollectedHeap, G1CollectorPolicyExt>();
-  } else if (UseConcMarkSweepGC) {
-    status = Universe::create_heap<GenCollectedHeap, ConcurrentMarkSweepPolicy>();
-#endif
-  } else if (UseSerialGC) {
-    status = Universe::create_heap<GenCollectedHeap, MarkSweepPolicy>();
-  } else {
-    ShouldNotReachHere();
+  _collectedHeap = create_heap_ext();
+  if (_collectedHeap == NULL) {
+    _collectedHeap = create_heap();
   }
 
+  status = _collectedHeap->initialize();
   if (status != JNI_OK) {
     return status;
   }
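
The interleaved preprocessor blocks in create_heap() are easier to follow
once expanded; schematically, the two configurations reduce to:

    // With INCLUDE_ALL_GCS (full VM): every GC flag reaches a factory.
    if (UseParallelGC) {
      return create_heap_with_policy<ParallelScavengeHeap, GenerationSizer>();
    } else if (UseG1GC) {
      return create_heap_with_policy<G1CollectedHeap, G1CollectorPolicy>();
    } else if (UseConcMarkSweepGC) {
      return create_heap_with_policy<GenCollectedHeap, ConcurrentMarkSweepPolicy>();
    } else if (UseSerialGC) {
      return create_heap_with_policy<GenCollectedHeap, MarkSweepPolicy>();
    }

    // Without INCLUDE_ALL_GCS (minimal VM): the first three flags are
    // fatal() errors and only UseSerialGC reaches a factory.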
--- a/src/share/vm/memory/universe.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/memory/universe.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -214,7 +214,9 @@
   static size_t _heap_capacity_at_last_gc;
   static size_t _heap_used_at_last_gc;
 
-  template <class Heap, class Policy> static jint create_heap();
+  template <class Heap, class Policy> static CollectedHeap* create_heap_with_policy();
+  static CollectedHeap* create_heap();
+  static CollectedHeap* create_heap_ext();
   static jint initialize_heap();
   static void initialize_basic_type_mirrors(TRAPS);
   static void fixup_mirrors(TRAPS);
--- a/src/share/vm/memory/universe.inline.hpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/memory/universe.inline.hpp	Thu Aug 27 18:51:22 2015 +0200
@@ -49,4 +49,11 @@
   _allocation_context_notification_obj = obj;
 }
 
+template <class Heap, class Policy>
+CollectedHeap* Universe::create_heap_with_policy() {
+  Policy* policy = new Policy();
+  policy->initialize_all();
+  return new Heap(policy);
+}
+
 #endif // SHARE_VM_MEMORY_UNIVERSE_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/universe_ext.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/universe.hpp"
+
+CollectedHeap* Universe::create_heap_ext() {
+  return NULL;
+}
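
create_heap_ext() is an extension hook: this open-source default returns
NULL, so Universe::initialize_heap() falls through to the regular
create_heap() selection, while an extended source tree can ship its own
universe_ext.cpp. A sketch of what such an override might look like
(MyHeap and MyPolicy are hypothetical types):

    // Hypothetical replacement universe_ext.cpp in an extended repo.
    CollectedHeap* Universe::create_heap_ext() {
      if (UseG1GC) {
        // Substitute an extended heap/policy pair for the default one.
        return Universe::create_heap_with_policy<MyHeap, MyPolicy>();
      }
      return NULL;  // defer to the standard selection
    }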
--- a/src/share/vm/runtime/globals.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/runtime/globals.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -310,13 +310,17 @@
 void Flag::get_locked_message(char* buf, int buflen) const {
   buf[0] = '\0';
   if (is_diagnostic() && !is_unlocked()) {
-    jio_snprintf(buf, buflen, "Error: VM option '%s' is diagnostic and must be enabled via -XX:+UnlockDiagnosticVMOptions.\n",
-                 _name);
+    jio_snprintf(buf, buflen,
+                 "Error: VM option '%s' is diagnostic and must be enabled via -XX:+UnlockDiagnosticVMOptions.\n"
+                 "Error: The unlock option must precede '%s'.\n",
+                 _name, _name);
     return;
   }
   if (is_experimental() && !is_unlocked()) {
-    jio_snprintf(buf, buflen, "Error: VM option '%s' is experimental and must be enabled via -XX:+UnlockExperimentalVMOptions.\n",
-                 _name);
+    jio_snprintf(buf, buflen,
+                 "Error: VM option '%s' is experimental and must be enabled via -XX:+UnlockExperimentalVMOptions.\n"
+                 "Error: The unlock option must precede '%s'.\n",
+                 _name, _name);
     return;
   }
   if (is_develop() && is_product_build()) {
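
The added second line documents the ordering requirement: the unlock flag
must appear on the command line before the option it unlocks. For example,
with the diagnostic flag PrintInlining on a product build:

    $ java -XX:+PrintInlining -version
    Error: VM option 'PrintInlining' is diagnostic and must be enabled via -XX:+UnlockDiagnosticVMOptions.
    Error: The unlock option must precede 'PrintInlining'.

    $ java -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -version   # accepted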
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/runtime/stubRoutines.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -181,7 +181,7 @@
     StubGenerator_generate(&buffer, false);
     // When new stubs are added we need to make sure there is some space left
     // to catch the situation when we should increase the size again.
-    assert(buffer.insts_remaining() > 200, "increase code_size1");
+    assert(code_size1 == 0 || buffer.insts_remaining() > 200, "increase code_size1");
   }
 }
 
@@ -274,7 +274,7 @@
     StubGenerator_generate(&buffer, true);
     // When new stubs are added we need to make sure there is some space left
     // to catch the situation when we should increase the size again.
-    assert(buffer.insts_remaining() > 200, "increase code_size2");
+    assert(code_size2 == 0 || buffer.insts_remaining() > 200, "increase code_size2");
   }
 
 #ifdef ASSERT
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/runtime/vmStructs.cpp	Thu Aug 27 18:51:22 2015 +0200
@@ -1565,6 +1565,7 @@
   declare_toplevel_type(Generation*)                                      \
   declare_toplevel_type(GenerationSpec**)                                 \
   declare_toplevel_type(HeapWord*)                                        \
+  declare_toplevel_type(HeapWord* volatile)                               \
   declare_toplevel_type(MemRegion*)                                       \
   declare_toplevel_type(OffsetTableContigSpace*)                          \
   declare_toplevel_type(Space*)                                           \
--- a/src/share/vm/trace/trace.xml	Wed Aug 26 09:49:37 2015 +0200
+++ b/src/share/vm/trace/trace.xml	Thu Aug 27 18:51:22 2015 +0200
@@ -346,6 +346,29 @@
       <value type="BYTES64" field="totalSize" label="Total Size" />
     </event>
 
+    <struct id="G1EvacStats">
+      <value type="UINT" field="gcId" label="GC ID" relation="GC_ID"/>
+      <value type="BYTES64" field="allocated" label="Allocated" description="Total memory allocated by PLABs"/>
+      <value type="BYTES64" field="wasted" label="Wasted" description="Total memory wasted within PLABs due to alignment or refill"/>
+      <value type="BYTES64" field="used" label="Used" description="Total memory occupied by objects within PLABs"/>
+      <value type="BYTES64" field="undoWaste" label="Undo Wasted" description="Total memory wasted due to allocation undo within PLABs"/>
+      <value type="BYTES64" field="regionEndWaste" label="Region End Wasted" description="Total memory wasted at the end of regions due to refill"/>
+      <value type="UINT" field="regionsRefilled" label="Region Refills" description="Total memory wasted at the end of regions due to refill"/>
+      <value type="BYTES64" field="directAllocated" label="Allocated (direct)" description="Total memory allocated using direct allocation outside of PLABs"/>
+      <value type="BYTES64" field="failureUsed" label="Used (failure)" description="Total memory occupied by objects in regions where evacuation failed"/>
+      <value type="BYTES64" field="failureWaste" label="Wasted (failure)" description="Total memory left unused in regions where evacuation failed"/>
+    </struct>
+
+    <event id="GCG1EvacuationYoungStatistics" path="vm/gc/detailed/g1_evac_young_stats" label="G1 Evacuation Statistics for Young" is_instant="true"
+           description="Memory related evacuation statistics during GC for the young generation">
+      <structvalue type="G1EvacStats" field="stats" label="Evacuation statistics"/>
+    </event>
+
+    <event id="GCG1EvacuationOldStatistics" path="vm/gc/detailed/g1_evac_old_stats" label="G1 Evacuation Memory Statistics for Old" is_instant="true"
+           description="Memory related evacuation statistics during GC for the old generation">
+      <structvalue type="G1EvacStats" field="stats" label="Evacuation statistics"/>
+    </event>
+
     <!-- Promotion events, Supported GCs are Parallel Scavange, G1 and CMS with Parallel New. -->
     <event id="PromoteObjectInNewPLAB" path="vm/gc/detailed/object_promotion_in_new_PLAB" label="Promotion in new PLAB"
         description="Object survived scavenge and was copied to a new Promotion Local Allocation Buffer (PLAB). Supported GCs are Parallel Scavange, G1 and CMS with Parallel New. Due to promotion being done in parallel an object might be reported multiple times as the GC threads race to copy all objects."