changeset 2566:4f978fb6c81a

Merge
author jmasa
date Wed, 06 Apr 2011 16:02:53 -0700
parents 1d1603768966 (current diff) 8f1042ff784d (diff)
children 24fbb4b7c2d3
files src/share/vm/oops/constantPoolKlass.cpp src/share/vm/runtime/globals.hpp
diffstat 26 files changed, 1491 insertions(+), 926 deletions(-)
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -262,39 +262,18 @@
   for (int i = 0; i < _numMarkedRegions; i++) {
     assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
     _markedRegions.at(i)->set_sort_index(i);
-    if (G1PrintRegionLivenessInfo > 0) {
-      if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
-      if (i < G1PrintRegionLivenessInfo ||
-          (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
-        HeapRegion* hr = _markedRegions.at(i);
-        size_t u = hr->used();
-        gclog_or_tty->print_cr("  Region %d: %d used, %d max live, %5.2f%%.",
-                      i, u, hr->max_live_bytes(),
-                      100.0*(float)hr->max_live_bytes()/(float)u);
-      }
+  }
+  if (G1PrintRegionLivenessInfo) {
+    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
+    for (int i = 0; i < _numMarkedRegions; ++i) {
+      HeapRegion* r = _markedRegions.at(i);
+      cl.doHeapRegion(r);
     }
   }
-  if (G1PolicyVerbose > 1)
-    printSortedHeapRegions();
   assert(verify(), "should now be sorted");
 }
 
 void
-printHeapRegion(HeapRegion *hr) {
-  if (hr->isHumongous())
-    gclog_or_tty->print("H: ");
-  if (hr->in_collection_set())
-    gclog_or_tty->print("CS: ");
-  gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
-                         "[" PTR_FORMAT ", " PTR_FORMAT"] "
-                         "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
-                         hr, hr->is_young() ? "Y " : "  ",
-                         hr->is_marked()? "M1" : "M0",
-                         hr->bottom(), hr->end(),
-                         hr->used()/K, hr->garbage_bytes()/K);
-}
-
-void
 CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
   assert(!hr->isHumongous(),
          "Humongous regions shouldn't be added to the collection set");
@@ -351,27 +330,9 @@
 
 void
 CollectionSetChooser::updateAfterFullCollection() {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
   clearMarkedHeapRegions();
 }
 
-void
-CollectionSetChooser::printSortedHeapRegions() {
-  gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
-                _numMarkedRegions);
-
-  DEBUG_ONLY(int marked_count = 0;)
-  for (int i = 0; i < _markedRegions.length(); i++) {
-    HeapRegion* r = _markedRegions.at(i);
-    if (r != NULL) {
-      printHeapRegion(r);
-      DEBUG_ONLY(marked_count++;)
-    }
-  }
-  assert(marked_count == _numMarkedRegions, "must be");
-  gclog_or_tty->print_cr("Done sorted heap region print");
-}
-
 void CollectionSetChooser::removeRegion(HeapRegion *hr) {
   int si = hr->sort_index();
   assert(si == -1 || hr->is_marked(), "Sort index not valid.");
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -100,8 +100,6 @@
 
   CollectionSetChooser();
 
-  void printSortedHeapRegions();
-
   void sortMarkedHeapRegions();
   void fillCache();
   bool addRegionToCache(void);
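The new liveness-printing path above hands each sorted region to G1PrintRegionLivenessInfoClosure::doHeapRegion() instead of formatting lines inline. As a rough stand-alone sketch of that closure-over-regions pattern (the Toy* names are made up for illustration and are not the real HeapRegion/HeapRegionClosure types):

  #include <cstddef>
  #include <cstdio>
  #include <vector>

  // Stand-in for a heap region: only the fields the printer needs.
  struct ToyRegion {
    size_t used_bytes;
    size_t live_bytes;
  };

  // Stand-in for HeapRegionClosure: doHeapRegion() returns true to stop
  // the iteration early, false to keep going.
  struct ToyRegionClosure {
    virtual bool doHeapRegion(ToyRegion* r) = 0;
    virtual ~ToyRegionClosure() { }
  };

  struct ToyLivenessPrinter : public ToyRegionClosure {
    virtual bool doHeapRegion(ToyRegion* r) {
      std::printf("used: %zu live: %zu\n", r->used_bytes, r->live_bytes);
      return false;  // never abort the iteration
    }
  };

  // Mirrors the loop in sortMarkedHeapRegions(): walk the sorted regions
  // and hand each one to the closure.
  void print_sorted(std::vector<ToyRegion*>& sorted, ToyRegionClosure* cl) {
    for (size_t i = 0; i < sorted.size(); ++i) {
      cl->doHeapRegion(sorted[i]);
    }
  }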
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -31,23 +31,31 @@
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/space.inline.hpp"
 #include "runtime/atomic.hpp"
+#include "runtime/java.hpp"
 #include "utilities/copy.hpp"
 
 // Possible sizes for the card counts cache: odd primes that roughly double in size.
 // (See jvmtiTagMap.cpp).
-int ConcurrentG1Refine::_cc_cache_sizes[] = {
-        16381,    32771,    76831,    150001,   307261,
-       614563,  1228891,  2457733,   4915219,  9830479,
-     19660831, 39321619, 78643219, 157286461,       -1
+
+#define MAX_SIZE ((size_t) -1)
+
+size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
+          16381,    32771,    76831,    150001,   307261,
+         614563,  1228891,  2457733,   4915219,  9830479,
+       19660831, 39321619, 78643219, 157286461,  MAX_SIZE
   };
 
 ConcurrentG1Refine::ConcurrentG1Refine() :
   _card_counts(NULL), _card_epochs(NULL),
-  _n_card_counts(0), _max_n_card_counts(0),
+  _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
   _cache_size_index(0), _expand_card_counts(false),
   _hot_cache(NULL),
   _def_use_cache(false), _use_cache(false),
-  _n_periods(0),
+  // We initialize the epochs of the array to 0. By initializing
+  // _n_periods to 1 and not 0 we automatically invalidate all the
+  // entries on the array. Otherwise we might accidentally think that
+  // we claimed a card that was in fact never set (see CR7033292).
+  _n_periods(1),
   _threads(NULL), _n_threads(0)
 {
 
@@ -98,27 +106,44 @@
 void ConcurrentG1Refine::init() {
   if (G1ConcRSLogCacheSize > 0) {
     _g1h = G1CollectedHeap::heap();
-    _max_n_card_counts =
-      (unsigned) (_g1h->max_capacity() >> CardTableModRefBS::card_shift);
+
+    _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
+    _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;
 
     size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
-    guarantee(_max_n_card_counts < max_card_num, "card_num representation");
+    guarantee(_max_cards < max_card_num, "card_num representation");
 
-    int desired = _max_n_card_counts / InitialCacheFraction;
-    for (_cache_size_index = 0;
-              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
-      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
-    }
-    _cache_size_index = MAX2(0, (_cache_size_index - 1));
+    // We need _n_card_counts to be less than _max_n_card_counts here
+    // so that the expansion call (below) actually allocates the
+    // _counts and _epochs arrays.
+    assert(_n_card_counts == 0, "pre-condition");
+    assert(_max_n_card_counts > 0, "pre-condition");
 
-    int initial_size = _cc_cache_sizes[_cache_size_index];
-    if (initial_size < 0) initial_size = _max_n_card_counts;
+    // Find the index into cache size array that is of a size that's
+    // large enough to hold desired_sz.
+    size_t desired_sz = _max_cards / InitialCacheFraction;
+    int desired_sz_index = 0;
+    while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
+      desired_sz_index += 1;
+      assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
+    }
+    assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
 
-    // Make sure we don't go bigger than we will ever need
-    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);
+    // If the desired_sz value is between two sizes then
+    // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index]
+    // we will start with the lower size in the optimistic expectation that
+    // we will not need to expand up. Note desired_sz_index could also be 0.
+    if (desired_sz_index > 0 &&
+        _cc_cache_sizes[desired_sz_index] > desired_sz) {
+      desired_sz_index -= 1;
+    }
 
-    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
-    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
+    if (!expand_card_count_cache(desired_sz_index)) {
+      // Allocation was unsuccessful - exit
+      vm_exit_during_initialization("Could not reserve enough space for card count cache");
+    }
+    assert(_n_card_counts > 0, "post-condition");
+    assert(_cache_size_index == desired_sz_index, "post-condition");
 
     Copy::fill_to_bytes(&_card_counts[0],
                         _n_card_counts * sizeof(CardCountCacheEntry));
@@ -163,10 +188,13 @@
 
 ConcurrentG1Refine::~ConcurrentG1Refine() {
   if (G1ConcRSLogCacheSize > 0) {
+    // Please see the comment in allocate_card_count_cache
+    // for why we call os::malloc() and os::free() directly.
     assert(_card_counts != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
+    os::free(_card_counts);
     assert(_card_epochs != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
+    os::free(_card_epochs);
+
     assert(_hot_cache != NULL, "Logic");
     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   }
@@ -382,29 +410,93 @@
   }
 }
 
-void ConcurrentG1Refine::expand_card_count_cache() {
-  if (_n_card_counts < _max_n_card_counts) {
-    int new_idx = _cache_size_index+1;
-    int new_size = _cc_cache_sizes[new_idx];
-    if (new_size < 0) new_size = _max_n_card_counts;
+// The arrays used to hold the card counts and the epochs must have
+// a 1:1 correspondence. Hence they are allocated and freed together
+// Returns true if the allocations of both the counts and epochs
+// were successful; false otherwise.
+bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
+                                                   CardCountCacheEntry** counts,
+                                                   CardEpochCacheEntry** epochs) {
+  // We call the allocation/free routines directly for the counts
+  // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
+  // macros call AllocateHeap and FreeHeap respectively.
+  // AllocateHeap will call vm_exit_out_of_memory in the event
+  // of an allocation failure and abort the JVM. With the
+  // _counts/epochs arrays we only need to abort the JVM if the
+  // initial allocation of these arrays fails.
+  //
+  // Additionally AllocateHeap/FreeHeap do some tracing of
+  // allocate/free calls so calling one without calling the
+  // other can cause inconsistencies in the tracing. So we
+  // call neither.
 
-    // Make sure we don't go bigger than we will ever need
-    new_size = MIN2((unsigned) new_size, _max_n_card_counts);
+  assert(*counts == NULL, "out param");
+  assert(*epochs == NULL, "out param");
+
+  size_t counts_size = n * sizeof(CardCountCacheEntry);
+  size_t epochs_size = n * sizeof(CardEpochCacheEntry);
+
+  *counts = (CardCountCacheEntry*) os::malloc(counts_size);
+  if (*counts == NULL) {
+    // allocation was unsuccessful
+    return false;
+  }
+
+  *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
+  if (*epochs == NULL) {
+    // allocation was unsuccessful - free counts array
+    assert(*counts != NULL, "must be");
+    os::free(*counts);
+    *counts = NULL;
+    return false;
+  }
 
-    // Expand the card count and card epoch tables
-    if (new_size > (int)_n_card_counts) {
-      // We can just free and allocate a new array as we're
-      // not interested in preserving the contents
-      assert(_card_counts != NULL, "Logic!");
-      assert(_card_epochs != NULL, "Logic!");
-      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
-      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
-      _n_card_counts = new_size;
-      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
-      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
-      _cache_size_index = new_idx;
+  // We successfully allocated both counts and epochs
+  return true;
+}
+
+// Returns true if the card counts/epochs cache was
+// successfully expanded; false otherwise.
+bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
+  // Can we expand the card count and epoch tables?
+  if (_n_card_counts < _max_n_card_counts) {
+    assert(cache_size_idx >= 0 && cache_size_idx  < MAX_CC_CACHE_INDEX, "oob");
+
+    size_t cache_size = _cc_cache_sizes[cache_size_idx];
+    // Make sure we don't go bigger than we will ever need
+    cache_size = MIN2(cache_size, _max_n_card_counts);
+
+    // Should we expand the card count and card epoch tables?
+    if (cache_size > _n_card_counts) {
+      // We have been asked to allocate new, larger, arrays for
+      // the card counts and the epochs. Attempt the allocation
+      // of both before we free the existing arrays in case
+      // the allocation is unsuccessful...
+      CardCountCacheEntry* counts = NULL;
+      CardEpochCacheEntry* epochs = NULL;
+
+      if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
+        // Allocation was successful.
+        // We can just free the old arrays; we're
+        // not interested in preserving the contents
+        if (_card_counts != NULL) os::free(_card_counts);
+        if (_card_epochs != NULL) os::free(_card_epochs);
+
+        // Cache the size of the arrays and the index that got us there.
+        _n_card_counts = cache_size;
+        _cache_size_index = cache_size_idx;
+
+        _card_counts = counts;
+        _card_epochs = epochs;
+
+        // We successfully allocated/expanded the caches.
+        return true;
+      }
     }
   }
+
+  // We did not successfully expand the caches.
+  return false;
 }
 
 void ConcurrentG1Refine::clear_and_record_card_counts() {
@@ -415,10 +507,16 @@
 #endif
 
   if (_expand_card_counts) {
-    expand_card_count_cache();
+    int new_idx = _cache_size_index + 1;
+
+    if (expand_card_count_cache(new_idx)) {
+      // Allocation was successful and  _n_card_counts has
+      // been updated to the new size. We only need to clear
+      // the epochs so we don't read a bogus epoch value
+      // when inserting a card into the hot card cache.
+      Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
+    }
     _expand_card_counts = false;
-    // Only need to clear the epochs.
-    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   }
 
   int this_epoch = (int) _n_periods;
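The init() changes above pick the starting slot in the prime-sized table by scanning for the first entry that can hold the desired size and then, when the desired size falls strictly between two entries, stepping back to the smaller one. A stand-alone sketch of that selection logic (the table values are copied from the hunk; the function name is made up for illustration):

  #include <cassert>
  #include <cstddef>

  // Hypothetical stand-ins for _cc_cache_sizes / MAX_CC_CACHE_INDEX.
  static const size_t cache_sizes[] = {
        16381,    32771,    76831,    150001,   307261,
       614563,  1228891,  2457733,   4915219,  9830479,
     19660831, 39321619, 78643219, 157286461, (size_t) -1
  };
  static const int max_index = sizeof(cache_sizes) / sizeof(cache_sizes[0]);

  // Pick the starting index for a desired cache size: the first entry that
  // can hold desired_sz, stepping back one entry when desired_sz falls
  // strictly between two table entries (optimistically start smaller).
  int initial_cache_index(size_t desired_sz) {
    int idx = 0;
    while (cache_sizes[idx] < desired_sz) {
      ++idx;
      assert(idx < max_index);
    }
    if (idx > 0 && cache_sizes[idx] > desired_sz) {
      --idx;
    }
    return idx;
  }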
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -94,7 +94,7 @@
   } CardEpochCacheEntry;
 
   julong make_epoch_entry(unsigned int card_num, unsigned int epoch) {
-    assert(0 <= card_num && card_num < _max_n_card_counts, "Bounds");
+    assert(0 <= card_num && card_num < _max_cards, "Bounds");
     assert(0 <= epoch && epoch <= _n_periods, "must be");
 
     return ((julong) card_num << card_num_shift) | epoch;
@@ -117,15 +117,24 @@
   CardEpochCacheEntry* _card_epochs;
 
   // The current number of buckets in the card count cache
-  unsigned _n_card_counts;
+  size_t _n_card_counts;
+
+  // The number of cards for the entire reserved heap
+  size_t _max_cards;
 
-  // The max number of buckets required for the number of
-  // cards for the entire reserved heap
-  unsigned _max_n_card_counts;
+  // The max number of buckets for the card counts and epochs caches.
+  // This is the maximum that the counts and epochs will grow to.
+  // It is specified as a fraction or percentage of _max_cards using
+  // G1MaxHotCardCountSizePercent.
+  size_t _max_n_card_counts;
 
   // Possible sizes of the cache: odd primes that roughly double in size.
   // (See jvmtiTagMap.cpp).
-  static int _cc_cache_sizes[];
+  enum {
+    MAX_CC_CACHE_INDEX = 15    // maximum index into the cache size array.
+  };
+
+  static size_t _cc_cache_sizes[MAX_CC_CACHE_INDEX];
 
   // The index in _cc_cache_sizes corresponding to the size of
   // _card_counts.
@@ -147,11 +156,22 @@
   CardTableModRefBS* _ct_bs;
   G1CollectedHeap*   _g1h;
 
-  // Expands the array that holds the card counts to the next size up
-  void expand_card_count_cache();
+  // Helper routine for expand_card_count_cache().
+  // The arrays used to hold the card counts and the epochs must have
+  // a 1:1 correspondence. Hence they are allocated and freed together.
+  // Returns true if the allocations of both the counts and epochs
+  // were successful; false otherwise.
+  bool allocate_card_count_cache(size_t n,
+                                 CardCountCacheEntry** counts,
+                                 CardEpochCacheEntry** epochs);
+
+  // Expands the arrays that hold the card counts and epochs
+  // to the cache size at index. Returns true if the expansion/
+  // allocation was successful; false otherwise.
+  bool expand_card_count_cache(int index);
 
   // hash a given key (index of card_ptr) with the specified size
-  static unsigned int hash(size_t key, int size) {
+  static unsigned int hash(size_t key, size_t size) {
     return (unsigned int) key % size;
   }
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -1204,7 +1204,6 @@
   g1p->record_concurrent_mark_remark_end();
 }
 
-
 #define CARD_BM_TEST_MODE 0
 
 class CalcLiveObjectsClosure: public HeapRegionClosure {
@@ -1726,6 +1725,11 @@
   }
   _total_counting_time += this_final_counting_time;
 
+  if (G1PrintRegionLivenessInfo) {
+    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
+    _g1h->heap_region_iterate(&cl);
+  }
+
   // Install newly created mark bitMap as "prev".
   swapMarkBitMaps();
 
@@ -3199,8 +3203,12 @@
                CMTask* task)
     : _g1h(g1h), _cm(cm), _task(task)
   {
-    _ref_processor = g1h->ref_processor();
-    assert(_ref_processor != NULL, "should not be NULL");
+    assert(_ref_processor == NULL, "should be initialized to NULL");
+
+    if (G1UseConcMarkReferenceProcessing) {
+      _ref_processor = g1h->ref_processor();
+      assert(_ref_processor != NULL, "should not be NULL");
+    }
   }
 };
 
@@ -4423,3 +4431,175 @@
 
   _marking_step_diffs_ms.add(0.5);
 }
+
+// These are formatting macros that are used below to ensure
+// consistent formatting. The *_H_* versions are used to format the
+// header for a particular value and they should be kept consistent
+// with the corresponding macro. Also note that most of the macros add
+// the necessary white space (as a prefix) which makes them a bit
+// easier to compose.
+
+// All the output lines are prefixed with this string to be able to
+// identify them easily in a large log file.
+#define G1PPRL_LINE_PREFIX            "###"
+
+#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
+#ifdef _LP64
+#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
+#else // _LP64
+#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
+#endif // _LP64
+
+// For per-region info
+#define G1PPRL_TYPE_FORMAT            "   %-4s"
+#define G1PPRL_TYPE_H_FORMAT          "   %4s"
+#define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
+#define G1PPRL_BYTE_H_FORMAT          "  %9s"
+#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
+#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
+
+// For summary info
+#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
+#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
+#define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
+#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
+
+G1PrintRegionLivenessInfoClosure::
+G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
+  : _out(out),
+    _total_used_bytes(0), _total_capacity_bytes(0),
+    _total_prev_live_bytes(0), _total_next_live_bytes(0),
+    _hum_used_bytes(0), _hum_capacity_bytes(0),
+    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  MemRegion g1_committed = g1h->g1_committed();
+  MemRegion g1_reserved = g1h->g1_reserved();
+  double now = os::elapsedTime();
+
+  // Print the header of the output.
+  _out->cr();
+  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
+  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
+                 G1PPRL_SUM_ADDR_FORMAT("committed")
+                 G1PPRL_SUM_ADDR_FORMAT("reserved")
+                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
+                 g1_committed.start(), g1_committed.end(),
+                 g1_reserved.start(), g1_reserved.end(),
+                 HeapRegion::GrainBytes);
+  _out->print_cr(G1PPRL_LINE_PREFIX);
+  _out->print_cr(G1PPRL_LINE_PREFIX
+                 G1PPRL_TYPE_H_FORMAT
+                 G1PPRL_ADDR_BASE_H_FORMAT
+                 G1PPRL_BYTE_H_FORMAT
+                 G1PPRL_BYTE_H_FORMAT
+                 G1PPRL_BYTE_H_FORMAT
+                 G1PPRL_DOUBLE_H_FORMAT,
+                 "type", "address-range",
+                 "used", "prev-live", "next-live", "gc-eff");
+}
+
+// It takes as a parameter a reference to one of the _hum_* fields, it
+// deduces the corresponding value for a region in a humongous region
+// series (either the region size, or what's left if the _hum_* field
+// is < the region size), and updates the _hum_* field accordingly.
+size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
+  size_t bytes = 0;
+  // The > 0 check is to deal with the prev and next live bytes which
+  // could be 0.
+  if (*hum_bytes > 0) {
+    bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
+    *hum_bytes -= bytes;
+  }
+  return bytes;
+}
+
+// It deduces the values for a region in a humongous region series
+// from the _hum_* fields and updates those accordingly. It assumes
+// that that _hum_* fields have already been set up from the "starts
+// humongous" region and we visit the regions in address order.
+void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
+                                                     size_t* capacity_bytes,
+                                                     size_t* prev_live_bytes,
+                                                     size_t* next_live_bytes) {
+  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
+  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
+  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
+  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
+  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
+}
+
+bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
+  const char* type = "";
+  HeapWord* bottom       = r->bottom();
+  HeapWord* end          = r->end();
+  size_t capacity_bytes  = r->capacity();
+  size_t used_bytes      = r->used();
+  size_t prev_live_bytes = r->live_bytes();
+  size_t next_live_bytes = r->next_live_bytes();
+  double gc_eff          = r->gc_efficiency();
+  if (r->used() == 0) {
+    type = "FREE";
+  } else if (r->is_survivor()) {
+    type = "SURV";
+  } else if (r->is_young()) {
+    type = "EDEN";
+  } else if (r->startsHumongous()) {
+    type = "HUMS";
+
+    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
+           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
+           "they should have been zeroed after the last time we used them");
+    // Set up the _hum_* fields.
+    _hum_capacity_bytes  = capacity_bytes;
+    _hum_used_bytes      = used_bytes;
+    _hum_prev_live_bytes = prev_live_bytes;
+    _hum_next_live_bytes = next_live_bytes;
+    get_hum_bytes(&used_bytes, &capacity_bytes,
+                  &prev_live_bytes, &next_live_bytes);
+    end = bottom + HeapRegion::GrainWords;
+  } else if (r->continuesHumongous()) {
+    type = "HUMC";
+    get_hum_bytes(&used_bytes, &capacity_bytes,
+                  &prev_live_bytes, &next_live_bytes);
+    assert(end == bottom + HeapRegion::GrainWords, "invariant");
+  } else {
+    type = "OLD";
+  }
+
+  _total_used_bytes      += used_bytes;
+  _total_capacity_bytes  += capacity_bytes;
+  _total_prev_live_bytes += prev_live_bytes;
+  _total_next_live_bytes += next_live_bytes;
+
+  // Print a line for this particular region.
+  _out->print_cr(G1PPRL_LINE_PREFIX
+                 G1PPRL_TYPE_FORMAT
+                 G1PPRL_ADDR_BASE_FORMAT
+                 G1PPRL_BYTE_FORMAT
+                 G1PPRL_BYTE_FORMAT
+                 G1PPRL_BYTE_FORMAT
+                 G1PPRL_DOUBLE_FORMAT,
+                 type, bottom, end,
+                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
+
+  return false;
+}
+
+G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
+  // Print the footer of the output.
+  _out->print_cr(G1PPRL_LINE_PREFIX);
+  _out->print_cr(G1PPRL_LINE_PREFIX
+                 " SUMMARY"
+                 G1PPRL_SUM_MB_FORMAT("capacity")
+                 G1PPRL_SUM_MB_PERC_FORMAT("used")
+                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
+                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
+                 bytes_to_mb(_total_capacity_bytes),
+                 bytes_to_mb(_total_used_bytes),
+                 perc(_total_used_bytes, _total_capacity_bytes),
+                 bytes_to_mb(_total_prev_live_bytes),
+                 perc(_total_prev_live_bytes, _total_capacity_bytes),
+                 bytes_to_mb(_total_next_live_bytes),
+                 perc(_total_next_live_bytes, _total_capacity_bytes));
+  _out->cr();
+}
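The get_hum_bytes() helpers above spread a humongous object's totals across the regions of its series: the "starts humongous" region records the full totals, and each region in the series then takes at most one region's worth out of them. A small stand-alone sketch of that bookkeeping (region_size is a hypothetical value standing in for HeapRegion::GrainBytes):

  #include <algorithm>
  #include <cstddef>

  // Hypothetical region size in bytes (stands in for HeapRegion::GrainBytes).
  static const size_t region_size = (size_t) 1 << 20;

  // Take up to one region's worth out of a running humongous-series total,
  // mirroring G1PrintRegionLivenessInfoClosure::get_hum_bytes().
  size_t take_hum_bytes(size_t* remaining) {
    size_t bytes = 0;
    // The > 0 check matters for prev/next live bytes, which can be 0.
    if (*remaining > 0) {
      bytes = std::min(region_size, *remaining);
      *remaining -= bytes;
    }
    return bytes;
  }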
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -1149,4 +1149,54 @@
 #endif // _MARKING_STATS_
 };
 
+// Class that's used to print out per-region liveness
+// information. It's currently used at the end of marking and also
+// after we sort the old regions at the end of the cleanup operation.
+class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
+private:
+  outputStream* _out;
+
+  // Accumulators for these values.
+  size_t _total_used_bytes;
+  size_t _total_capacity_bytes;
+  size_t _total_prev_live_bytes;
+  size_t _total_next_live_bytes;
+
+  // These are set up when we come across a "starts humongous" region
+  // (as this is where most of this information is stored, not in the
+  // subsequent "continues humongous" regions). After that, for every
+  // region in a given humongous region series we deduce the right
+  // values for it by simply subtracting the appropriate amount from
+  // these fields. All these values should reach 0 after we've visited
+  // the last region in the series.
+  size_t _hum_used_bytes;
+  size_t _hum_capacity_bytes;
+  size_t _hum_prev_live_bytes;
+  size_t _hum_next_live_bytes;
+
+  static double perc(size_t val, size_t total) {
+    if (total == 0) {
+      return 0.0;
+    } else {
+      return 100.0 * ((double) val / (double) total);
+    }
+  }
+
+  static double bytes_to_mb(size_t val) {
+    return (double) val / (double) M;
+  }
+
+  // See the .cpp file.
+  size_t get_hum_bytes(size_t* hum_bytes);
+  void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
+                     size_t* prev_live_bytes, size_t* next_live_bytes);
+
+public:
+  // The header and footer are printed in the constructor and
+  // destructor respectively.
+  G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
+  virtual bool doHeapRegion(HeapRegion* r);
+  ~G1PrintRegionLivenessInfoClosure();
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+
+G1CollectedHeap* G1AllocRegion::_g1h = NULL;
+HeapRegion* G1AllocRegion::_dummy_region = NULL;
+
+void G1AllocRegion::setup(G1CollectedHeap* g1h, HeapRegion* dummy_region) {
+  assert(_dummy_region == NULL, "should be set once");
+  assert(dummy_region != NULL, "pre-condition");
+  assert(dummy_region->free() == 0, "pre-condition");
+
+  // Make sure that any allocation attempt on this region will fail
+  // and will not trigger any asserts.
+  assert(allocate(dummy_region, 1, false) == NULL, "should fail");
+  assert(par_allocate(dummy_region, 1, false) == NULL, "should fail");
+  assert(allocate(dummy_region, 1, true) == NULL, "should fail");
+  assert(par_allocate(dummy_region, 1, true) == NULL, "should fail");
+
+  _g1h = g1h;
+  _dummy_region = dummy_region;
+}
+
+void G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region,
+                                            bool bot_updates) {
+  assert(alloc_region != NULL && alloc_region != _dummy_region,
+         "pre-condition");
+
+  // Other threads might still be trying to allocate using a CAS out
+  // of the region we are trying to retire, as they can do so without
+  // holding the lock. So, we first have to make sure that no one else
+  // can allocate out of it by doing a maximal allocation. Even if our
+  // CAS attempt fails a few times, we'll succeed sooner or later
+  // given that failed CAS attempts mean that the region is getting
+  // closed to being full.
+  size_t free_word_size = alloc_region->free() / HeapWordSize;
+
+  // This is the minimum free chunk we can turn into a dummy
+  // object. If the free space falls below this, then no one can
+  // allocate in this region anyway (all allocation requests will be
+  // of a size larger than this) so we won't have to perform the dummy
+  // allocation.
+  size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
+
+  while (free_word_size >= min_word_size_to_fill) {
+    HeapWord* dummy = par_allocate(alloc_region, free_word_size, bot_updates);
+    if (dummy != NULL) {
+      // If the allocation was successful we should fill in the space.
+      CollectedHeap::fill_with_object(dummy, free_word_size);
+      alloc_region->set_pre_dummy_top(dummy);
+      break;
+    }
+
+    free_word_size = alloc_region->free() / HeapWordSize;
+    // It's also possible that someone else beats us to the
+    // allocation and they fill up the region. In that case, we can
+    // just get out of the loop.
+  }
+  assert(alloc_region->free() / HeapWordSize < min_word_size_to_fill,
+         "post-condition");
+}
+
+void G1AllocRegion::retire(bool fill_up) {
+  assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
+
+  trace("retiring");
+  HeapRegion* alloc_region = _alloc_region;
+  if (alloc_region != _dummy_region) {
+    // We never have to check whether the active region is empty or not,
+    // and potentially free it if it is, given that it's guaranteed that
+    // it will never be empty.
+    assert(!alloc_region->is_empty(),
+           ar_ext_msg(this, "the alloc region should never be empty"));
+
+    if (fill_up) {
+      fill_up_remaining_space(alloc_region, _bot_updates);
+    }
+
+    assert(alloc_region->used() >= _used_bytes_before,
+           ar_ext_msg(this, "invariant"));
+    size_t allocated_bytes = alloc_region->used() - _used_bytes_before;
+    retire_region(alloc_region, allocated_bytes);
+    _used_bytes_before = 0;
+    _alloc_region = _dummy_region;
+  }
+  trace("retired");
+}
+
+HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size,
+                                                       bool force) {
+  assert(_alloc_region == _dummy_region, ar_ext_msg(this, "pre-condition"));
+  assert(_used_bytes_before == 0, ar_ext_msg(this, "pre-condition"));
+
+  trace("attempting region allocation");
+  HeapRegion* new_alloc_region = allocate_new_region(word_size, force);
+  if (new_alloc_region != NULL) {
+    new_alloc_region->reset_pre_dummy_top();
+    // Need to do this before the allocation
+    _used_bytes_before = new_alloc_region->used();
+    HeapWord* result = allocate(new_alloc_region, word_size, _bot_updates);
+    assert(result != NULL, ar_ext_msg(this, "the allocation should have succeeded"));
+
+    OrderAccess::storestore();
+    // Note that we first perform the allocation and then we store the
+    // region in _alloc_region. This is the reason why an active region
+    // can never be empty.
+    _alloc_region = new_alloc_region;
+    trace("region allocation successful");
+    return result;
+  } else {
+    trace("region allocation failed");
+    return NULL;
+  }
+  ShouldNotReachHere();
+}
+
+void G1AllocRegion::fill_in_ext_msg(ar_ext_msg* msg, const char* message) {
+  msg->append("[%s] %s b: %s r: "PTR_FORMAT" u: "SIZE_FORMAT,
+              _name, message, BOOL_TO_STR(_bot_updates),
+              _alloc_region, _used_bytes_before);
+}
+
+void G1AllocRegion::init() {
+  trace("initializing");
+  assert(_alloc_region == NULL && _used_bytes_before == 0,
+         ar_ext_msg(this, "pre-condition"));
+  assert(_dummy_region != NULL, "should have been set");
+  _alloc_region = _dummy_region;
+  trace("initialized");
+}
+
+HeapRegion* G1AllocRegion::release() {
+  trace("releasing");
+  HeapRegion* alloc_region = _alloc_region;
+  retire(false /* fill_up */);
+  assert(_alloc_region == _dummy_region, "post-condition of retire()");
+  _alloc_region = NULL;
+  trace("released");
+  return (alloc_region == _dummy_region) ? NULL : alloc_region;
+}
+
+#if G1_ALLOC_REGION_TRACING
+void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) {
+  // All the calls to trace that set either just the size or the size
+  // and the result are considered part of level 2 tracing and are
+  // skipped during level 1 tracing.
+  if ((word_size == 0 && result == NULL) || (G1_ALLOC_REGION_TRACING > 1)) {
+    const size_t buffer_length = 128;
+    char hr_buffer[buffer_length];
+    char rest_buffer[buffer_length];
+
+    HeapRegion* alloc_region = _alloc_region;
+    if (alloc_region == NULL) {
+      jio_snprintf(hr_buffer, buffer_length, "NULL");
+    } else if (alloc_region == _dummy_region) {
+      jio_snprintf(hr_buffer, buffer_length, "DUMMY");
+    } else {
+      jio_snprintf(hr_buffer, buffer_length,
+                   HR_FORMAT, HR_FORMAT_PARAMS(alloc_region));
+    }
+
+    if (G1_ALLOC_REGION_TRACING > 1) {
+      if (result != NULL) {
+        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT" "PTR_FORMAT,
+                     word_size, result);
+      } else if (word_size != 0) {
+        jio_snprintf(rest_buffer, buffer_length, SIZE_FORMAT, word_size);
+      } else {
+        jio_snprintf(rest_buffer, buffer_length, "");
+      }
+    } else {
+      jio_snprintf(rest_buffer, buffer_length, "");
+    }
+
+    tty->print_cr("[%s] %s : %s %s", _name, hr_buffer, str, rest_buffer);
+  }
+}
+#endif // G1_ALLOC_REGION_TRACING
+
+G1AllocRegion::G1AllocRegion(const char* name,
+                             bool bot_updates)
+  : _name(name), _bot_updates(bot_updates),
+    _alloc_region(NULL), _used_bytes_before(0) { }
+
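fill_up_remaining_space() above retries a maximal CAS allocation until the region's leftover space drops below the minimum fill size, so no other thread can slip an allocation into a region that is being retired. A toy stand-alone model of that loop, using std::atomic as a stand-in for HotSpot's own CAS primitives and byte sizes instead of heap words:

  #include <atomic>
  #include <cstddef>

  // Toy bump-pointer region: par_allocate() advances top with a CAS, loosely
  // modelling HeapRegion::par_allocate(); sizes are plain bytes here.
  struct ToyRegion {
    std::atomic<size_t> top;
    size_t end;
    ToyRegion(size_t e) : top(0), end(e) { }
    size_t free_bytes() const { return end - top.load(); }
    bool par_allocate(size_t sz) {
      size_t t = top.load();
      do {
        if (end - t < sz) return false;   // not enough space left
      } while (!top.compare_exchange_weak(t, t + sz));
      return true;
    }
  };

  // Keep attempting a maximal allocation so nobody else can allocate out of
  // the region; stop once the leftover space is below the minimum fill size,
  // since no request that small will be made anyway.
  void fill_up_remaining_space(ToyRegion& r, size_t min_fill) {
    size_t free_sz = r.free_bytes();
    while (free_sz >= min_fill) {
      if (r.par_allocate(free_sz)) {
        break;                      // we claimed everything that was left
      }
      free_sz = r.free_bytes();     // lost a race; re-read and try again
    }
  }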
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
+
+#include "gc_implementation/g1/heapRegion.hpp"
+
+class G1CollectedHeap;
+
+// 0 -> no tracing, 1 -> basic tracing, 2 -> basic + allocation tracing
+#define G1_ALLOC_REGION_TRACING 0
+
+class ar_ext_msg;
+
+// A class that holds a region that is active in satisfying allocation
+// requests, potentially issued in parallel. When the active region is
+// full it will be retired and replaced with a new one. The
+// implementation assumes that fast-path allocations will be lock-free
+// and a lock will need to be taken when the active region needs to be
+// replaced.
+
+class G1AllocRegion VALUE_OBJ_CLASS_SPEC {
+  friend class ar_ext_msg;
+
+private:
+  // The active allocating region we are currently allocating out
+  // of. The invariant is that if this object is initialized (i.e.,
+  // init() has been called and release() has not) then _alloc_region
+  // is either an active allocating region or the dummy region (i.e.,
+  // it can never be NULL) and this object can be used to satisfy
+  // allocation requests. If this object is not initialized
+  // (i.e. init() has not been called or release() has been called)
+  // then _alloc_region is NULL and this object should not be used to
+  // satisfy allocation requests (it was done this way to force the
+  // correct use of init() and release()).
+  HeapRegion* _alloc_region;
+
+  // When we set up a new active region we save its used bytes in this
+  // field so that, when we retire it, we can calculate how much space
+  // we allocated in it.
+  size_t _used_bytes_before;
+
+  // Specifies whether the allocate calls will do BOT updates or not.
+  bool _bot_updates;
+
+  // Useful for debugging and tracing.
+  const char* _name;
+
+  // A dummy region (i.e., it's been allocated specially for this
+  // purpose and it is not part of the heap) that is full (i.e., top()
+  // == end()). When we don't have a valid active region we make
+  // _alloc_region point to this. This allows us to skip checking
+  // whether the _alloc_region is NULL or not.
+  static HeapRegion* _dummy_region;
+
+  // Some of the methods below take a bot_updates parameter. Its value
+  // should be the same as the _bot_updates field. The idea is that
+  // the parameter will be a constant for a particular alloc region
+  // and, given that these methods will be hopefully inlined, the
+  // compiler should compile out the test.
+
+  // Perform a non-MT-safe allocation out of the given region.
+  static inline HeapWord* allocate(HeapRegion* alloc_region,
+                                   size_t word_size,
+                                   bool bot_updates);
+
+  // Perform a MT-safe allocation out of the given region.
+  static inline HeapWord* par_allocate(HeapRegion* alloc_region,
+                                       size_t word_size,
+                                       bool bot_updates);
+
+  // Ensure that the region passed as a parameter has been filled up
+  // so that no one else can allocate out of it any more.
+  static void fill_up_remaining_space(HeapRegion* alloc_region,
+                                      bool bot_updates);
+
+  // Retire the active allocating region. If fill_up is true then make
+  // sure that the region is full before we retire it so that no one
+  // else can allocate out of it.
+  void retire(bool fill_up);
+
+  // Allocate a new active region and use it to perform a word_size
+  // allocation. The force parameter will be passed on to
+  // G1CollectedHeap::allocate_new_alloc_region() and tells it to try
+  // to allocate a new region even if the max has been reached.
+  HeapWord* new_alloc_region_and_allocate(size_t word_size, bool force);
+
+  void fill_in_ext_msg(ar_ext_msg* msg, const char* message);
+
+protected:
+  // For convenience as subclasses use it.
+  static G1CollectedHeap* _g1h;
+
+  virtual HeapRegion* allocate_new_region(size_t word_size, bool force) = 0;
+  virtual void retire_region(HeapRegion* alloc_region,
+                             size_t allocated_bytes) = 0;
+
+  G1AllocRegion(const char* name, bool bot_updates);
+
+public:
+  static void setup(G1CollectedHeap* g1h, HeapRegion* dummy_region);
+
+  HeapRegion* get() const {
+    // Make sure that the dummy region does not escape this class.
+    return (_alloc_region == _dummy_region) ? NULL : _alloc_region;
+  }
+
+  // The following two are the building blocks for the allocation method.
+
+  // First-level allocation: Should be called without holding a
+  // lock. It will try to allocate lock-free out of the active region,
+  // or return NULL if it was unable to.
+  inline HeapWord* attempt_allocation(size_t word_size, bool bot_updates);
+
+  // Second-level allocation: Should be called while holding a
+  // lock. It will try to first allocate lock-free out of the active
+  // region or, if it's unable to, it will try to replace the active
+  // alloc region with a new one. We require that the caller takes the
+  // appropriate lock before calling this so that it is easier to make
+  // it conform to its locking protocol.
+  inline HeapWord* attempt_allocation_locked(size_t word_size,
+                                             bool bot_updates);
+
+  // Should be called to allocate a new region even if the max of this
+  // type of regions has been reached. Should only be called if other
+  // allocation attempts have failed and we are not holding a valid
+  // active region.
+  inline HeapWord* attempt_allocation_force(size_t word_size,
+                                            bool bot_updates);
+
+  // Should be called before we start using this object.
+  void init();
+
+  // Should be called when we want to release the active region which
+  // is returned after it's been retired.
+  HeapRegion* release();
+
+#if G1_ALLOC_REGION_TRACING
+  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL);
+#else // G1_ALLOC_REGION_TRACING
+  void trace(const char* str, size_t word_size = 0, HeapWord* result = NULL) { }
+#endif // G1_ALLOC_REGION_TRACING
+};
+
+class ar_ext_msg : public err_msg {
+public:
+  ar_ext_msg(G1AllocRegion* alloc_region, const char *message) : err_msg("") {
+    alloc_region->fill_in_ext_msg(this, message);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP
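G1AllocRegion above leaves allocate_new_region() and retire_region() as pure virtuals, so each kind of allocating region plugs in its own region source and retirement policy while the base class owns the allocation protocol. A rough stand-alone sketch of that shape (both class names and the comments inside the bodies are hypothetical, not taken from this changeset):

  #include <cstddef>

  // Base class owns the allocation protocol; subclasses say where regions
  // come from and what happens to them when they are retired.
  class ToyAllocRegion {
  protected:
    virtual void* allocate_new_region(size_t word_size, bool force) = 0;
    virtual void retire_region(void* region, size_t allocated_bytes) = 0;
  public:
    virtual ~ToyAllocRegion() { }
  };

  // A hypothetical mutator-side subclass.
  class ToyMutatorAllocRegion : public ToyAllocRegion {
  protected:
    virtual void* allocate_new_region(size_t word_size, bool force) {
      // e.g. take a region from a free list, expanding the heap when forced
      return NULL;
    }
    virtual void retire_region(void* region, size_t allocated_bytes) {
      // e.g. add the region to the young list and record allocated_bytes
    }
  };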
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
+
+#include "gc_implementation/g1/g1AllocRegion.hpp"
+
+inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
+                                         size_t word_size,
+                                         bool bot_updates) {
+  assert(alloc_region != NULL, err_msg("pre-condition"));
+
+  if (!bot_updates) {
+    return alloc_region->allocate_no_bot_updates(word_size);
+  } else {
+    return alloc_region->allocate(word_size);
+  }
+}
+
+inline HeapWord* G1AllocRegion::par_allocate(HeapRegion* alloc_region,
+                                             size_t word_size,
+                                             bool bot_updates) {
+  assert(alloc_region != NULL, err_msg("pre-condition"));
+  assert(!alloc_region->is_empty(), err_msg("pre-condition"));
+
+  if (!bot_updates) {
+    return alloc_region->par_allocate_no_bot_updates(word_size);
+  } else {
+    return alloc_region->par_allocate(word_size);
+  }
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation(size_t word_size,
+                                                   bool bot_updates) {
+  assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
+
+  HeapRegion* alloc_region = _alloc_region;
+  assert(alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
+
+  HeapWord* result = par_allocate(alloc_region, word_size, bot_updates);
+  if (result != NULL) {
+    trace("alloc", word_size, result);
+    return result;
+  }
+  trace("alloc failed", word_size);
+  return NULL;
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation_locked(size_t word_size,
+                                                          bool bot_updates) {
+  // First we have to redo the allocation, assuming we're holding the
+  // appropriate lock, in case another thread changed the region while
+  // we were waiting to get the lock.
+  HeapWord* result = attempt_allocation(word_size, bot_updates);
+  if (result != NULL) {
+    return result;
+  }
+
+  retire(true /* fill_up */);
+  result = new_alloc_region_and_allocate(word_size, false /* force */);
+  if (result != NULL) {
+    trace("alloc locked (second attempt)", word_size, result);
+    return result;
+  }
+  trace("alloc locked failed", word_size);
+  return NULL;
+}
+
+inline HeapWord* G1AllocRegion::attempt_allocation_force(size_t word_size,
+                                                         bool bot_updates) {
+  assert(bot_updates == _bot_updates, ar_ext_msg(this, "pre-condition"));
+  assert(_alloc_region != NULL, ar_ext_msg(this, "not initialized properly"));
+
+  trace("forcing alloc");
+  HeapWord* result = new_alloc_region_and_allocate(word_size, true /* force */);
+  if (result != NULL) {
+    trace("alloc forced", word_size, result);
+    return result;
+  }
+  trace("alloc forced failed", word_size);
+  return NULL;
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
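The inline file above spells out the two-level protocol: attempt_allocation() is the lock-free fast path, and attempt_allocation_locked() is called with the lock held, retries the fast path (another thread may have installed a fresh region while we waited), and only then retires and replaces the active region. A compressed stand-alone sketch of that flow, with std::mutex standing in for the Heap_lock and the two callbacks standing in for the fast path and the retire-and-replace step:

  #include <cstddef>
  #include <mutex>

  // try_lock_free() models attempt_allocation(); retire_and_replace() models
  // retire() + new_alloc_region_and_allocate(). Both are stand-ins.
  void* allocate(size_t word_size, std::mutex& heap_lock,
                 void* (*try_lock_free)(size_t),
                 void* (*retire_and_replace)(size_t)) {
    // First level: no lock held, allocate out of the active region with a CAS.
    void* result = try_lock_free(word_size);
    if (result != NULL) {
      return result;
    }
    // Second level: take the lock, retry in case another thread already
    // replaced the active region, then retire it and install a new one.
    std::lock_guard<std::mutex> guard(heap_lock);
    result = try_lock_free(word_size);
    if (result != NULL) {
      return result;
    }
    return retire_and_replace(word_size);
  }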
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
+#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
@@ -517,8 +518,7 @@
   return NULL;
 }
 
-HeapRegion* G1CollectedHeap::new_region_work(size_t word_size,
-                                             bool do_expand) {
+HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
   assert(!isHumongous(word_size) ||
                                   word_size <= (size_t) HeapRegion::GrainWords,
          "the only time we use this to allocate a humongous region is "
@@ -566,7 +566,7 @@
                                                  size_t word_size) {
   HeapRegion* alloc_region = NULL;
   if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
-    alloc_region = new_region_work(word_size, true /* do_expand */);
+    alloc_region = new_region(word_size, true /* do_expand */);
     if (purpose == GCAllocForSurvived && alloc_region != NULL) {
       alloc_region->set_survivor();
     }
@@ -587,7 +587,7 @@
     // Only one region to allocate, no need to go through the slower
     // path. The caller will attempt the expansion if this fails, so
     // let's not try to expand here too.
-    HeapRegion* hr = new_region_work(word_size, false /* do_expand */);
+    HeapRegion* hr = new_region(word_size, false /* do_expand */);
     if (hr != NULL) {
       first = hr->hrs_index();
     } else {
@@ -788,407 +788,12 @@
   return result;
 }
 
-void
-G1CollectedHeap::retire_cur_alloc_region(HeapRegion* cur_alloc_region) {
-  // Other threads might still be trying to allocate using CASes out
-  // of the region we are retiring, as they can do so without holding
-  // the Heap_lock. So we first have to make sure that noone else can
-  // allocate in it by doing a maximal allocation. Even if our CAS
-  // attempt fails a few times, we'll succeed sooner or later given
-  // that a failed CAS attempt mean that the region is getting closed
-  // to being full (someone else succeeded in allocating into it).
-  size_t free_word_size = cur_alloc_region->free() / HeapWordSize;
-
-  // This is the minimum free chunk we can turn into a dummy
-  // object. If the free space falls below this, then noone can
-  // allocate in this region anyway (all allocation requests will be
-  // of a size larger than this) so we won't have to perform the dummy
-  // allocation.
-  size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
-
-  while (free_word_size >= min_word_size_to_fill) {
-    HeapWord* dummy =
-      cur_alloc_region->par_allocate_no_bot_updates(free_word_size);
-    if (dummy != NULL) {
-      // If the allocation was successful we should fill in the space.
-      CollectedHeap::fill_with_object(dummy, free_word_size);
-      break;
-    }
-
-    free_word_size = cur_alloc_region->free() / HeapWordSize;
-    // It's also possible that someone else beats us to the
-    // allocation and they fill up the region. In that case, we can
-    // just get out of the loop
-  }
-  assert(cur_alloc_region->free() / HeapWordSize < min_word_size_to_fill,
-         "sanity");
-
-  retire_cur_alloc_region_common(cur_alloc_region);
-  assert(_cur_alloc_region == NULL, "post-condition");
-}
-
-// See the comment in the .hpp file about the locking protocol and
-// assumptions of this method (and other related ones).
-HeapWord*
-G1CollectedHeap::replace_cur_alloc_region_and_allocate(size_t word_size,
-                                                       bool at_safepoint,
-                                                       bool do_dirtying,
-                                                       bool can_expand) {
-  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
-  assert(_cur_alloc_region == NULL,
-         "replace_cur_alloc_region_and_allocate() should only be called "
-         "after retiring the previous current alloc region");
-  assert(SafepointSynchronize::is_at_safepoint() == at_safepoint,
-         "at_safepoint and is_at_safepoint() should be a tautology");
-  assert(!can_expand || g1_policy()->can_expand_young_list(),
-         "we should not call this method with can_expand == true if "
-         "we are not allowed to expand the young gen");
-
-  if (can_expand || !g1_policy()->is_young_list_full()) {
-    HeapRegion* new_cur_alloc_region = new_alloc_region(word_size);
-    if (new_cur_alloc_region != NULL) {
-      assert(new_cur_alloc_region->is_empty(),
-             "the newly-allocated region should be empty, "
-             "as right now we only allocate new regions out of the free list");
-      g1_policy()->update_region_num(true /* next_is_young */);
-      set_region_short_lived_locked(new_cur_alloc_region);
-
-      assert(!new_cur_alloc_region->isHumongous(),
-             "Catch a regression of this bug.");
-
-      // We need to ensure that the stores to _cur_alloc_region and,
-      // subsequently, to top do not float above the setting of the
-      // young type.
-      OrderAccess::storestore();
-
-      // Now, perform the allocation out of the region we just
-      // allocated. Note that noone else can access that region at
-      // this point (as _cur_alloc_region has not been updated yet),
-      // so we can just go ahead and do the allocation without any
-      // atomics (and we expect this allocation attempt to
-      // suceeded). Given that other threads can attempt an allocation
-      // with a CAS and without needing the Heap_lock, if we assigned
-      // the new region to _cur_alloc_region before first allocating
-      // into it other threads might have filled up the new region
-      // before we got a chance to do the allocation ourselves. In
-      // that case, we would have needed to retire the region, grab a
-      // new one, and go through all this again. Allocating out of the
-      // new region before assigning it to _cur_alloc_region avoids
-      // all this.
-      HeapWord* result =
-                     new_cur_alloc_region->allocate_no_bot_updates(word_size);
-      assert(result != NULL, "we just allocate out of an empty region "
-             "so allocation should have been successful");
-      assert(is_in(result), "result should be in the heap");
-
-      // Now make sure that the store to _cur_alloc_region does not
-      // float above the store to top.
-      OrderAccess::storestore();
-      _cur_alloc_region = new_cur_alloc_region;
-
-      if (!at_safepoint) {
-        Heap_lock->unlock();
-      }
-
-      // do the dirtying, if necessary, after we release the Heap_lock
-      if (do_dirtying) {
-        dirty_young_block(result, word_size);
-      }
-      return result;
-    }
-  }
-
-  assert(_cur_alloc_region == NULL, "we failed to allocate a new current "
-         "alloc region, it should still be NULL");
-  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
-  return NULL;
-}
-
-// See the comment in the .hpp file about the locking protocol and
-// assumptions of this method (and other related ones).
-HeapWord*
-G1CollectedHeap::attempt_allocation_slow(size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
-  assert(!isHumongous(word_size), "attempt_allocation_slow() should not be "
-         "used for humongous allocations");
-
-  // We should only reach here when we were unable to allocate
-  // otherwise. So, we should have not active current alloc region.
-  assert(_cur_alloc_region == NULL, "current alloc region should be NULL");
-
-  // We will loop while succeeded is false, which means that we tried
-  // to do a collection, but the VM op did not succeed. So, when we
-  // exit the loop, either one of the allocation attempts was
-  // successful, or we succeeded in doing the VM op but which was
-  // unable to allocate after the collection.
-  for (int try_count = 1; /* we'll return or break */; try_count += 1) {
-    bool succeeded = true;
-
-    // Every time we go round the loop we should be holding the Heap_lock.
-    assert_heap_locked();
-
-    if (GC_locker::is_active_and_needs_gc()) {
-      // We are locked out of GC because of the GC locker. We can
-      // allocate a new region only if we can expand the young gen.
-
-      if (g1_policy()->can_expand_young_list()) {
-        // Yes, we are allowed to expand the young gen. Let's try to
-        // allocate a new current alloc region.
-        HeapWord* result =
-          replace_cur_alloc_region_and_allocate(word_size,
-                                                false, /* at_safepoint */
-                                                true,  /* do_dirtying */
-                                                true   /* can_expand */);
-        if (result != NULL) {
-          assert_heap_not_locked();
-          return result;
-        }
-      }
-      // We could not expand the young gen further (or we could but we
-      // failed to allocate a new region). We'll stall until the GC
-      // locker forces a GC.
-
-      // If this thread is not in a jni critical section, we stall
-      // the requestor until the critical section has cleared and
-      // GC allowed. When the critical section clears, a GC is
-      // initiated by the last thread exiting the critical section; so
-      // we retry the allocation sequence from the beginning of the loop,
-      // rather than causing more, now probably unnecessary, GC attempts.
-      JavaThread* jthr = JavaThread::current();
-      assert(jthr != NULL, "sanity");
-      if (jthr->in_critical()) {
-        if (CheckJNICalls) {
-          fatal("Possible deadlock due to allocating while"
-                " in jni critical section");
-        }
-        // We are returning NULL so the protocol is that we're still
-        // holding the Heap_lock.
-        assert_heap_locked();
-        return NULL;
-      }
-
-      Heap_lock->unlock();
-      GC_locker::stall_until_clear();
-
-      // No need to relock the Heap_lock. We'll fall off to the code
-      // below the else-statement which assumes that we are not
-      // holding the Heap_lock.
-    } else {
-      // We are not locked out. So, let's try to do a GC. The VM op
-      // will retry the allocation before it completes.
-
-      // Read the GC count while holding the Heap_lock
-      unsigned int gc_count_before = SharedHeap::heap()->total_collections();
-
-      Heap_lock->unlock();
-
-      HeapWord* result =
-        do_collection_pause(word_size, gc_count_before, &succeeded);
-      assert_heap_not_locked();
-      if (result != NULL) {
-        assert(succeeded, "the VM op should have succeeded");
-
-        // Allocations that take place on VM operations do not do any
-        // card dirtying and we have to do it here.
-        dirty_young_block(result, word_size);
-        return result;
-      }
-    }
-
-    // Both paths that get us here from above unlock the Heap_lock.
-    assert_heap_not_locked();
-
-    // We can reach here when we were unsuccessful in doing a GC,
-    // because another thread beat us to it, or because we were locked
-    // out of GC due to the GC locker. In either case a new alloc
-    // region might be available so we will retry the allocation.
-    HeapWord* result = attempt_allocation(word_size);
-    if (result != NULL) {
-      assert_heap_not_locked();
-      return result;
-    }
-
-    // So far our attempts to allocate failed. The only time we'll go
-    // around the loop and try again is if we tried to do a GC and the
-    // VM op that we tried to schedule was not successful because
-    // another thread beat us to it. If that happened it's possible
-    // that by the time we grabbed the Heap_lock again and tried to
-    // allocate other threads filled up the young generation, which
-    // means that the allocation attempt after the GC also failed. So,
-    // it's worth trying to schedule another GC pause.
-    if (succeeded) {
-      break;
-    }
-
-    // Give a warning if we seem to be looping forever.
-    if ((QueuedAllocationWarningCount > 0) &&
-        (try_count % QueuedAllocationWarningCount == 0)) {
-      warning("G1CollectedHeap::attempt_allocation_slow() "
-              "retries %d times", try_count);
-    }
-  }
-
-  assert_heap_locked();
-  return NULL;
-}
-
-// See the comment in the .hpp file about the locking protocol and
-// assumptions of this method (and other related ones).
-HeapWord*
-G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
-                                              bool at_safepoint) {
-  // This is the method that will allocate a humongous object. All
-  // allocation paths that attempt to allocate a humongous object
-  // should eventually reach here. Currently, the only paths are from
-  // mem_allocate() and attempt_allocation_at_safepoint().
-  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
-  assert(isHumongous(word_size), "attempt_allocation_humongous() "
-         "should only be used for humongous allocations");
-  assert(SafepointSynchronize::is_at_safepoint() == at_safepoint,
-         "at_safepoint and is_at_safepoint() should be a tautology");
-
-  HeapWord* result = NULL;
-
-  // We will loop while succeeded is false, which means that we tried
-  // to do a collection, but the VM op did not succeed. So, when we
-  // exit the loop, either one of the allocation attempts was
-  // successful, or we succeeded in doing the VM op but which was
-  // unable to allocate after the collection.
-  for (int try_count = 1; /* we'll return or break */; try_count += 1) {
-    bool succeeded = true;
-
-    // Given that humongous objects are not allocated in young
-    // regions, we'll first try to do the allocation without doing a
-    // collection hoping that there's enough space in the heap.
-    result = humongous_obj_allocate(word_size);
-    assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(),
-           "catch a regression of this bug.");
-    if (result != NULL) {
-      if (!at_safepoint) {
-        // If we're not at a safepoint, unlock the Heap_lock.
-        Heap_lock->unlock();
-      }
-      return result;
-    }
-
-    // If we failed to allocate the humongous object, we should try to
-    // do a collection pause (if we're allowed) in case it reclaims
-    // enough space for the allocation to succeed after the pause.
-    if (!at_safepoint) {
-      // Read the GC count while holding the Heap_lock
-      unsigned int gc_count_before = SharedHeap::heap()->total_collections();
-
-      // If we're allowed to do a collection we're not at a
-      // safepoint, so it is safe to unlock the Heap_lock.
-      Heap_lock->unlock();
-
-      result = do_collection_pause(word_size, gc_count_before, &succeeded);
-      assert_heap_not_locked();
-      if (result != NULL) {
-        assert(succeeded, "the VM op should have succeeded");
-        return result;
-      }
-
-      // If we get here, the VM operation either did not succeed
-      // (i.e., another thread beat us to it) or it succeeded but
-      // failed to allocate the object.
-
-      // If we're allowed to do a collection we're not at a
-      // safepoint, so it is safe to lock the Heap_lock.
-      Heap_lock->lock();
-    }
-
-    assert(result == NULL, "otherwise we should have exited the loop earlier");
-
-    // So far our attempts to allocate failed. The only time we'll go
-    // around the loop and try again is if we tried to do a GC and the
-    // VM op that we tried to schedule was not successful because
-    // another thread beat us to it. That way it's possible that some
-    // space was freed up by the thread that successfully scheduled a
-    // GC. So it's worth trying to allocate again.
-    if (succeeded) {
-      break;
-    }
-
-    // Give a warning if we seem to be looping forever.
-    if ((QueuedAllocationWarningCount > 0) &&
-        (try_count % QueuedAllocationWarningCount == 0)) {
-      warning("G1CollectedHeap::attempt_allocation_humongous "
-              "retries %d times", try_count);
-    }
-  }
-
-  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
-  return NULL;
-}
-
-HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size,
-                                           bool expect_null_cur_alloc_region) {
-  assert_at_safepoint(true /* should_be_vm_thread */);
-  assert(_cur_alloc_region == NULL || !expect_null_cur_alloc_region,
-         err_msg("the current alloc region was unexpectedly found "
-                 "to be non-NULL, cur alloc region: "PTR_FORMAT" "
-                 "expect_null_cur_alloc_region: %d word_size: "SIZE_FORMAT,
-                 _cur_alloc_region, expect_null_cur_alloc_region, word_size));
-
-  if (!isHumongous(word_size)) {
-    if (!expect_null_cur_alloc_region) {
-      HeapRegion* cur_alloc_region = _cur_alloc_region;
-      if (cur_alloc_region != NULL) {
-        // We are at a safepoint so no reason to use the MT-safe version.
-        HeapWord* result = cur_alloc_region->allocate_no_bot_updates(word_size);
-        if (result != NULL) {
-          assert(is_in(result), "result should be in the heap");
-
-          // We will not do any dirtying here. This is guaranteed to be
-          // called during a safepoint and the thread that scheduled the
-          // pause will do the dirtying if we return a non-NULL result.
-          return result;
-        }
-
-        retire_cur_alloc_region_common(cur_alloc_region);
-      }
-    }
-
-    assert(_cur_alloc_region == NULL,
-           "at this point we should have no cur alloc region");
-    return replace_cur_alloc_region_and_allocate(word_size,
-                                                 true, /* at_safepoint */
-                                                 false /* do_dirtying */,
-                                                 false /* can_expand */);
-  } else {
-    return attempt_allocation_humongous(word_size,
-                                        true /* at_safepoint */);
-  }
-
-  ShouldNotReachHere();
-}
-
 HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
   assert_heap_not_locked_and_not_at_safepoint();
-  assert(!isHumongous(word_size), "we do not allow TLABs of humongous size");
-
-  // First attempt: Try allocating out of the current alloc region
-  // using a CAS. If that fails, take the Heap_lock and retry the
-  // allocation, potentially replacing the current alloc region.
-  HeapWord* result = attempt_allocation(word_size);
-  if (result != NULL) {
-    assert_heap_not_locked();
-    return result;
-  }
-
-  // Second attempt: Go to the slower path where we might try to
-  // schedule a collection.
-  result = attempt_allocation_slow(word_size);
-  if (result != NULL) {
-    assert_heap_not_locked();
-    return result;
-  }
-
-  assert_heap_locked();
-  // Need to unlock the Heap_lock before returning.
-  Heap_lock->unlock();
-  return NULL;
+  assert(!isHumongous(word_size), "we do not allow humongous TLABs");
+
+  unsigned int dummy_gc_count_before;
+  return attempt_allocation(word_size, &dummy_gc_count_before);
 }
 
 HeapWord*
@@ -1200,48 +805,18 @@
   assert(!is_tlab, "mem_allocate() this should not be called directly "
          "to allocate TLABs");
 
-  // Loop until the allocation is satisified,
-  // or unsatisfied after GC.
+  // Loop until the allocation is satisfied, or unsatisfied after GC.
   for (int try_count = 1; /* we'll return */; try_count += 1) {
     unsigned int gc_count_before;
-    {
-      if (!isHumongous(word_size)) {
-        // First attempt: Try allocating out of the current alloc region
-        // using a CAS. If that fails, take the Heap_lock and retry the
-        // allocation, potentially replacing the current alloc region.
-        HeapWord* result = attempt_allocation(word_size);
-        if (result != NULL) {
-          assert_heap_not_locked();
-          return result;
-        }
-
-        assert_heap_locked();
-
-        // Second attempt: Go to the slower path where we might try to
-        // schedule a collection.
-        result = attempt_allocation_slow(word_size);
-        if (result != NULL) {
-          assert_heap_not_locked();
-          return result;
-        }
-      } else {
-        // attempt_allocation_humongous() requires the Heap_lock to be held.
-        Heap_lock->lock();
-
-        HeapWord* result = attempt_allocation_humongous(word_size,
-                                                     false /* at_safepoint */);
-        if (result != NULL) {
-          assert_heap_not_locked();
-          return result;
-        }
-      }
-
-      assert_heap_locked();
-      // Read the gc count while the heap lock is held.
-      gc_count_before = SharedHeap::heap()->total_collections();
-
-      // Release the Heap_lock before attempting the collection.
-      Heap_lock->unlock();
+
+    HeapWord* result = NULL;
+    if (!isHumongous(word_size)) {
+      result = attempt_allocation(word_size, &gc_count_before);
+    } else {
+      result = attempt_allocation_humongous(word_size, &gc_count_before);
+    }
+    if (result != NULL) {
+      return result;
     }
 
     // Create the garbage collection operation...
@@ -1249,7 +824,6 @@
     // ...and get the VM thread to execute it.
     VMThread::execute(&op);
 
-    assert_heap_not_locked();
     if (op.prologue_succeeded() && op.pause_succeeded()) {
       // If the operation was successful we'll return the result even
       // if it is NULL. If the allocation attempt failed immediately
@@ -1275,21 +849,207 @@
   }
 
   ShouldNotReachHere();
+  return NULL;
 }
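
The retry logic above, and in the two slow paths that follow, hinges on the gc_count_before handshake: the GC count is sampled under the Heap_lock, and the collection request is dropped if another thread has already completed a pause since the sample. A minimal standalone sketch of that handshake in plain C++ (a mutex stands in for the Heap_lock; all names are invented, this is not HotSpot code):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <mutex>

    // Toy heap: a mutex stands in for the Heap_lock, a counter for
    // total_collections(), and a free-word budget for allocatable space.
    struct MiniHeap {
      std::mutex heap_lock;
      uint64_t   total_collections = 0;
      size_t     free_words = 0;

      bool try_allocate(size_t word_size) {
        std::lock_guard<std::mutex> g(heap_lock);
        if (free_words >= word_size) { free_words -= word_size; return true; }
        return false;
      }

      // Sample the collection counter while holding the lock, as the G1
      // slow paths do before scheduling a pause.
      uint64_t gc_count() {
        std::lock_guard<std::mutex> g(heap_lock);
        return total_collections;
      }

      // The "VM operation": only collect if nobody else has collected since
      // the caller sampled the counter (the prologue check).
      bool collect_if_count_unchanged(uint64_t gc_count_before) {
        std::lock_guard<std::mutex> g(heap_lock);
        if (total_collections != gc_count_before) {
          return false;               // another thread already did a GC
        }
        total_collections++;
        free_words += 1024;           // pretend the pause reclaimed space
        return true;
      }
    };

    bool allocate_with_retry(MiniHeap& heap, size_t word_size) {
      for (;;) {
        if (heap.try_allocate(word_size)) return true;
        uint64_t gc_count_before = heap.gc_count();
        if (heap.collect_if_count_unchanged(gc_count_before)) {
          // A pause actually ran; if we still cannot allocate, give up.
          return heap.try_allocate(word_size);
        }
        // Our pause was skipped because the count moved: loop and retry.
      }
    }

    int main() {
      MiniHeap heap;
      std::printf("allocated: %s\n",
                  allocate_with_retry(heap, 128) ? "yes" : "no");
      return 0;
    }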
 
-void G1CollectedHeap::abandon_cur_alloc_region() {
+HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size,
+                                           unsigned int *gc_count_before_ret) {
+  // Make sure you read the note in attempt_allocation_humongous().
+
+  assert_heap_not_locked_and_not_at_safepoint();
+  assert(!isHumongous(word_size), "attempt_allocation_slow() should not "
+         "be called for humongous allocation requests");
+
+  // We should only get here after the first-level allocation attempt
+  // (attempt_allocation()) failed to allocate.
+
+  // We will loop until a) we manage to successfully perform the
+  // allocation or b) we successfully schedule a collection which
+  // fails to perform the allocation. b) is the only case when we'll
+  // return NULL.
+  HeapWord* result = NULL;
+  for (int try_count = 1; /* we'll return */; try_count += 1) {
+    bool should_try_gc;
+    unsigned int gc_count_before;
+
+    {
+      MutexLockerEx x(Heap_lock);
+
+      result = _mutator_alloc_region.attempt_allocation_locked(word_size,
+                                                      false /* bot_updates */);
+      if (result != NULL) {
+        return result;
+      }
+
+      // If we reach here, attempt_allocation_locked() above failed to
+      // allocate a new region. So the mutator alloc region should be NULL.
+      assert(_mutator_alloc_region.get() == NULL, "only way to get here");
+
+      if (GC_locker::is_active_and_needs_gc()) {
+        if (g1_policy()->can_expand_young_list()) {
+          result = _mutator_alloc_region.attempt_allocation_force(word_size,
+                                                      false /* bot_updates */);
+          if (result != NULL) {
+            return result;
+          }
+        }
+        should_try_gc = false;
+      } else {
+        // Read the GC count while still holding the Heap_lock.
+        gc_count_before = SharedHeap::heap()->total_collections();
+        should_try_gc = true;
+      }
+    }
+
+    if (should_try_gc) {
+      bool succeeded;
+      result = do_collection_pause(word_size, gc_count_before, &succeeded);
+      if (result != NULL) {
+        assert(succeeded, "only way to get back a non-NULL result");
+        return result;
+      }
+
+      if (succeeded) {
+        // If we get here we successfully scheduled a collection which
+        // failed to allocate. No point in trying to allocate
+        // further. We'll just return NULL.
+        MutexLockerEx x(Heap_lock);
+        *gc_count_before_ret = SharedHeap::heap()->total_collections();
+        return NULL;
+      }
+    } else {
+      GC_locker::stall_until_clear();
+    }
+
+    // We can reach here if we were unsuccessful in scheduling a
+    // collection (because another thread beat us to it) or if we were
+    // stalled due to the GC locker. In either case we should retry the
+    // allocation attempt in case another thread successfully
+    // performed a collection and reclaimed enough space. We do the
+    // first attempt (without holding the Heap_lock) here and the
+    // follow-on attempt will be at the start of the next loop
+    // iteration (after taking the Heap_lock).
+    result = _mutator_alloc_region.attempt_allocation(word_size,
+                                                      false /* bot_updates */);
+    if (result != NULL) {
+      return result;
+    }
+
+    // Give a warning if we seem to be looping forever.
+    if ((QueuedAllocationWarningCount > 0) &&
+        (try_count % QueuedAllocationWarningCount == 0)) {
+      warning("G1CollectedHeap::attempt_allocation_slow() "
+              "retries %d times", try_count);
+    }
+  }
+
+  ShouldNotReachHere();
+  return NULL;
+}
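
The GC-locker branch above (is_active_and_needs_gc() / stall_until_clear()) acts as a gate: while JNI critical sections pin the heap, an allocator that cannot make progress stalls until the last critical section exits and the pending GC runs. A condition-variable sketch of such a gate, with invented names and no claim to match the real GC_locker:

    #include <condition_variable>
    #include <mutex>

    // A "critical section gate": while any thread is inside a critical
    // section and a GC has been requested, allocators that cannot make
    // progress stall until the last critical section exits.
    class CriticalSectionGate {
      std::mutex              _mu;
      std::condition_variable _cv;
      int                     _critical_count = 0;
      bool                    _needs_gc = false;

    public:
      void enter_critical() {
        std::lock_guard<std::mutex> g(_mu);
        _critical_count++;
      }

      void leave_critical() {
        std::lock_guard<std::mutex> g(_mu);
        if (--_critical_count == 0 && _needs_gc) {
          _needs_gc = false;    // the last thread out would trigger the GC here
          _cv.notify_all();     // wake any stalled allocators
        }
      }

      // Called by an allocator that failed while the gate is held.
      bool is_active_and_needs_gc() {
        std::lock_guard<std::mutex> g(_mu);
        if (_critical_count > 0) { _needs_gc = true; return true; }
        return false;
      }

      void stall_until_clear() {
        std::unique_lock<std::mutex> lk(_mu);
        _cv.wait(lk, [this] { return _critical_count == 0; });
      }
    };

    int main() {
      CriticalSectionGate gate;
      return gate.is_active_and_needs_gc() ? 1 : 0;
    }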
+
+HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
+                                          unsigned int * gc_count_before_ret) {
+  // The structure of this method has a lot of similarities to
+  // attempt_allocation_slow(). The reason these two were not merged
+  // into a single one is that such a method would require several "if
+  // allocation is not humongous do this, otherwise do that"
+  // conditional paths which would obscure its flow. In fact, an early
+  // version of this code did use a unified method which was harder to
+  // follow and, as a result, it had subtle bugs that were hard to
+  // track down. So keeping these two methods separate allows each to
+  // be more readable. It will be good to keep these two in sync as
+  // much as possible.
+
+  assert_heap_not_locked_and_not_at_safepoint();
+  assert(isHumongous(word_size), "attempt_allocation_humongous() "
+         "should only be called for humongous allocations");
+
+  // We will loop until a) we manage to successfully perform the
+  // allocation or b) we successfully schedule a collection which
+  // fails to perform the allocation. b) is the only case when we'll
+  // return NULL.
+  HeapWord* result = NULL;
+  for (int try_count = 1; /* we'll return */; try_count += 1) {
+    bool should_try_gc;
+    unsigned int gc_count_before;
+
+    {
+      MutexLockerEx x(Heap_lock);
+
+      // Given that humongous objects are not allocated in young
+      // regions, we'll first try to do the allocation without doing a
+      // collection hoping that there's enough space in the heap.
+      result = humongous_obj_allocate(word_size);
+      if (result != NULL) {
+        return result;
+      }
+
+      if (GC_locker::is_active_and_needs_gc()) {
+        should_try_gc = false;
+      } else {
+        // Read the GC count while still holding the Heap_lock.
+        gc_count_before = SharedHeap::heap()->total_collections();
+        should_try_gc = true;
+      }
+    }
+
+    if (should_try_gc) {
+      // If we failed to allocate the humongous object, we should try to
+      // do a collection pause (if we're allowed) in case it reclaims
+      // enough space for the allocation to succeed after the pause.
+
+      bool succeeded;
+      result = do_collection_pause(word_size, gc_count_before, &succeeded);
+      if (result != NULL) {
+        assert(succeeded, "only way to get back a non-NULL result");
+        return result;
+      }
+
+      if (succeeded) {
+        // If we get here we successfully scheduled a collection which
+        // failed to allocate. No point in trying to allocate
+        // further. We'll just return NULL.
+        MutexLockerEx x(Heap_lock);
+        *gc_count_before_ret = SharedHeap::heap()->total_collections();
+        return NULL;
+      }
+    } else {
+      GC_locker::stall_until_clear();
+    }
+
+    // We can reach here if we were unsuccessful in scheduling a
+    // collection (because another thread beat us to it) or if we were
+    // stalled due to the GC locker. In either case we should retry the
+    // allocation attempt in case another thread successfully
+    // performed a collection and reclaimed enough space.  Give a
+    // warning if we seem to be looping forever.
+
+    if ((QueuedAllocationWarningCount > 0) &&
+        (try_count % QueuedAllocationWarningCount == 0)) {
+      warning("G1CollectedHeap::attempt_allocation_humongous() "
+              "retries %d times", try_count);
+    }
+  }
+
+  ShouldNotReachHere();
+  return NULL;
+}
+
+HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size,
+                                       bool expect_null_mutator_alloc_region) {
   assert_at_safepoint(true /* should_be_vm_thread */);
-
-  HeapRegion* cur_alloc_region = _cur_alloc_region;
-  if (cur_alloc_region != NULL) {
-    assert(!cur_alloc_region->is_empty(),
-           "the current alloc region can never be empty");
-    assert(cur_alloc_region->is_young(),
-           "the current alloc region should be young");
-
-    retire_cur_alloc_region_common(cur_alloc_region);
-  }
-  assert(_cur_alloc_region == NULL, "post-condition");
+  assert(_mutator_alloc_region.get() == NULL ||
+                                             !expect_null_mutator_alloc_region,
+         "the current alloc region was unexpectedly found to be non-NULL");
+
+  if (!isHumongous(word_size)) {
+    return _mutator_alloc_region.attempt_allocation_locked(word_size,
+                                                      false /* bot_updates */);
+  } else {
+    return humongous_obj_allocate(word_size);
+  }
+
+  ShouldNotReachHere();
 }
 
 void G1CollectedHeap::abandon_gc_alloc_regions() {
@@ -1417,8 +1177,8 @@
 
     if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyBeforeGC:");
       prepare_for_verify();
-      gclog_or_tty->print(" VerifyBeforeGC:");
       Universe::verify(true);
     }
 
@@ -1439,9 +1199,8 @@
     concurrent_mark()->abort();
 
     // Make sure we'll choose a new allocation region afterwards.
-    abandon_cur_alloc_region();
+    release_mutator_alloc_region();
     abandon_gc_alloc_regions();
-    assert(_cur_alloc_region == NULL, "Invariant.");
     g1_rem_set()->cleanupHRRS();
     tear_down_region_lists();
 
@@ -1547,6 +1306,8 @@
     // evacuation pause.
     clear_cset_fast_test();
 
+    init_mutator_alloc_region();
+
     double end = os::elapsedTime();
     g1_policy()->record_full_collection_end();
 
@@ -1720,8 +1481,9 @@
 
   *succeeded = true;
   // Let's attempt the allocation first.
-  HeapWord* result = attempt_allocation_at_safepoint(word_size,
-                                     false /* expect_null_cur_alloc_region */);
+  HeapWord* result =
+    attempt_allocation_at_safepoint(word_size,
+                                 false /* expect_null_mutator_alloc_region */);
   if (result != NULL) {
     assert(*succeeded, "sanity");
     return result;
@@ -1748,7 +1510,7 @@
 
   // Retry the allocation
   result = attempt_allocation_at_safepoint(word_size,
-                                      true /* expect_null_cur_alloc_region */);
+                                  true /* expect_null_mutator_alloc_region */);
   if (result != NULL) {
     assert(*succeeded, "sanity");
     return result;
@@ -1765,7 +1527,7 @@
 
   // Retry the allocation once more
   result = attempt_allocation_at_safepoint(word_size,
-                                      true /* expect_null_cur_alloc_region */);
+                                  true /* expect_null_mutator_alloc_region */);
   if (result != NULL) {
     assert(*succeeded, "sanity");
     return result;
@@ -1796,7 +1558,7 @@
   if (expand(expand_bytes)) {
     verify_region_sets_optional();
     return attempt_allocation_at_safepoint(word_size,
-                                          false /* expect_null_cur_alloc_region */);
+                                 false /* expect_null_mutator_alloc_region */);
   }
   return NULL;
 }
@@ -1940,7 +1702,6 @@
   _evac_failure_scan_stack(NULL) ,
   _mark_in_progress(false),
   _cg1r(NULL), _summary_bytes_used(0),
-  _cur_alloc_region(NULL),
   _refine_cte_cl(NULL),
   _full_collection(false),
   _free_list("Master Free List"),
@@ -2099,7 +1860,6 @@
   _g1_max_committed = _g1_committed;
   _hrs = new HeapRegionSeq(_expansion_regions);
   guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
-  guarantee(_cur_alloc_region == NULL, "from constructor");
 
   // 6843694 - ensure that the maximum region index can fit
   // in the remembered set structures.
@@ -2195,6 +1955,22 @@
   // Do later initialization work for concurrent refinement.
   _cg1r->init();
 
+  // Here we allocate the dummy full region that is required by the
+  // G1AllocRegion class. If we don't pass an address in the reserved
+  // space here, lots of asserts fire.
+  MemRegion mr(_g1_reserved.start(), HeapRegion::GrainWords);
+  HeapRegion* dummy_region = new HeapRegion(_bot_shared, mr, true);
+  // We'll re-use the same dummy region whether or not the alloc region
+  // requires BOT updates; if it doesn't, a non-young region would
+  // complain that it cannot support allocations without BOT updates.
+  // So we tag the dummy region as young to avoid that.
+  dummy_region->set_young();
+  // Make sure it's full.
+  dummy_region->set_top(dummy_region->end());
+  G1AllocRegion::setup(this, dummy_region);
+
+  init_mutator_alloc_region();
+
   return JNI_OK;
 }
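
The dummy region installed above is, presumably, what lets the G1AllocRegion fast path avoid a NULL check: the wrapper always points at some region, and a permanently full dummy forces every fast-path attempt into the slow path until a real region is installed. A standalone sketch of the trick (invented types; the bump is single-threaded for brevity, whereas the real fast path uses a CAS):

    #include <atomic>
    #include <cstddef>

    // A region is just a [top, end) bump range in this sketch.
    struct Region {
      char* top;
      char* end;
    };

    class AllocRegionSketch {
      std::atomic<Region*> _cur;
      char                 _dummy_backing[1];
      Region               _dummy;          // top == end, i.e. permanently full

    public:
      AllocRegionSketch() {
        _dummy.top = _dummy.end = _dummy_backing;   // zero bytes free
        _cur.store(&_dummy, std::memory_order_relaxed);
      }

      // Fast path: no NULL check needed because _cur is never NULL. (The
      // real fast path also CASes the top pointer; this one does not.)
      char* attempt_allocation(std::size_t bytes) {
        Region* r = _cur.load(std::memory_order_acquire);
        char* old_top = r->top;
        if ((std::size_t)(r->end - old_top) >= bytes) {
          r->top = old_top + bytes;
          return old_top;
        }
        return nullptr;                     // full, always true for the dummy
      }

      // The slow path installs a real region; release() puts the dummy back.
      void install(Region* r) { _cur.store(r, std::memory_order_release); }
      void release()          { _cur.store(&_dummy, std::memory_order_release); }
    };

    int main() {
      AllocRegionSketch ar;
      static char backing[256];
      if (ar.attempt_allocation(16) == nullptr) {  // dummy region: always fails
        Region real = { backing, backing + sizeof(backing) };
        ar.install(&real);
        ar.attempt_allocation(16);                 // now succeeds
        ar.release();
      }
      return 0;
    }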
 
@@ -2261,7 +2037,7 @@
          "Should be owned on this thread's behalf.");
   size_t result = _summary_bytes_used;
   // Read only once in case it is set to NULL concurrently
-  HeapRegion* hr = _cur_alloc_region;
+  HeapRegion* hr = _mutator_alloc_region.get();
   if (hr != NULL)
     result += hr->used();
   return result;
@@ -2324,13 +2100,11 @@
   // to free(), resulting in a SIGSEGV. Note that this doesn't appear
   // to be a problem in the optimized build, since the two loads of the
   // current allocation region field are optimized away.
-  HeapRegion* car = _cur_alloc_region;
-
-  // FIXME: should iterate over all regions?
-  if (car == NULL) {
+  HeapRegion* hr = _mutator_alloc_region.get();
+  if (hr == NULL) {
     return 0;
   }
-  return car->free();
+  return hr->free();
 }
 
 bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
@@ -2781,16 +2555,12 @@
   // since we can't allow tlabs to grow big enough to accommodate
   // humongous objects.
 
-  // We need to store the cur alloc region locally, since it might change
-  // between when we test for NULL and when we use it later.
-  ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  HeapRegion* hr = _mutator_alloc_region.get();
   size_t max_tlab_size = _humongous_object_threshold_in_words * wordSize;
-
-  if (cur_alloc_space == NULL) {
+  if (hr == NULL) {
     return max_tlab_size;
   } else {
-    return MIN2(MAX2(cur_alloc_space->free(), (size_t)MinTLABSize),
-                max_tlab_size);
+    return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab_size);
   }
 }
 
@@ -3364,6 +3134,7 @@
   }
 
   verify_region_sets_optional();
+  verify_dirty_young_regions();
 
   {
     // This call will decide whether this pause is an initial-mark
@@ -3425,8 +3196,8 @@
 
       if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
         HandleMark hm;  // Discard invalid handles created during verification
+        gclog_or_tty->print(" VerifyBeforeGC:");
         prepare_for_verify();
-        gclog_or_tty->print(" VerifyBeforeGC:");
         Universe::verify(false);
       }
 
@@ -3442,7 +3213,7 @@
 
       // Forget the current alloc region (we might even choose it to be part
       // of the collection set!).
-      abandon_cur_alloc_region();
+      release_mutator_alloc_region();
 
       // The elapsed time induced by the start time below deliberately elides
       // the possible verification above.
@@ -3573,6 +3344,8 @@
       g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
+      init_mutator_alloc_region();
+
       double end_time_sec = os::elapsedTime();
       double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
       g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3655,6 +3428,15 @@
   return gclab_word_size;
 }
 
+void G1CollectedHeap::init_mutator_alloc_region() {
+  assert(_mutator_alloc_region.get() == NULL, "pre-condition");
+  _mutator_alloc_region.init();
+}
+
+void G1CollectedHeap::release_mutator_alloc_region() {
+  _mutator_alloc_region.release();
+  assert(_mutator_alloc_region.get() == NULL, "post-condition");
+}
 
 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
   assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
@@ -3879,7 +3661,7 @@
       if (r->is_empty()) {
         // We didn't actually allocate anything in it; let's just put
         // it back on the free list.
-        _free_list.add_as_tail(r);
+        _free_list.add_as_head(r);
       } else if (_retain_gc_alloc_region[ap] && !totally) {
         // retain it so that we can use it at the beginning of the next GC
         _retained_gc_alloc_regions[ap] = r;
@@ -5013,7 +4795,7 @@
 
   *pre_used += hr->used();
   hr->hr_clear(par, true /* clear_space */);
-  free_list->add_as_tail(hr);
+  free_list->add_as_head(hr);
 }
 
 void G1CollectedHeap::free_humongous_region(HeapRegion* hr,
@@ -5065,7 +4847,7 @@
   }
   if (free_list != NULL && !free_list->is_empty()) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    _free_list.add_as_tail(free_list);
+    _free_list.add_as_head(free_list);
   }
   if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) {
     MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
@@ -5140,10 +4922,8 @@
   CardTableModRefBS* _ct_bs;
 public:
   G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs)
-    : _ct_bs(ct_bs)
-  { }
-  virtual bool doHeapRegion(HeapRegion* r)
-  {
+    : _ct_bs(ct_bs) { }
+  virtual bool doHeapRegion(HeapRegion* r) {
     MemRegion mr(r->bottom(), r->end());
     if (r->is_survivor()) {
       _ct_bs->verify_dirty_region(mr);
@@ -5153,6 +4933,29 @@
     return false;
   }
 };
+
+void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
+  for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) {
+    // We cannot guarantee that [bottom(),end()] is dirty.  Threads
+    // dirty allocated blocks as they allocate them. The thread that
+    // retires each region and replaces it with a new one will do a
+    // maximal allocation to fill in [pre_dummy_top(),end()] but will
+    // not dirty that area (one less thing to have to do while holding
+    // a lock). So we can only verify that [bottom(),pre_dummy_top()]
+    // is dirty. Also note that verify_dirty_region() requires
+    // mr.start() and mr.end() to be card aligned and pre_dummy_top()
+    // is not guaranteed to be.
+    MemRegion mr(hr->bottom(),
+                 ct_bs->align_to_card_boundary(hr->pre_dummy_top()));
+    ct_bs->verify_dirty_region(mr);
+  }
+}
+
+void G1CollectedHeap::verify_dirty_young_regions() {
+  verify_dirty_young_list(_young_list->first_region());
+  verify_dirty_young_list(_young_list->first_survivor_region());
+}
 #endif
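
The verification above aligns pre_dummy_top() to a card boundary before handing the range to verify_dirty_region(). Assuming a power-of-two card size of 512 bytes (an assumption, not something this patch states), the alignment and card-index arithmetic is the usual mask trick:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Card-boundary arithmetic, assuming a power-of-two card size
    // (512 bytes is the conventional value; adjust if it differs).
    const uintptr_t kCardSize = 512;

    inline uintptr_t align_down_to_card(uintptr_t addr) {
      return addr & ~(kCardSize - 1);
    }

    inline uintptr_t align_up_to_card(uintptr_t addr) {
      return (addr + kCardSize - 1) & ~(kCardSize - 1);
    }

    // The card-table slot a given heap address maps to.
    inline size_t card_index(uintptr_t addr, uintptr_t heap_base) {
      return (addr - heap_base) / kCardSize;
    }

    int main() {
      assert(align_down_to_card(1000) == 512);
      assert(align_up_to_card(1000)   == 1024);
      assert(align_up_to_card(512)    == 512);
      assert(card_index(1024, 0)      == 2);
      return 0;
    }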
 
 void G1CollectedHeap::cleanUpCardTable() {
@@ -5500,6 +5303,44 @@
   }
 }
 
+HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size,
+                                                      bool force) {
+  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
+  assert(!force || g1_policy()->can_expand_young_list(),
+         "if force is true we should be able to expand the young list");
+  if (force || !g1_policy()->is_young_list_full()) {
+    HeapRegion* new_alloc_region = new_region(word_size,
+                                              false /* do_expand */);
+    if (new_alloc_region != NULL) {
+      g1_policy()->update_region_num(true /* next_is_young */);
+      set_region_short_lived_locked(new_alloc_region);
+      return new_alloc_region;
+    }
+  }
+  return NULL;
+}
+
+void G1CollectedHeap::retire_mutator_alloc_region(HeapRegion* alloc_region,
+                                                  size_t allocated_bytes) {
+  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
+  assert(alloc_region->is_young(), "all mutator alloc regions should be young");
+
+  g1_policy()->add_region_to_incremental_cset_lhs(alloc_region);
+  _summary_bytes_used += allocated_bytes;
+}
+
+HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size,
+                                                    bool force) {
+  return _g1h->new_mutator_alloc_region(word_size, force);
+}
+
+void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
+                                       size_t allocated_bytes) {
+  _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
+}
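
The two definitions above are the G1-specific half of the mutator alloc region: the generic wrapper decides when a region has to be obtained or retired and calls virtual hooks for how, and MutatorAllocRegion forwards both hooks to the heap. A standalone sketch of that split, with invented names and a much smaller interface than the real G1AllocRegion:

    #include <cstddef>
    #include <cstdio>

    struct Region { std::size_t id; };

    // Owner that actually hands out and retires regions.
    class Heap {
      std::size_t _next_id = 0;
    public:
      Region* new_mutator_region(std::size_t /*word_size*/, bool /*force*/) {
        return new Region{_next_id++};
      }
      void retire_mutator_region(Region* r, std::size_t allocated_bytes) {
        std::printf("retired region %zu after %zu bytes\n",
                    r->id, allocated_bytes);
        delete r;
      }
    };

    // Generic wrapper: owns the "when" (refill/retire policy) and delegates
    // the "how" to virtual hooks, in the same spirit as G1AllocRegion.
    class AllocRegionBase {
    protected:
      virtual Region* allocate_new_region(std::size_t word_size, bool force) = 0;
      virtual void    retire_region(Region* region, std::size_t allocated_bytes) = 0;
    public:
      virtual ~AllocRegionBase() = default;
      Region* refill(Region* old_region, std::size_t used, std::size_t word_size) {
        if (old_region != nullptr) retire_region(old_region, used);
        return allocate_new_region(word_size, false /* force */);
      }
    };

    // Concrete policy for mutator allocation: both hooks forward to the heap.
    class MutatorAllocRegionSketch : public AllocRegionBase {
      Heap* _heap;
    public:
      explicit MutatorAllocRegionSketch(Heap* heap) : _heap(heap) {}
    protected:
      Region* allocate_new_region(std::size_t word_size, bool force) override {
        return _heap->new_mutator_region(word_size, force);
      }
      void retire_region(Region* region, std::size_t allocated_bytes) override {
        _heap->retire_mutator_region(region, allocated_bytes);
      }
    };

    int main() {
      Heap heap;
      MutatorAllocRegionSketch mar(&heap);
      Region* r = mar.refill(nullptr, 0, 64);   // first fill: nothing to retire
      r = mar.refill(r, 4096, 64);              // retire the old, get a new one
      heap.retire_mutator_region(r, 0);         // clean up the last region
      return 0;
    }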
+
+// Heap region set verification
+
 class VerifyRegionListsClosure : public HeapRegionClosure {
 private:
   HumongousRegionSet* _humongous_set;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_HPP
 
 #include "gc_implementation/g1/concurrentMark.hpp"
+#include "gc_implementation/g1/g1AllocRegion.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
 #include "gc_implementation/parNew/parGCAllocBuffer.hpp"
@@ -128,6 +129,15 @@
   void          print();
 };
 
+class MutatorAllocRegion : public G1AllocRegion {
+protected:
+  virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
+  virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes);
+public:
+  MutatorAllocRegion()
+    : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
+};
+
 class RefineCardTableEntryClosure;
 class G1CollectedHeap : public SharedHeap {
   friend class VM_G1CollectForAllocation;
@@ -135,6 +145,7 @@
   friend class VM_G1CollectFull;
   friend class VM_G1IncCollectionPause;
   friend class VMStructs;
+  friend class MutatorAllocRegion;
 
   // Closures used in implementation.
   friend class G1ParCopyHelper;
@@ -197,12 +208,15 @@
   // The sequence of all heap regions in the heap.
   HeapRegionSeq* _hrs;
 
-  // The region from which normal-sized objects are currently being
-  // allocated.  May be NULL.
-  HeapRegion* _cur_alloc_region;
+  // Alloc region used to satisfy mutator allocation requests.
+  MutatorAllocRegion _mutator_alloc_region;
 
-  // Postcondition: cur_alloc_region == NULL.
-  void abandon_cur_alloc_region();
+  // It resets the mutator alloc region before new allocations can take place.
+  void init_mutator_alloc_region();
+
+  // It releases the mutator alloc region.
+  void release_mutator_alloc_region();
+
   void abandon_gc_alloc_regions();
 
   // The to-space memory regions into which objects are being copied during
@@ -360,27 +374,21 @@
   G1CollectorPolicy* _g1_policy;
 
   // This is the second level of trying to allocate a new region. If
-  // new_region_work didn't find a region in the free_list, this call
-  // will check whether there's anything available in the
-  // secondary_free_list and/or wait for more regions to appear in that
-  // list, if _free_regions_coming is set.
+  // new_region() didn't find a region on the free_list, this call will
+  // check whether there's anything available on the
+  // secondary_free_list and/or wait for more regions to appear on
+  // that list, if _free_regions_coming is set.
   HeapRegion* new_region_try_secondary_free_list();
 
   // Try to allocate a single non-humongous HeapRegion sufficient for
   // an allocation of the given word_size. If do_expand is true,
   // attempt to expand the heap if necessary to satisfy the allocation
   // request.
-  HeapRegion* new_region_work(size_t word_size, bool do_expand);
+  HeapRegion* new_region(size_t word_size, bool do_expand);
 
-  // Try to allocate a new region to be used for allocation by a
-  // mutator thread. Attempt to expand the heap if no region is
+  // Try to allocate a new region to be used for allocation by
+  // a GC thread. It will try to expand the heap if no region is
   // available.
-  HeapRegion* new_alloc_region(size_t word_size) {
-    return new_region_work(word_size, false /* do_expand */);
-  }
-
-  // Try to allocate a new region to be used for allocation by a GC
-  // thread. Attempt to expand the heap if no region is available.
   HeapRegion* new_gc_alloc_region(int purpose, size_t word_size);
 
   // Attempt to satisfy a humongous allocation request of the given
@@ -415,10 +423,6 @@
   // * All non-TLAB allocation requests should go to mem_allocate()
   //   and mem_allocate() should never be called with is_tlab == true.
   //
-  // * If the GC locker is active we currently stall until we can
-  //   allocate a new young region. This will be changed in the
-  //   near future (see CR 6994056).
-  //
   // * If either call cannot satisfy the allocation request using the
   //   current allocating region, they will try to get a new one. If
   //   this fails, they will attempt to do an evacuation pause and
@@ -441,122 +445,38 @@
                                  bool   is_tlab, /* expected to be false */
                                  bool*  gc_overhead_limit_was_exceeded);
 
-  // The following methods, allocate_from_cur_allocation_region(),
-  // attempt_allocation(), attempt_allocation_locked(),
-  // replace_cur_alloc_region_and_allocate(),
-  // attempt_allocation_slow(), and attempt_allocation_humongous()
-  // have very awkward pre- and post-conditions with respect to
-  // locking:
-  //
-  // If they are called outside a safepoint they assume the caller
-  // holds the Heap_lock when it calls them. However, on exit they
-  // will release the Heap_lock if they return a non-NULL result, but
-  // keep holding the Heap_lock if they return a NULL result. The
-  // reason for this is that we need to dirty the cards that span
-  // allocated blocks on young regions to avoid having to take the
-  // slow path of the write barrier (for performance reasons we don't
-  // update RSets for references whose source is a young region, so we
-  // don't need to look at dirty cards on young regions). But, doing
-  // this card dirtying while holding the Heap_lock can be a
-  // scalability bottleneck, especially given that some allocation
-  // requests might be of non-trivial size (and the larger the region
-  // size is, the fewer allocations requests will be considered
-  // humongous, as the humongous size limit is a fraction of the
-  // region size). So, when one of these calls succeeds in allocating
-  // a block it does the card dirtying after it releases the Heap_lock
-  // which is why it will return without holding it.
-  //
-  // The above assymetry is the reason why locking / unlocking is done
-  // explicitly (i.e., with Heap_lock->lock() and
-  // Heap_lock->unlocked()) instead of using MutexLocker and
-  // MutexUnlocker objects. The latter would ensure that the lock is
-  // unlocked / re-locked at every possible exit out of the basic
-  // block. However, we only want that action to happen in selected
-  // places.
-  //
-  // Further, if the above methods are called during a safepoint, then
-  // naturally there's no assumption about the Heap_lock being held or
-  // there's no attempt to unlock it. The parameter at_safepoint
-  // indicates whether the call is made during a safepoint or not (as
-  // an optimization, to avoid reading the global flag with
-  // SafepointSynchronize::is_at_safepoint()).
-  //
-  // The methods share these parameters:
-  //
-  // * word_size     : the size of the allocation request in words
-  // * at_safepoint  : whether the call is done at a safepoint; this
-  //                   also determines whether a GC is permitted
-  //                   (at_safepoint == false) or not (at_safepoint == true)
-  // * do_dirtying   : whether the method should dirty the allocated
-  //                   block before returning
-  //
-  // They all return either the address of the block, if they
-  // successfully manage to allocate it, or NULL.
+  // The following three methods take a gc_count_before_ret
+  // parameter, which is used to return the GC count if the method
+  // returns NULL. Since the GC count has to be read while holding the
+  // Heap_lock, and these paths will take the Heap_lock at some point
+  // anyway, it is easier to have them read the count (still under the
+  // lock) just before they return NULL than to make the caller
+  // (namely: mem_allocate()) take the Heap_lock again just to read it.
+
+  // First-level mutator allocation attempt: try to allocate out of
+  // the mutator alloc region without taking the Heap_lock. This
+  // should only be used for non-humongous allocations.
+  inline HeapWord* attempt_allocation(size_t word_size,
+                                      unsigned int* gc_count_before_ret);
 
-  // It tries to satisfy an allocation request out of the current
-  // alloc region, which is passed as a parameter. It assumes that the
-  // caller has checked that the current alloc region is not NULL.
-  // Given that the caller has to check the current alloc region for
-  // at least NULL, it might as well pass it as the first parameter so
-  // that the method doesn't have to read it from the
-  // _cur_alloc_region field again. It is called from both
-  // attempt_allocation() and attempt_allocation_locked() and the
-  // with_heap_lock parameter indicates whether the caller was holding
-  // the heap lock when it called it or not.
-  inline HeapWord* allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                  size_t word_size,
-                                                  bool with_heap_lock);
-
-  // First-level of allocation slow path: it attempts to allocate out
-  // of the current alloc region in a lock-free manner using a CAS. If
-  // that fails it takes the Heap_lock and calls
-  // attempt_allocation_locked() for the second-level slow path.
-  inline HeapWord* attempt_allocation(size_t word_size);
-
-  // Second-level of allocation slow path: while holding the Heap_lock
-  // it tries to allocate out of the current alloc region and, if that
-  // fails, tries to allocate out of a new current alloc region.
-  inline HeapWord* attempt_allocation_locked(size_t word_size);
+  // Second-level mutator allocation attempt: take the Heap_lock and
+  // retry the allocation attempt, potentially scheduling a GC
+  // pause. This should only be used for non-humongous allocations.
+  HeapWord* attempt_allocation_slow(size_t word_size,
+                                    unsigned int* gc_count_before_ret);
 
-  // It assumes that the current alloc region has been retired and
-  // tries to allocate a new one. If it's successful, it performs the
-  // allocation out of the new current alloc region and updates
-  // _cur_alloc_region. Normally, it would try to allocate a new
-  // region if the young gen is not full, unless can_expand is true in
-  // which case it would always try to allocate a new region.
-  HeapWord* replace_cur_alloc_region_and_allocate(size_t word_size,
-                                                  bool at_safepoint,
-                                                  bool do_dirtying,
-                                                  bool can_expand);
-
-  // Third-level of allocation slow path: when we are unable to
-  // allocate a new current alloc region to satisfy an allocation
-  // request (i.e., when attempt_allocation_locked() fails). It will
-  // try to do an evacuation pause, which might stall due to the GC
-  // locker, and retry the allocation attempt when appropriate.
-  HeapWord* attempt_allocation_slow(size_t word_size);
+  // Takes the Heap_lock and attempts a humongous allocation. It can
+  // potentially schedule a GC pause.
+  HeapWord* attempt_allocation_humongous(size_t word_size,
+                                         unsigned int* gc_count_before_ret);
 
-  // The method that tries to satisfy a humongous allocation
-  // request. If it cannot satisfy it it will try to do an evacuation
-  // pause to perhaps reclaim enough space to be able to satisfy the
-  // allocation request afterwards.
-  HeapWord* attempt_allocation_humongous(size_t word_size,
-                                         bool at_safepoint);
-
-  // It does the common work when we are retiring the current alloc region.
-  inline void retire_cur_alloc_region_common(HeapRegion* cur_alloc_region);
-
-  // It retires the current alloc region, which is passed as a
-  // parameter (since, typically, the caller is already holding on to
-  // it). It sets _cur_alloc_region to NULL.
-  void retire_cur_alloc_region(HeapRegion* cur_alloc_region);
-
-  // It attempts to do an allocation immediately before or after an
-  // evacuation pause and can only be called by the VM thread. It has
-  // slightly different assumptions that the ones before (i.e.,
-  // assumes that the current alloc region has been retired).
+  // Allocation attempt that should be called during safepoints (e.g.,
+  // at the end of a successful GC). expect_null_mutator_alloc_region
+  // specifies whether the mutator alloc region is expected to be NULL
+  // or not.
   HeapWord* attempt_allocation_at_safepoint(size_t word_size,
-                                            bool expect_null_cur_alloc_region);
+                                       bool expect_null_mutator_alloc_region);
 
   // It dirties the cards that cover the block so that the post
   // write barrier never queues anything when updating objects on this
@@ -583,6 +503,12 @@
   // GC pause.
   void  retire_alloc_region(HeapRegion* alloc_region, bool par);
 
+  // These two methods are the "callbacks" from the G1AllocRegion class.
+
+  HeapRegion* new_mutator_alloc_region(size_t word_size, bool force);
+  void retire_mutator_alloc_region(HeapRegion* alloc_region,
+                                   size_t allocated_bytes);
+
   // - if explicit_gc is true, the GC is for a System.gc() or a heap
   //   inspection request and should collect the entire heap
   // - if clear_all_soft_refs is true, all soft references should be
@@ -1027,6 +953,9 @@
   // The number of regions available for "regular" expansion.
   size_t expansion_regions() { return _expansion_regions; }
 
+  void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
+  void verify_dirty_young_regions() PRODUCT_RETURN;
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1061,7 +990,7 @@
   }
 
   void append_secondary_free_list() {
-    _free_list.add_as_tail(&_secondary_free_list);
+    _free_list.add_as_head(&_secondary_free_list);
   }
 
   void append_secondary_free_list_if_not_empty_with_lock() {
@@ -1128,7 +1057,13 @@
     return _g1_reserved.contains(p);
   }
 
-  // Returns a MemRegion that corresponds to the space that  has been
+  // Returns a MemRegion that corresponds to the space that has been
+  // reserved for the heap
+  MemRegion g1_reserved() {
+    return _g1_reserved;
+  }
+
+  // Returns a MemRegion that corresponds to the space that has been
   // committed in the heap
   MemRegion g1_committed() {
     return _g1_committed;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -27,6 +27,7 @@
 
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.hpp"
+#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "utilities/taskqueue.hpp"
@@ -59,131 +60,23 @@
   return r != NULL && r->in_collection_set();
 }
 
-// See the comment in the .hpp file about the locking protocol and
-// assumptions of this method (and other related ones).
 inline HeapWord*
-G1CollectedHeap::allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                size_t word_size,
-                                                bool with_heap_lock) {
-  assert_not_at_safepoint();
-  assert(with_heap_lock == Heap_lock->owned_by_self(),
-         "with_heap_lock and Heap_lock->owned_by_self() should be a tautology");
-  assert(cur_alloc_region != NULL, "pre-condition of the method");
-  assert(cur_alloc_region->is_young(),
-         "we only support young current alloc regions");
-  assert(!isHumongous(word_size), "allocate_from_cur_alloc_region() "
-         "should not be used for humongous allocations");
-  assert(!cur_alloc_region->isHumongous(), "Catch a regression of this bug.");
-
-  assert(!cur_alloc_region->is_empty(),
-         err_msg("region ["PTR_FORMAT","PTR_FORMAT"] should not be empty",
-                 cur_alloc_region->bottom(), cur_alloc_region->end()));
-  HeapWord* result = cur_alloc_region->par_allocate_no_bot_updates(word_size);
-  if (result != NULL) {
-    assert(is_in(result), "result should be in the heap");
-
-    if (with_heap_lock) {
-      Heap_lock->unlock();
-    }
-    assert_heap_not_locked();
-    // Do the dirtying after we release the Heap_lock.
-    dirty_young_block(result, word_size);
-    return result;
-  }
-
-  if (with_heap_lock) {
-    assert_heap_locked();
-  } else {
-    assert_heap_not_locked();
-  }
-  return NULL;
-}
-
-// See the comment in the .hpp file about the locking protocol and
-// assumptions of this method (and other related ones).
-inline HeapWord*
-G1CollectedHeap::attempt_allocation(size_t word_size) {
+G1CollectedHeap::attempt_allocation(size_t word_size,
+                                    unsigned int* gc_count_before_ret) {
   assert_heap_not_locked_and_not_at_safepoint();
-  assert(!isHumongous(word_size), "attempt_allocation() should not be called "
-         "for humongous allocation requests");
-
-  HeapRegion* cur_alloc_region = _cur_alloc_region;
-  if (cur_alloc_region != NULL) {
-    HeapWord* result = allocate_from_cur_alloc_region(cur_alloc_region,
-                                                   word_size,
-                                                   false /* with_heap_lock */);
-    assert_heap_not_locked();
-    if (result != NULL) {
-      return result;
-    }
-  }
+  assert(!isHumongous(word_size), "attempt_allocation() should not "
+         "be called for humongous allocation requests");
 
-  // Our attempt to allocate lock-free failed as the current
-  // allocation region is either NULL or full. So, we'll now take the
-  // Heap_lock and retry.
-  Heap_lock->lock();
-
-  HeapWord* result = attempt_allocation_locked(word_size);
-  if (result != NULL) {
-    assert_heap_not_locked();
-    return result;
+  HeapWord* result = _mutator_alloc_region.attempt_allocation(word_size,
+                                                      false /* bot_updates */);
+  if (result == NULL) {
+    result = attempt_allocation_slow(word_size, gc_count_before_ret);
   }
-
-  assert_heap_locked();
-  return NULL;
-}
-
-inline void
-G1CollectedHeap::retire_cur_alloc_region_common(HeapRegion* cur_alloc_region) {
-  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
-  assert(cur_alloc_region != NULL && cur_alloc_region == _cur_alloc_region,
-         "pre-condition of the call");
-  assert(cur_alloc_region->is_young(),
-         "we only support young current alloc regions");
-
-  // The region is guaranteed to be young
-  g1_policy()->add_region_to_incremental_cset_lhs(cur_alloc_region);
-  _summary_bytes_used += cur_alloc_region->used();
-  _cur_alloc_region = NULL;
-}
-
-inline HeapWord*
-G1CollectedHeap::attempt_allocation_locked(size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
-  assert(!isHumongous(word_size), "attempt_allocation_locked() "
-         "should not be called for humongous allocation requests");
-
-  // First, reread the current alloc region and retry the allocation
-  // in case somebody replaced it while we were waiting to get the
-  // Heap_lock.
-  HeapRegion* cur_alloc_region = _cur_alloc_region;
-  if (cur_alloc_region != NULL) {
-    HeapWord* result = allocate_from_cur_alloc_region(
-                                                  cur_alloc_region, word_size,
-                                                  true /* with_heap_lock */);
-    if (result != NULL) {
-      assert_heap_not_locked();
-      return result;
-    }
-
-    // We failed to allocate out of the current alloc region, so let's
-    // retire it before getting a new one.
-    retire_cur_alloc_region(cur_alloc_region);
+  assert_heap_not_locked();
+  if (result != NULL) {
+    dirty_young_block(result, word_size);
   }
-
-  assert_heap_locked();
-  // Try to get a new region and allocate out of it
-  HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
-                                                     false, /* at_safepoint */
-                                                     true,  /* do_dirtying */
-                                                     false  /* can_expand */);
-  if (result != NULL) {
-    assert_heap_not_locked();
-    return result;
-  }
-
-  assert_heap_locked();
-  return NULL;
+  return result;
 }
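
The first-level attempt above is the lock-free fast path: as the comments removed elsewhere in this patch put it, it allocates out of the current alloc region "in a lock-free manner using a CAS", and only falls back to attempt_allocation_slow() under the Heap_lock. A standalone sketch of such a CAS bump-pointer allocation (invented names, not HeapRegion::par_allocate_no_bot_updates):

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    // Lock-free bump-pointer allocation over a fixed [start, end) range:
    // multiple threads race with compare_exchange on the shared top pointer.
    class BumpRegion {
      std::atomic<char*> _top;
      char*              _end;
    public:
      BumpRegion(char* start, char* end) : _top(start), _end(end) {}

      char* par_allocate(std::size_t bytes) {
        char* old_top = _top.load(std::memory_order_relaxed);
        do {
          if ((std::size_t)(_end - old_top) < bytes) {
            return nullptr;             // region is full: take the slow path
          }
          // On CAS failure, old_top is reloaded and the bound is re-checked.
        } while (!_top.compare_exchange_weak(old_top, old_top + bytes,
                                             std::memory_order_relaxed));
        return old_top;                 // we own [old_top, old_top + bytes)
      }
    };

    int main() {
      static char backing[1024];
      BumpRegion r(backing, backing + sizeof(backing));
      char* p = r.par_allocate(128);
      std::printf("allocated %s\n", p != nullptr ? "ok" : "failed");
      return 0;
    }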
 
 // It dirties the cards that cover the block so that the post
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -307,6 +307,7 @@
   _par_last_termination_times_ms = new double[_parallel_gc_threads];
   _par_last_termination_attempts = new double[_parallel_gc_threads];
   _par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
+  _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
 
   // start conservatively
   _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
@@ -911,6 +912,7 @@
     _par_last_termination_times_ms[i] = -1234.0;
     _par_last_termination_attempts[i] = -1234.0;
     _par_last_gc_worker_end_times_ms[i] = -1234.0;
+    _par_last_gc_worker_times_ms[i] = -1234.0;
   }
 #endif
 
@@ -1063,8 +1065,7 @@
 
 void G1CollectorPolicy::print_par_stats(int level,
                                         const char* str,
-                                        double* data,
-                                         bool summary) {
+                                        double* data) {
   double min = data[0], max = data[0];
   double total = 0.0;
   LineBuffer buf(level);
@@ -1078,20 +1079,15 @@
     total += val;
     buf.append("  %3.1lf", val);
   }
-  if (summary) {
-    buf.append_and_print_cr("");
-    double avg = total / (double) ParallelGCThreads;
-    buf.append(" ");
-    buf.append("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf",
-                        avg, min, max);
-  }
-  buf.append_and_print_cr("]");
+  buf.append_and_print_cr("");
+  double avg = total / (double) ParallelGCThreads;
+  buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]",
+    avg, min, max, max - min);
 }
 
 void G1CollectorPolicy::print_par_sizes(int level,
                                         const char* str,
-                                        double* data,
-                                        bool summary) {
+                                        double* data) {
   double min = data[0], max = data[0];
   double total = 0.0;
   LineBuffer buf(level);
@@ -1105,14 +1101,10 @@
     total += val;
     buf.append(" %d", (int) val);
   }
-  if (summary) {
-    buf.append_and_print_cr("");
-    double avg = total / (double) ParallelGCThreads;
-    buf.append(" ");
-    buf.append("Sum: %d, Avg: %d, Min: %d, Max: %d",
-               (int)total, (int)avg, (int)min, (int)max);
-  }
-  buf.append_and_print_cr("]");
+  buf.append_and_print_cr("");
+  double avg = total / (double) ParallelGCThreads;
+  buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]",
+    (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
 }
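
The new summary line is a plain reduction over the per-worker array, and the "GC Worker Times" row added further down is just end minus start per worker. A small sketch of both computations:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ParSummary { double avg, min, max, diff; };

    // Reduce a non-empty per-worker array to the summary printed after the
    // raw values.
    ParSummary summarize(const std::vector<double>& data) {
      double mn = data[0], mx = data[0], total = 0.0;
      for (double v : data) {
        mn = std::min(mn, v);
        mx = std::max(mx, v);
        total += v;
      }
      return { total / data.size(), mn, mx, mx - mn };
    }

    int main() {
      std::vector<double> start = { 0.0, 0.3, 0.1, 0.2 };  // worker start (ms)
      std::vector<double> end   = { 5.0, 5.1, 4.9, 5.2 };  // worker end (ms)

      // "GC Worker Times" is simply end - start for each worker.
      std::vector<double> worker_times(start.size());
      for (std::size_t i = 0; i < start.size(); ++i) {
        worker_times[i] = end[i] - start[i];
      }

      ParSummary s = summarize(worker_times);
      std::printf("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf\n",
                  s.avg, s.min, s.max, s.diff);
      return 0;
    }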
 
 void G1CollectorPolicy::print_stats (int level,
@@ -1421,22 +1413,22 @@
     }
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
-      print_par_stats(2, "GC Worker Start Time",
-                      _par_last_gc_worker_start_times_ms, false);
+      print_par_stats(2, "GC Worker Start Time", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-      print_par_sizes(3, "Processed Buffers",
-                      _par_last_update_rs_processed_buffers, true);
-      print_par_stats(2, "Ext Root Scanning",
-                      _par_last_ext_root_scan_times_ms);
-      print_par_stats(2, "Mark Stack Scanning",
-                      _par_last_mark_stack_scan_times_ms);
+      print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
+      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+      print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
       print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
       print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
       print_par_stats(2, "Termination", _par_last_termination_times_ms);
-      print_par_sizes(3, "Termination Attempts",
-                      _par_last_termination_attempts, true);
-      print_par_stats(2, "GC Worker End Time",
-                      _par_last_gc_worker_end_times_ms, false);
+      print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
+      print_par_stats(2, "GC Worker End Time", _par_last_gc_worker_end_times_ms);
+
+      for (int i = 0; i < _parallel_gc_threads; i++) {
+        _par_last_gc_worker_times_ms[i] =
+          _par_last_gc_worker_end_times_ms[i] -
+          _par_last_gc_worker_start_times_ms[i];
+      }
+      print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
+
       print_stats(2, "Other", parallel_other_time);
       print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
     } else {
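
The reworked print_par_stats()/print_par_sizes() above always append an Avg/Min/Max summary plus the max-min spread ("Diff"), and the new "GC Worker Times" row is derived per thread as end time minus start time. A minimal standalone sketch of the same reduction, with plain printf and illustrative names instead of the VM's LineBuffer:

    #include <algorithm>
    #include <cstdio>

    // Summarize one per-worker row the way the new code does: average,
    // minimum, maximum and the max-min spread ("Diff").
    static void print_row(const char* name, const double* data, int workers) {
      double min = data[0], max = data[0], total = 0.0;
      for (int i = 0; i < workers; i++) {
        min = std::min(min, data[i]);
        max = std::max(max, data[i]);
        total += data[i];
      }
      double avg = total / (double) workers;
      std::printf("  [%s: Avg: %5.1f, Min: %5.1f, Max: %5.1f, Diff: %5.1f]\n",
                  name, avg, min, max, max - min);
    }

    int main() {
      const int workers = 4;
      double start_ms[workers] = { 0.0, 0.1, 0.2, 0.1 };
      double end_ms[workers]   = { 5.0, 5.2, 5.1, 5.3 };
      double worker_ms[workers];
      for (int i = 0; i < workers; i++) {
        // Same derivation as the new "GC Worker Times" row above.
        worker_ms[i] = end_ms[i] - start_ms[i];
      }
      print_row("GC Worker Times", worker_ms, workers);
      return 0;
    }
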
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -182,6 +182,7 @@
   double* _par_last_termination_times_ms;
   double* _par_last_termination_attempts;
   double* _par_last_gc_worker_end_times_ms;
+  double* _par_last_gc_worker_times_ms;
 
   // indicates that we are in young GC mode
   bool _in_young_gc_mode;
@@ -569,11 +570,8 @@
   void print_stats(int level, const char* str, double value);
   void print_stats(int level, const char* str, int value);
 
-  void print_par_stats(int level, const char* str, double* data) {
-    print_par_stats(level, str, data, true);
-  }
-  void print_par_stats(int level, const char* str, double* data, bool summary);
-  void print_par_sizes(int level, const char* str, double* data, bool summary);
+  void print_par_stats(int level, const char* str, double* data);
+  void print_par_sizes(int level, const char* str, double* data);
 
   void check_other_times(int level,
                          NumberSeq* other_times_ms,
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -89,6 +89,11 @@
           "The number of discovered reference objects to process before "   \
           "draining concurrent marking work queues.")                       \
                                                                             \
+  experimental(bool, G1UseConcMarkReferenceProcessing, false,               \
+          "If true, enable reference discovery during concurrent "          \
+          "marking and reference processing at the end of remark "          \
+          "(unsafe).")                                                      \
+                                                                            \
   develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
           "If true, count frac of ptr writes with null pre-vals.")          \
                                                                             \
@@ -138,9 +143,9 @@
   develop(bool, G1RSCountHisto, false,                                      \
           "If true, print a histogram of RS occupancies after each pause")  \
                                                                             \
-  develop(intx, G1PrintRegionLivenessInfo, 0,                               \
-          "When > 0, print the occupancies of the <n> best and worst"       \
-          "regions.")                                                       \
+  product(bool, G1PrintRegionLivenessInfo, false,                           \
+          "Prints the liveness information for all regions in the heap "    \
+          "at the end of a marking cycle.")                                 \
                                                                             \
   develop(bool, G1PrintParCleanupStats, false,                              \
           "When true, print extra stats about parallel cleanup.")           \
@@ -193,6 +198,10 @@
   develop(intx, G1ConcRSHotCardLimit, 4,                                    \
           "The threshold that defines (>=) a hot card.")                    \
                                                                             \
+  develop(intx, G1MaxHotCardCountSizePercent, 25,                           \
+          "The maximum size of the hot card count cache as a "              \
+          "percentage of the number of cards for the maximum heap.")        \
+                                                                            \
   develop(bool, G1PrintOopAppls, false,                                     \
           "When true, print applications of closures to external locs.")    \
                                                                             \
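
The new G1MaxHotCardCountSizePercent flag caps the hot card count cache at a percentage of the card count of the maximum heap; the cap itself is simple arithmetic. A rough sketch with assumed numbers (512-byte cards, a 2 GB maximum heap); the code that actually consumes the flag is not in the hunks shown here:

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t card_size      = 512;                       // assumed bytes per card
      const size_t max_heap_bytes = 2UL * 1024 * 1024 * 1024;  // assumed 2 GB maximum heap
      const size_t max_cards      = max_heap_bytes / card_size;
      const size_t percent        = 25;                        // G1MaxHotCardCountSizePercent
      const size_t cache_cap      = (max_cards * percent) / 100;
      std::printf("cards: %zu, hot card count cache cap: %zu entries\n",
                  max_cards, cache_cap);
      return 0;
    }
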
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -360,6 +360,7 @@
   set_young_index_in_cset(-1);
   uninstall_surv_rate_group();
   set_young_type(NotYoung);
+  reset_pre_dummy_top();
 
   if (!par) {
     // If this is parallel, this will be done later.
@@ -923,11 +924,11 @@
     ContiguousSpace::set_saved_mark();
     OrderAccess::storestore();
     _gc_time_stamp = curr_gc_time_stamp;
-    // The following fence is to force a flush of the writes above, but
-    // is strictly not needed because when an allocating worker thread
-    // calls set_saved_mark() it does so under the ParGCRareEvent_lock;
-    // when the lock is released, the write will be flushed.
-    // OrderAccess::fence();
+    // No need to do another barrier to flush the writes above. If
+    // this is called in parallel with other threads trying to
+    // allocate into the region, the caller should call this while
+    // holding a lock, and when the lock is released the writes will be
+    // flushed.
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -149,6 +149,13 @@
   G1BlockOffsetArrayContigSpace _offsets;
   Mutex _par_alloc_lock;
   volatile unsigned _gc_time_stamp;
+  // When we need to retire an allocation region, while other threads
+  // are also concurrently trying to allocate into it, we typically
+  // allocate a dummy object at the end of the region to ensure that
+  // no more allocations can take place in it. However, sometimes we
+  // want to know where the end of the last "real" object we allocated
+  // into the region was, and this is what this field keeps track of.
+  HeapWord* _pre_dummy_top;
 
  public:
   // Constructor.  If "is_zeroed" is true, the MemRegion "mr" may be
@@ -163,6 +170,17 @@
   virtual void set_saved_mark();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
 
+  // See the comment above in the declaration of _pre_dummy_top for an
+  // explanation of what it is.
+  void set_pre_dummy_top(HeapWord* pre_dummy_top) {
+    assert(is_in(pre_dummy_top) && pre_dummy_top <= top(), "pre-condition");
+    _pre_dummy_top = pre_dummy_top;
+  }
+  HeapWord* pre_dummy_top() {
+    return (_pre_dummy_top == NULL) ? top() : _pre_dummy_top;
+  }
+  void reset_pre_dummy_top() { _pre_dummy_top = NULL; }
+
   virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
   virtual void clear(bool mangle_space);
 
@@ -380,13 +398,16 @@
 
   // The number of bytes marked live in the region in the last marking phase.
   size_t marked_bytes()    { return _prev_marked_bytes; }
+  // The number of bytes live wrt the previous marking; everything
+  // allocated above the previous top-at-mark-start counts as live.
+  size_t live_bytes() {
+    return (top() - prev_top_at_mark_start()) * HeapWordSize + marked_bytes();
+  }
+
   // The number of bytes counted in the next marking.
   size_t next_marked_bytes() { return _next_marked_bytes; }
   // The number of bytes live wrt the next marking.
   size_t next_live_bytes() {
-    return (top() - next_top_at_mark_start())
-      * HeapWordSize
-      + next_marked_bytes();
+    return
+      (top() - next_top_at_mark_start()) * HeapWordSize + next_marked_bytes();
   }
 
   // A lower bound on the amount of garbage bytes in the region.
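
The _pre_dummy_top bookkeeping added above acts as an optional override of top(): it is only meaningful between retiring a region with a dummy object and the next reset, and otherwise simply mirrors top(). A self-contained sketch of that behaviour, using a hypothetical RegionSketch type with word indices in place of HeapWord pointers:

    #include <cassert>
    #include <cstddef>

    struct RegionSketch {
      size_t _top;            // current allocation top, as a word index
      size_t _pre_dummy_top;  // 0 means "not set", mirroring the NULL default

      RegionSketch() : _top(0), _pre_dummy_top(0) {}

      size_t top() const { return _top; }

      void set_pre_dummy_top(size_t v) {
        assert(v <= _top && "pre-condition: must not be above top()");
        _pre_dummy_top = v;
      }
      // Falls back to top() when no dummy object has been allocated.
      size_t pre_dummy_top() const {
        return (_pre_dummy_top == 0) ? _top : _pre_dummy_top;
      }
      void reset_pre_dummy_top() { _pre_dummy_top = 0; }
    };

    int main() {
      RegionSketch r;
      r._top = 100;                      // pretend 100 words are allocated
      assert(r.pre_dummy_top() == 100);  // unset: mirrors top()
      r.set_pre_dummy_top(96);           // last real object ended at word 96;
                                         // the dummy filler occupies [96, 100)
      assert(r.pre_dummy_top() == 96);
      r.reset_pre_dummy_top();           // cleared when the region is reset
      assert(r.pre_dummy_top() == 100);
      return 0;
    }
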
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -38,15 +38,8 @@
 // this is used for larger LAB allocations only.
 inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
   MutexLocker x(&_par_alloc_lock);
-  // This ought to be just "allocate", because of the lock above, but that
-  // ContiguousSpace::allocate asserts that either the allocating thread
-  // holds the heap lock or it is the VM thread and we're at a safepoint.
-  // The best I (dld) could figure was to put a field in ContiguousSpace
-  // meaning "locking at safepoint taken care of", and set/reset that
-  // here.  But this will do for now, especially in light of the comment
-  // above.  Perhaps in the future some lock-free manner of keeping the
-  // coordination.
-  HeapWord* res = ContiguousSpace::par_allocate(size);
+  // Given that we take the lock, there is no need to use par_allocate() here.
+  HeapWord* res = ContiguousSpace::allocate(size);
   if (res != NULL) {
     _offsets.alloc_block(res, size);
   }
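
The par_allocate() change above rests on one observation: once _par_alloc_lock serializes every caller, a plain bump-the-pointer allocation suffices and the CAS retry loop in the parallel allocator buys nothing. A reduced sketch of that idea with std::mutex standing in for the VM's Mutex (BumpSpace is an illustrative type, not the real space class):

    #include <cstddef>
    #include <mutex>

    class BumpSpace {
      char*      _top;
      char*      _end;
      std::mutex _lock;        // plays the role of _par_alloc_lock
     public:
      BumpSpace(char* buf, size_t capacity) : _top(buf), _end(buf + capacity) {}

      char* par_allocate(size_t bytes) {
        std::lock_guard<std::mutex> x(_lock);
        if ((size_t)(_end - _top) < bytes) return nullptr;
        char* res = _top;
        _top += bytes;         // serial bump; safe because the lock is held
        return res;
      }
    };

    int main() {
      static char buffer[1024];
      BumpSpace space(buffer, sizeof(buffer));
      char* a = space.par_allocate(128);
      char* b = space.par_allocate(128);
      return (a != nullptr && b == a + 128) ? 0 : 1;
    }
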
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -261,6 +261,45 @@
   msg->append(" hd: "PTR_FORMAT" tl: "PTR_FORMAT, head(), tail());
 }
 
+void HeapRegionLinkedList::add_as_head(HeapRegionLinkedList* from_list) {
+  hrs_assert_mt_safety_ok(this);
+  hrs_assert_mt_safety_ok(from_list);
+
+  verify_optional();
+  from_list->verify_optional();
+
+  if (from_list->is_empty()) return;
+
+#ifdef ASSERT
+  HeapRegionLinkedListIterator iter(from_list);
+  while (iter.more_available()) {
+    HeapRegion* hr = iter.get_next();
+    // In set_containing_set() we check that we either set the value
+    // from NULL to non-NULL or vice versa to catch bugs. So, we have
+    // to NULL it first before setting it to the value.
+    hr->set_containing_set(NULL);
+    hr->set_containing_set(this);
+  }
+#endif // ASSERT
+
+  if (_head != NULL) {
+    assert(length() >  0 && _tail != NULL, hrs_ext_msg(this, "invariant"));
+    from_list->_tail->set_next(_head);
+  } else {
+    assert(length() == 0 && _head == NULL, hrs_ext_msg(this, "invariant"));
+    _tail = from_list->_tail;
+  }
+  _head = from_list->_head;
+
+  _length           += from_list->length();
+  _region_num       += from_list->region_num();
+  _total_used_bytes += from_list->total_used_bytes();
+  from_list->clear();
+
+  verify_optional();
+  from_list->verify_optional();
+}
+
 void HeapRegionLinkedList::add_as_tail(HeapRegionLinkedList* from_list) {
   hrs_assert_mt_safety_ok(this);
   hrs_assert_mt_safety_ok(from_list);
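
The new HeapRegionLinkedList::add_as_head(from_list) above is a constant-time splice: wire the donor's tail to the current head, adopt the donor's head, fold in the cached counts, and clear the donor. A generic singly-linked sketch of the same splice (plain Node/List types, no HeapRegion or MT-safety checks):

    #include <cassert>
    #include <cstddef>

    struct Node { Node* next; };

    struct List {
      Node*  head;
      Node*  tail;
      size_t length;
      List() : head(NULL), tail(NULL), length(0) {}

      // Splice 'from' onto the front of this list and empty 'from'.
      void add_as_head(List* from) {
        if (from->head == NULL) return;    // nothing to move
        if (head != NULL) {
          assert(tail != NULL);
          from->tail->next = head;         // link donor tail to our old head
        } else {
          assert(tail == NULL);
          tail = from->tail;               // we were empty: adopt the donor tail
        }
        head = from->head;
        length += from->length;
        from->head = from->tail = NULL;    // leave the donor empty
        from->length = 0;
      }
    };

    int main() {
      Node n1, n2, n3;
      List a, b;
      b.head = &n1; n1.next = &n2; n2.next = NULL; b.tail = &n2; b.length = 2;
      a.head = a.tail = &n3; n3.next = NULL; a.length = 1;
      a.add_as_head(&b);                   // a is now n1 -> n2 -> n3
      assert(a.head == &n1 && a.tail == &n3 && a.length == 3);
      assert(b.head == NULL && b.length == 0);
      return 0;
    }
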
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -277,6 +277,10 @@
   }
 
 public:
+  // It adds hr to the list as the new head. The region should not be
+  // a member of another set.
+  inline void add_as_head(HeapRegion* hr);
+
   // It adds hr to the list as the new tail. The region should not be
   // a member of another set.
   inline void add_as_tail(HeapRegion* hr);
@@ -290,6 +294,11 @@
 
   // It moves the regions from from_list to this list and empties
   // from_list. The new regions will appear in the same order as they
+  // were in from_list and be linked at the beginning of this list.
+  void add_as_head(HeapRegionLinkedList* from_list);
+
+  // It moves the regions from from_list to this list and empties
+  // from_list. The new regions will appear in the same order as they
   // were in from_list and be linked at the end of this list.
   void add_as_tail(HeapRegionLinkedList* from_list);
 
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -110,6 +110,23 @@
 
 //////////////////// HeapRegionLinkedList ////////////////////
 
+inline void HeapRegionLinkedList::add_as_head(HeapRegion* hr) {
+  hrs_assert_mt_safety_ok(this);
+  assert((length() == 0 && _head == NULL && _tail == NULL) ||
+         (length() >  0 && _head != NULL && _tail != NULL),
+         hrs_ext_msg(this, "invariant"));
+  // add_internal() will verify the region.
+  add_internal(hr);
+
+  // Now link the region.
+  if (_head != NULL) {
+    hr->set_next(_head);
+  } else {
+    _tail = hr;
+  }
+  _head = hr;
+}
+
 inline void HeapRegionLinkedList::add_as_tail(HeapRegion* hr) {
   hrs_assert_mt_safety_ok(this);
   assert((length() == 0 && _head == NULL && _tail == NULL) ||
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -382,6 +382,11 @@
     return (addr_for(pcard) == p);
   }
 
+  // Returns the address of the nearest card boundary at or above p.
+  HeapWord* align_to_card_boundary(HeapWord* p) {
+    jbyte* pcard = byte_for(p + card_size_in_words - 1);
+    return addr_for(pcard);
+  }
+
   // The kinds of precision a CardTableModRefBS may offer.
   enum PrecisionStyle {
     Precise,
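
align_to_card_boundary() rounds an address up to the next card boundary by taking the card that covers (p + card_size_in_words - 1) and returning that card's base address, so an address already on a boundary maps to itself. The same round-up in plain byte arithmetic (illustrative 512-byte cards, not the BarrierSet API):

    #include <cstdio>

    int main() {
      const unsigned long card_size = 512;  // assumed bytes per card
      unsigned long p = 0x1234;             // some address inside a card
      // Step to the last byte that still shares p's card, then truncate
      // to that card's base; this rounds up and leaves aligned values alone.
      unsigned long aligned = ((p + card_size - 1) / card_size) * card_size;
      std::printf("%#lx -> %#lx\n", p, aligned);  // prints 0x1234 -> 0x1400
      return 0;
    }
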
--- a/src/share/vm/memory/cardTableRS.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/memory/cardTableRS.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -318,17 +318,28 @@
 protected:
   template <class T> void do_oop_work(T* p) {
     HeapWord* jp = (HeapWord*)p;
-    if (jp >= _begin && jp < _end) {
-      oop obj = oopDesc::load_decode_heap_oop(p);
-      guarantee(obj == NULL ||
-                (HeapWord*)p < _boundary ||
-                (HeapWord*)obj >= _boundary,
-                "pointer on clean card crosses boundary");
-    }
+    assert(jp >= _begin && jp < _end,
+           err_msg("Error: jp " PTR_FORMAT " should be within "
+                   "[_begin, _end) = [" PTR_FORMAT "," PTR_FORMAT ")",
+                   jp, _begin, _end));
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    guarantee(obj == NULL || (HeapWord*)obj >= _boundary,
+              err_msg("pointer " PTR_FORMAT " at " PTR_FORMAT " on "
+                      "clean card crosses boundary " PTR_FORMAT,
+                      (HeapWord*)obj, jp, _boundary));
   }
+
 public:
   VerifyCleanCardClosure(HeapWord* b, HeapWord* begin, HeapWord* end) :
-    _boundary(b), _begin(begin), _end(end) {}
+    _boundary(b), _begin(begin), _end(end) {
+    assert(b <= begin,
+           err_msg("Error: boundary " PTR_FORMAT " should be at or below begin " PTR_FORMAT,
+                   b, begin));
+    assert(begin <= end,
+           err_msg("Error: begin " PTR_FORMAT " should be at or below end " PTR_FORMAT,
+                   begin, end));
+  }
+
   virtual void do_oop(oop* p)       { VerifyCleanCardClosure::do_oop_work(p); }
   virtual void do_oop(narrowOop* p) { VerifyCleanCardClosure::do_oop_work(p); }
 };
@@ -392,13 +403,14 @@
         }
       }
       // Now traverse objects until end.
-      HeapWord* cur = start_block;
-      VerifyCleanCardClosure verify_blk(gen_boundary, begin, end);
-      while (cur < end) {
-        if (s->block_is_obj(cur) && s->obj_is_alive(cur)) {
-          oop(cur)->oop_iterate(&verify_blk);
+      if (begin < end) {
+        MemRegion mr(begin, end);
+        VerifyCleanCardClosure verify_blk(gen_boundary, begin, end);
+        for (HeapWord* cur = start_block; cur < end; cur += s->block_size(cur)) {
+          if (s->block_is_obj(cur) && s->obj_is_alive(cur)) {
+            oop(cur)->oop_iterate(&verify_blk, mr);
+          }
         }
-        cur += s->block_size(cur);
       }
       cur_entry = first_dirty;
     } else {
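
The restructured verification loop above is a standard block walk: start at the block containing the dirty range, advance by block_size(cur), and apply the (now MemRegion-bounded) closure only to blocks that are live objects. A toy version of that walk with a hard-coded block table in place of the real Space API:

    #include <cstddef>
    #include <cstdio>

    int main() {
      // Toy "space" of 16 words carved into four blocks.
      const size_t block_sizes[] = { 4, 2, 6, 4 };           // words per block
      const bool   is_live_obj[] = { true, false, true, true };
      const size_t end = 16;                                 // exclusive end, in words

      size_t cur = 0;
      for (int b = 0; cur < end; b++) {
        if (is_live_obj[b]) {
          std::printf("iterate object at word %zu (%zu words)\n",
                      cur, block_sizes[b]);
        }
        cur += block_sizes[b];                               // cur += s->block_size(cur)
      }
      return 0;
    }
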
--- a/src/share/vm/memory/space.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/memory/space.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -818,9 +818,14 @@
 // This version requires locking.
 inline HeapWord* ContiguousSpace::allocate_impl(size_t size,
                                                 HeapWord* const end_value) {
+  // In G1 there are places where a GC worker can allocate into a
+  // region using this serial allocation code without being prone to a
+  // race with other GC workers (we ensure that no other GC worker can
+  // access the same region at the same time). So the assert below is
+  // too strong in the case of G1.
   assert(Heap_lock->owned_by_self() ||
          (SafepointSynchronize::is_at_safepoint() &&
-          Thread::current()->is_VM_thread()),
+          (Thread::current()->is_VM_thread() || UseG1GC)),
          "not locked");
   HeapWord* obj = top();
   if (pointer_delta(end_value, obj) >= size) {
--- a/src/share/vm/oops/constantPoolKlass.cpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/oops/constantPoolKlass.cpp	Wed Apr 06 16:02:53 2011 -0700
@@ -245,13 +245,13 @@
   }
   oop* addr;
   addr = cp->tags_addr();
-  blk->do_oop(addr);
+  if (mr.contains(addr)) blk->do_oop(addr);
   addr = cp->cache_addr();
-  blk->do_oop(addr);
+  if (mr.contains(addr)) blk->do_oop(addr);
   addr = cp->operands_addr();
-  blk->do_oop(addr);
+  if (mr.contains(addr)) blk->do_oop(addr);
   addr = cp->pool_holder_addr();
-  blk->do_oop(addr);
+  if (mr.contains(addr)) blk->do_oop(addr);
   return size;
 }
 
--- a/src/share/vm/runtime/globals.hpp	Tue Apr 05 14:12:31 2011 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Apr 06 16:02:53 2011 -0700
@@ -1924,7 +1924,7 @@
   experimental(intx, WorkStealingSleepMillis, 1,                            \
           "Sleep time when sleep is used for yields")                       \
                                                                             \
-  experimental(uintx, WorkStealingYieldsBeforeSleep, 1000,                  \
+  experimental(uintx, WorkStealingYieldsBeforeSleep, 5000,                  \
           "Number of yields before a sleep is done during workstealing")    \
                                                                             \
   experimental(uintx, WorkStealingHardSpins, 4096,                          \