# HG changeset patch
# User tonyp
# Date 1294868065 18000
# Node ID b158bed62ef578c222fc266a1aee930a1514a0b2
# Parent  2250ee17e2582cb4891c6a838d8854b1fd827ce6
6994297: G1: do first-level slow-path allocations with a CAS
Summary: First attempt to allocate out the current alloc region using a CAS instead of taking the Heap_lock (first level of G1's slow allocation path). Only if that fails and it's necessary to replace the current alloc region take the Heap_lock (that's the second level of G1's slow allocation path).
Reviewed-by: johnc, brutisso, ysr

diff -r 2250ee17e258 -r b158bed62ef5 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 12 16:34:25 2011 -0500
@@ -610,6 +610,39 @@
   // of the free region list is revamped as part of CR 6977804.
   wait_for_cleanup_complete();
 
+  // Other threads might still be trying to allocate using CASes out
+  // of the region we are retiring, as they can do so without holding
+  // the Heap_lock. So we first have to make sure that noone else can
+  // allocate in it by doing a maximal allocation. Even if our CAS
+  // attempt fails a few times, we'll succeed sooner or later given
+  // that a failed CAS attempt mean that the region is getting closed
+  // to being full (someone else succeeded in allocating into it).
+  size_t free_word_size = cur_alloc_region->free() / HeapWordSize;
+
+  // This is the minimum free chunk we can turn into a dummy
+  // object. If the free space falls below this, then noone can
+  // allocate in this region anyway (all allocation requests will be
+  // of a size larger than this) so we won't have to perform the dummy
+  // allocation.
+  size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
+
+  while (free_word_size >= min_word_size_to_fill) {
+    HeapWord* dummy =
+      cur_alloc_region->par_allocate_no_bot_updates(free_word_size);
+    if (dummy != NULL) {
+      // If the allocation was successful we should fill in the space.
+      CollectedHeap::fill_with_object(dummy, free_word_size);
+      break;
+    }
+
+    free_word_size = cur_alloc_region->free() / HeapWordSize;
+    // It's also possible that someone else beats us to the
+    // allocation and they fill up the region. In that case, we can
+    // just get out of the loop
+  }
+  assert(cur_alloc_region->free() / HeapWordSize < min_word_size_to_fill,
+         "sanity");
+
   retire_cur_alloc_region_common(cur_alloc_region);
   assert(_cur_alloc_region == NULL, "post-condition");
 }
@@ -661,27 +694,29 @@
       // young type.
       OrderAccess::storestore();
 
-      // Now allocate out of the new current alloc region. We could
-      // have re-used allocate_from_cur_alloc_region() but its
-      // operation is slightly different to what we need here. First,
-      // allocate_from_cur_alloc_region() is only called outside a
-      // safepoint and will always unlock the Heap_lock if it returns
-      // a non-NULL result. Second, it assumes that the current alloc
-      // region is what's already assigned in _cur_alloc_region. What
-      // we want here is to actually do the allocation first before we
-      // assign the new region to _cur_alloc_region. This ordering is
-      // not currently important, but it will be essential when we
-      // change the code to support CAS allocation in the future (see
-      // CR 6994297).
-      //
-      // This allocate method does BOT updates and we don't need them in
-      // the young generation. This will be fixed in the near future by
-      // CR 6994297.
-      HeapWord* result = new_cur_alloc_region->allocate(word_size);
+      // Now, perform the allocation out of the region we just
+      // allocated. Note that noone else can access that region at
+      // this point (as _cur_alloc_region has not been updated yet),
+      // so we can just go ahead and do the allocation without any
+      // atomics (and we expect this allocation attempt to
+      // suceeded). Given that other threads can attempt an allocation
+      // with a CAS and without needing the Heap_lock, if we assigned
+      // the new region to _cur_alloc_region before first allocating
+      // into it other threads might have filled up the new region
+      // before we got a chance to do the allocation ourselves. In
+      // that case, we would have needed to retire the region, grab a
+      // new one, and go through all this again. Allocating out of the
+      // new region before assigning it to _cur_alloc_region avoids
+      // all this.
+      HeapWord* result =
+                     new_cur_alloc_region->allocate_no_bot_updates(word_size);
       assert(result != NULL, "we just allocate out of an empty region "
              "so allocation should have been successful");
       assert(is_in(result), "result should be in the heap");
 
+      // Now make sure that the store to _cur_alloc_region does not
+      // float above the store to top.
+      OrderAccess::storestore();
       _cur_alloc_region = new_cur_alloc_region;
 
       if (!at_safepoint) {
@@ -718,6 +753,9 @@
   for (int try_count = 1; /* we'll return or break */; try_count += 1) {
     bool succeeded = true;
 
+    // Every time we go round the loop we should be holding the Heap_lock.
+    assert_heap_locked();
+
     {
       // We may have concurrent cleanup working at the time. Wait for
       // it to complete. In the future we would probably want to make
@@ -734,7 +772,8 @@
       // attempt as it's redundant (we only reach here after an
       // allocation attempt has been unsuccessful).
       wait_for_cleanup_complete();
-      HeapWord* result = attempt_allocation(word_size);
+
+      HeapWord* result = attempt_allocation_locked(word_size);
       if (result != NULL) {
         assert_heap_not_locked();
         return result;
@@ -748,7 +787,6 @@
       if (g1_policy()->can_expand_young_list()) {
         // Yes, we are allowed to expand the young gen. Let's try to
         // allocate a new current alloc region.
-
         HeapWord* result =
           replace_cur_alloc_region_and_allocate(word_size,
                                                 false, /* at_safepoint */
@@ -771,20 +809,23 @@
       // rather than causing more, now probably unnecessary, GC attempts.
       JavaThread* jthr = JavaThread::current();
       assert(jthr != NULL, "sanity");
-      if (!jthr->in_critical()) {
-        MutexUnlocker mul(Heap_lock);
-        GC_locker::stall_until_clear();
-
-        // We'll then fall off the end of the ("if GC locker active")
-        // if-statement and retry the allocation further down in the
-        // loop.
-      } else {
+      if (jthr->in_critical()) {
         if (CheckJNICalls) {
           fatal("Possible deadlock due to allocating while"
                 " in jni critical section");
         }
+        // We are returning NULL so the protocol is that we're still
+        // holding the Heap_lock.
+        assert_heap_locked();
         return NULL;
       }
+
+      Heap_lock->unlock();
+      GC_locker::stall_until_clear();
+
+      // No need to relock the Heap_lock. We'll fall off to the code
+      // below the else-statement which assumes that we are not
+      // holding the Heap_lock.
     } else {
       // We are not locked out. So, let's try to do a GC. The VM op
       // will retry the allocation before it completes.
@@ -805,11 +846,10 @@
         dirty_young_block(result, word_size);
         return result;
       }
-
-      Heap_lock->lock();
     }
 
-    assert_heap_locked();
+    // Both paths that get us here from above unlock the Heap_lock.
+    assert_heap_not_locked();
 
     // We can reach here when we were unsuccessful in doing a GC,
     // because another thread beat us to it, or because we were locked
@@ -948,10 +988,8 @@
     if (!expect_null_cur_alloc_region) {
       HeapRegion* cur_alloc_region = _cur_alloc_region;
       if (cur_alloc_region != NULL) {
-        // This allocate method does BOT updates and we don't need them in
-        // the young generation. This will be fixed in the near future by
-        // CR 6994297.
-        HeapWord* result = cur_alloc_region->allocate(word_size);
+        // We are at a safepoint so no reason to use the MT-safe version.
+        HeapWord* result = cur_alloc_region->allocate_no_bot_updates(word_size);
         if (result != NULL) {
           assert(is_in(result), "result should be in the heap");
 
@@ -983,20 +1021,17 @@
   assert_heap_not_locked_and_not_at_safepoint();
   assert(!isHumongous(word_size), "we do not allow TLABs of humongous size");
 
-  Heap_lock->lock();
-
-  // First attempt: try allocating out of the current alloc region or
-  // after replacing the current alloc region.
+  // First attempt: Try allocating out of the current alloc region
+  // using a CAS. If that fails, take the Heap_lock and retry the
+  // allocation, potentially replacing the current alloc region.
   HeapWord* result = attempt_allocation(word_size);
   if (result != NULL) {
     assert_heap_not_locked();
     return result;
   }
 
-  assert_heap_locked();
-
-  // Second attempt: go into the even slower path where we might
-  // try to schedule a collection.
+  // Second attempt: Go to the slower path where we might try to
+  // schedule a collection.
   result = attempt_allocation_slow(word_size);
   if (result != NULL) {
     assert_heap_not_locked();
@@ -1004,6 +1039,7 @@
   }
 
   assert_heap_locked();
+  // Need to unlock the Heap_lock before returning.
   Heap_lock->unlock();
   return NULL;
 }
@@ -1022,11 +1058,10 @@
   for (int try_count = 1; /* we'll return */; try_count += 1) {
     unsigned int gc_count_before;
     {
-      Heap_lock->lock();
-
       if (!isHumongous(word_size)) {
-        // First attempt: try allocating out of the current alloc
-        // region or after replacing the current alloc region.
+        // First attempt: Try allocating out of the current alloc region
+        // using a CAS. If that fails, take the Heap_lock and retry the
+        // allocation, potentially replacing the current alloc region.
         HeapWord* result = attempt_allocation(word_size);
         if (result != NULL) {
           assert_heap_not_locked();
@@ -1035,14 +1070,17 @@
 
         assert_heap_locked();
 
-        // Second attempt: go into the even slower path where we might
-        // try to schedule a collection.
+        // Second attempt: Go to the slower path where we might try to
+        // schedule a collection.
         result = attempt_allocation_slow(word_size);
         if (result != NULL) {
           assert_heap_not_locked();
           return result;
         }
       } else {
+        // attempt_allocation_humongous() requires the Heap_lock to be held.
+        Heap_lock->lock();
+
         HeapWord* result = attempt_allocation_humongous(word_size,
                                                      false /* at_safepoint */);
         if (result != NULL) {
@@ -1054,7 +1092,8 @@
       assert_heap_locked();
       // Read the gc count while the heap lock is held.
       gc_count_before = SharedHeap::heap()->total_collections();
-      // We cannot be at a safepoint, so it is safe to unlock the Heap_lock
+
+      // Release the Heap_lock before attempting the collection.
       Heap_lock->unlock();
     }
 
diff -r 2250ee17e258 -r b158bed62ef5 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 12 16:34:25 2011 -0500
@@ -430,7 +430,8 @@
                                  bool*  gc_overhead_limit_was_exceeded);
 
   // The following methods, allocate_from_cur_allocation_region(),
-  // attempt_allocation(), replace_cur_alloc_region_and_allocate(),
+  // attempt_allocation(), attempt_allocation_locked(),
+  // replace_cur_alloc_region_and_allocate(),
   // attempt_allocation_slow(), and attempt_allocation_humongous()
   // have very awkward pre- and post-conditions with respect to
   // locking:
@@ -481,20 +482,30 @@
   // successfully manage to allocate it, or NULL.
 
   // It tries to satisfy an allocation request out of the current
-  // allocating region, which is passed as a parameter. It assumes
-  // that the caller has checked that the current allocating region is
-  // not NULL. Given that the caller has to check the current
-  // allocating region for at least NULL, it might as well pass it as
-  // the first parameter so that the method doesn't have to read it
-  // from the _cur_alloc_region field again.
+  // alloc region, which is passed as a parameter. It assumes that the
+  // caller has checked that the current alloc region is not NULL.
+  // Given that the caller has to check the current alloc region for
+  // at least NULL, it might as well pass it as the first parameter so
+  // that the method doesn't have to read it from the
+  // _cur_alloc_region field again. It is called from both
+  // attempt_allocation() and attempt_allocation_locked() and the
+  // with_heap_lock parameter indicates whether the caller was holding
+  // the heap lock when it called it or not.
   inline HeapWord* allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                  size_t word_size);
+                                                  size_t word_size,
+                                                  bool with_heap_lock);
 
-  // It attempts to allocate out of the current alloc region. If that
-  // fails, it retires the current alloc region (if there is one),
-  // tries to get a new one and retries the allocation.
+  // First-level of allocation slow path: it attempts to allocate out
+  // of the current alloc region in a lock-free manner using a CAS. If
+  // that fails it takes the Heap_lock and calls
+  // attempt_allocation_locked() for the second-level slow path.
   inline HeapWord* attempt_allocation(size_t word_size);
 
+  // Second-level of allocation slow path: while holding the Heap_lock
+  // it tries to allocate out of the current alloc region and, if that
+  // fails, tries to allocate out of a new current alloc region.
+  inline HeapWord* attempt_allocation_locked(size_t word_size);
+
   // It assumes that the current alloc region has been retired and
   // tries to allocate a new one. If it's successful, it performs the
   // allocation out of the new current alloc region and updates
@@ -506,11 +517,11 @@
                                                   bool do_dirtying,
                                                   bool can_expand);
 
-  // The slow path when we are unable to allocate a new current alloc
-  // region to satisfy an allocation request (i.e., when
-  // attempt_allocation() fails). It will try to do an evacuation
-  // pause, which might stall due to the GC locker, and retry the
-  // allocation attempt when appropriate.
+  // Third-level of allocation slow path: when we are unable to
+  // allocate a new current alloc region to satisfy an allocation
+  // request (i.e., when attempt_allocation_locked() fails). It will
+  // try to do an evacuation pause, which might stall due to the GC
+  // locker, and retry the allocation attempt when appropriate.
   HeapWord* attempt_allocation_slow(size_t word_size);
 
   // The method that tries to satisfy a humongous allocation
diff -r 2250ee17e258 -r b158bed62ef5 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Jan 12 16:34:25 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,10 +63,12 @@
 // assumptions of this method (and other related ones).
 inline HeapWord*
 G1CollectedHeap::allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
+                                                size_t word_size,
+                                                bool with_heap_lock) {
+  assert_not_at_safepoint();
+  assert(with_heap_lock == Heap_lock->owned_by_self(),
+         "with_heap_lock and Heap_lock->owned_by_self() should be a tautology");
   assert(cur_alloc_region != NULL, "pre-condition of the method");
-  assert(cur_alloc_region == _cur_alloc_region, "pre-condition of the method");
   assert(cur_alloc_region->is_young(),
          "we only support young current alloc regions");
   assert(!isHumongous(word_size), "allocate_from_cur_alloc_region() "
@@ -76,20 +78,24 @@
   assert(!cur_alloc_region->is_empty(),
          err_msg("region ["PTR_FORMAT","PTR_FORMAT"] should not be empty",
                  cur_alloc_region->bottom(), cur_alloc_region->end()));
-  // This allocate method does BOT updates and we don't need them in
-  // the young generation. This will be fixed in the near future by
-  // CR 6994297.
-  HeapWord* result = cur_alloc_region->allocate(word_size);
+  HeapWord* result = cur_alloc_region->par_allocate_no_bot_updates(word_size);
   if (result != NULL) {
     assert(is_in(result), "result should be in the heap");
-    Heap_lock->unlock();
 
+    if (with_heap_lock) {
+      Heap_lock->unlock();
+    }
+    assert_heap_not_locked();
     // Do the dirtying after we release the Heap_lock.
     dirty_young_block(result, word_size);
     return result;
   }
 
-  assert_heap_locked();
+  if (with_heap_lock) {
+    assert_heap_locked();
+  } else {
+    assert_heap_not_locked();
+  }
   return NULL;
 }
 
@@ -97,31 +103,27 @@
 // assumptions of this method (and other related ones).
 inline HeapWord*
 G1CollectedHeap::attempt_allocation(size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
+  assert_heap_not_locked_and_not_at_safepoint();
   assert(!isHumongous(word_size), "attempt_allocation() should not be called "
          "for humongous allocation requests");
 
   HeapRegion* cur_alloc_region = _cur_alloc_region;
   if (cur_alloc_region != NULL) {
     HeapWord* result = allocate_from_cur_alloc_region(cur_alloc_region,
-                                                      word_size);
+                                                   word_size,
+                                                   false /* with_heap_lock */);
+    assert_heap_not_locked();
     if (result != NULL) {
-      assert_heap_not_locked();
       return result;
     }
-
-    assert_heap_locked();
-
-    // Since we couldn't successfully allocate into it, retire the
-    // current alloc region.
-    retire_cur_alloc_region(cur_alloc_region);
   }
 
-  // Try to get a new region and allocate out of it
-  HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
-                                                     false, /* at_safepoint */
-                                                     true,  /* do_dirtying */
-                                                     false  /* can_expand */);
+  // Our attempt to allocate lock-free failed as the current
+  // allocation region is either NULL or full. So, we'll now take the
+  // Heap_lock and retry.
+  Heap_lock->lock();
+
+  HeapWord* result = attempt_allocation_locked(word_size);
   if (result != NULL) {
     assert_heap_not_locked();
     return result;
@@ -145,6 +147,45 @@
   _cur_alloc_region = NULL;
 }
 
+inline HeapWord*
+G1CollectedHeap::attempt_allocation_locked(size_t word_size) {
+  assert_heap_locked_and_not_at_safepoint();
+  assert(!isHumongous(word_size), "attempt_allocation_locked() "
+         "should not be called for humongous allocation requests");
+
+  // First, reread the current alloc region and retry the allocation
+  // in case somebody replaced it while we were waiting to get the
+  // Heap_lock.
+  HeapRegion* cur_alloc_region = _cur_alloc_region;
+  if (cur_alloc_region != NULL) {
+    HeapWord* result = allocate_from_cur_alloc_region(
+                                                  cur_alloc_region, word_size,
+                                                  true /* with_heap_lock */);
+    if (result != NULL) {
+      assert_heap_not_locked();
+      return result;
+    }
+
+    // We failed to allocate out of the current alloc region, so let's
+    // retire it before getting a new one.
+    retire_cur_alloc_region(cur_alloc_region);
+  }
+
+  assert_heap_locked();
+  // Try to get a new region and allocate out of it
+  HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
+                                                     false, /* at_safepoint */
+                                                     true,  /* do_dirtying */
+                                                     false  /* can_expand */);
+  if (result != NULL) {
+    assert_heap_not_locked();
+    return result;
+  }
+
+  assert_heap_locked();
+  return NULL;
+}
+
 // It dirties the cards that cover the block so that so that the post
 // write barrier never queues anything when updating objects on this
 // block. It is assumed (and in fact we assert) that the block
diff -r 2250ee17e258 -r b158bed62ef5 src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jan 12 16:34:25 2011 -0500
@@ -372,6 +372,15 @@
     Allocated
   };
 
+  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
+    assert(is_young(), "we can only skip BOT updates on young regions");
+    return ContiguousSpace::par_allocate(word_size);
+  }
+  inline HeapWord* allocate_no_bot_updates(size_t word_size) {
+    assert(is_young(), "we can only skip BOT updates on young regions");
+    return ContiguousSpace::allocate(word_size);
+  }
+
   // If this region is a member of a HeapRegionSeq, the index in that
   // sequence, otherwise -1.
   int hrs_index() const { return _hrs_index; }