# HG changeset patch
# User Roman Kennke <rkennke@redhat.com>
# Date 1421410210 -3600
# Node ID 557818193d0e6091e6bd8f872b7ff9ea3476f75b
# Parent  ac5d21fb6715ef28eba1204f64a3dfdd0c722427
Implement TLAB resizing for Shenandoah; GC threads now use Thread::tlab() instead of a separate GCLAB field.

diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -166,6 +166,7 @@
 }
 
 void ShenandoahConcurrentThread::schedule_full_gc() {
+  if (ShenandoahGCVerbose) tty->print_cr("scheduling full gc");  // trace the request only when verbose GC output is enabled
   _do_full_gc = true;
 }
 
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -251,7 +251,17 @@
   }
 }
 
+class InitGCLABClosure : public ThreadClosure {  // calls tlab().initialize() on each thread it is applied to
+  void do_thread(Thread* thread) {
+    thread->tlab().initialize();
+  }
+};
+
 void ShenandoahHeap::post_initialize() {
+
+  InitGCLABClosure init_gclabs;
+  gc_threads_do(&init_gclabs);  // run tlab().initialize() on every thread gc_threads_do() visits
+
   _scm->initialize();
   ref_processing_init();
 }
@@ -386,8 +396,8 @@
   }
   return result;
 }
+
   
-
 ShenandoahHeap* ShenandoahHeap::heap() {
   assert(_pgc != NULL, "Unitialized access to ShenandoahHeap::heap()");
   assert(_pgc->kind() == CollectedHeap::ShenandoahHeap, "not a shenandoah heap");
@@ -511,7 +521,7 @@
     return allocate_large_memory(word_size);
   }
 
-  ShenandoahHeapRegion* my_current_region = get_current_region(evacuation);
+  ShenandoahHeapRegion* my_current_region = get_current_region(false);
   if (my_current_region == NULL) {
     return NULL; // No more room to make a new region. OOM.
   }
@@ -530,7 +540,7 @@
   result = my_current_region->par_allocate(word_size);
   while (result == NULL && my_current_region != NULL) {
     // 2nd attempt. Try next region.
-    my_current_region = get_next_region(evacuation);
+    my_current_region = get_next_region(false);
     if (my_current_region == NULL) {
       return NULL; // No more room to make a new region. OOM.
     }
@@ -857,7 +867,7 @@
     if (ShenandoahGCVerbose) 
       tty->print("Thread %d post barrier sync\n", worker_id);
 
-    Thread::current()->gclab().make_parsable(true);
+    Thread::current()->tlab().make_parsable(true);
   }
 };
 
@@ -1168,26 +1178,14 @@
 
   void do_thread(Thread* thread) {
     thread->tlab().make_parsable(_retire);
-    thread->gclab().make_parsable(_retire);
   }
 };
 
 void ShenandoahHeap::ensure_parsability(bool retire_tlabs) {
-  assert(SafepointSynchronize::is_at_safepoint() ||
-         !is_init_completed(),
-         "Should only be called at a safepoint or at start-up"
-         " otherwise concurrent mutator activity may make heap "
-         " unparsable again");
-  const bool use_tlab = UseTLAB;
-  // The main thread starts allocating via a TLAB even before it
-  // has added itself to the threads list at vm boot-up.
-  assert(!use_tlab || Threads::first() != NULL,
-         "Attempt to fill tlabs before main thread has been added"
-         " to threads list is doomed to failure!");
-
+  CollectedHeap::ensure_parsability(retire_tlabs);  // NOTE(review): presumably performs the checks removed above - confirm
 
   RetireTLABClosure cl(retire_tlabs);
-  Threads::threads_do(&cl);
+  gc_threads_do(&cl);  // additionally make_parsable() the TLAB of each thread gc_threads_do() visits
 }
 
 void ShenandoahHeap::prepare_for_update_references() {
@@ -1509,12 +1507,43 @@
 
 
 size_t  ShenandoahHeap::unsafe_max_tlab_alloc(Thread *thread) const {
-  ShenandoahHeapRegion* current = _free_regions->current(true);
-  if (current == NULL) {
-    return MinTLABSize;
-  } else {
-    return MIN2(current->free(), (size_t) MinTLABSize);
+  return ShenandoahHeapRegion::RegionSizeBytes;  // fixed bound of one region's size; the thread argument is not consulted
+}
+
+class ResizeGCLABClosure : public ThreadClosure {  // calls tlab().resize() on each thread it is applied to
+public:
+  void do_thread(Thread* thread) {
+    thread->tlab().resize();
+  }
+};
+
+void ShenandoahHeap::resize_all_tlabs() {
+  CollectedHeap::resize_all_tlabs();  // base-class resize runs first
+
+  if (PrintTLAB && Verbose) {
+    tty->print_cr("Resizing Shenandoah GCLABs...");
   }
+
+  ResizeGCLABClosure cl;
+  gc_threads_do(&cl);  // then resize the TLAB of each thread gc_threads_do() visits
+
+  if (PrintTLAB && Verbose) {
+    tty->print_cr("Done resizing Shenandoah GCLABs...");
+  }
+}
+
+class AccumulateStatisticsGCLABClosure : public ThreadClosure {  // per-thread TLAB statistics pass
+public:
+  void do_thread(Thread* thread) {
+    thread->tlab().accumulate_statistics();
+    thread->tlab().initialize_statistics();  // NOTE(review): presumably resets the per-TLAB counters after accumulation - confirm
+  }
+};
+
+void ShenandoahHeap::accumulate_statistics_all_gclabs() {
+
+  AccumulateStatisticsGCLABClosure cl;
+  gc_threads_do(&cl);  // apply the statistics closure to each thread gc_threads_do() visits
 }
 
 bool  ShenandoahHeap::can_elide_tlab_store_barriers() const {
@@ -1708,7 +1737,8 @@
   }
 }
 size_t ShenandoahHeap::tlab_capacity(Thread *thr) const {
-  return ShenandoahHeapRegion::RegionSizeBytes;
+  // The whole heap is available for TLAB allocation, so report capacity().
+  return capacity();
 }
 
 class ShenandoahIterateObjectClosureRegionClosure: public ShenandoahHeapRegionClosure {
@@ -1883,6 +1913,9 @@
 
 
 void ShenandoahHeap::start_concurrent_marking() {
+
+  accumulate_statistics_all_tlabs();
+
   set_concurrent_mark_in_progress(true);
   // We need to reset all TLABs because we'd lose marks on all objects allocated in them.
   if (UseTLAB) {
@@ -2213,7 +2246,7 @@
 }
 
 HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) {
-  HeapWord* obj = thread->gclab().allocate(size);
+  HeapWord* obj = thread->tlab().allocate(size);
   if (obj != NULL) {
     return obj;
   }
@@ -2224,16 +2257,16 @@
 HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size) {
   // Retain tlab and allocate object in shared space if
   // the amount free in the tlab is too large to discard.
-  if (thread->gclab().free() > thread->gclab().refill_waste_limit()) {
-    thread->gclab().record_slow_allocation(size);
+  if (thread->tlab().free() > thread->tlab().refill_waste_limit()) {
+    thread->tlab().record_slow_allocation(size);
     return NULL;
   }
 
   // Discard tlab and allocate a new one.
   // To minimize fragmentation, the last TLAB may be smaller than the rest.
-  size_t new_gclab_size = thread->gclab().compute_size(size);
-
-  thread->gclab().clear_before_allocation();
+  size_t new_gclab_size = thread->tlab().compute_size(size);
+
+  thread->tlab().clear_before_allocation();
 
   if (new_gclab_size == 0) {
     return NULL;
@@ -2259,7 +2292,7 @@
     Copy::fill_to_words(obj + hdr_size, new_gclab_size - hdr_size, badHeapWordVal);
 #endif // ASSERT
   }
-  thread->gclab().fill(obj, obj + size, new_gclab_size);
+  thread->tlab().fill(obj, obj + size, new_gclab_size);
   return obj;
 }
 
@@ -2293,7 +2326,12 @@
 
   if (filler == NULL) {
     oom_during_evacuation();
-    return p;
+    // If this is a Java thread, it should have waited
+    // until all GC threads are done, and then we
+    // return the forwardee.
+    oop resolved = ShenandoahBarrierSet::resolve_oop_static(p);
+    tty->print_cr("possible emergency allocation needed: %p", (oopDesc*) resolved);
+    return resolved;
   }
 
   HeapWord* copy = filler + BrooksPointer::BROOKS_POINTER_OBJ_SIZE;
@@ -2328,7 +2366,7 @@
 #endif
   }  else {
     if (alloc_from_gclab) {
-      thread->gclab().rollback(required);
+      thread->tlab().rollback(required);
     }
 #ifdef ASSERT
     if (ShenandoahTraceEvacuations) {
@@ -2525,5 +2563,6 @@
 }
 
 size_t ShenandoahHeap::tlab_used(Thread* ignored) const {
-  Unimplemented();
+  // Used for statistics only. TLAB usage is not tracked separately yet, so this returns used() as a stand-in.
+  return used();
 }
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp	Fri Jan 16 13:10:10 2015 +0100
@@ -179,6 +179,9 @@
   void space_iterate(SpaceClosure* scl);
   virtual size_t unsafe_max_tlab_alloc(Thread *thread) const;
 
+  void resize_all_tlabs();
+  void accumulate_statistics_all_gclabs();
+
   HeapWord* tlab_post_allocation_setup(HeapWord* obj, bool new_obj);
 
   uint oop_extra_words();
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp
--- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -219,6 +219,8 @@
 
   sh->reset_mark_bitmap();
 
+  sh->resize_all_tlabs();
+
   sh->shenandoahPolicy()->record_final_update_refs_end();
 }
 
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_interface/collectedHeap.cpp
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -622,6 +622,10 @@
   // Default implementation does nothing.
 }
 
+void CollectedHeap::accumulate_statistics_all_gclabs() {
+  // Default implementation does nothing; ShenandoahHeap overrides this virtual hook.
+}
+
 #ifndef CC_INTERP
 void CollectedHeap::compile_prepare_oop(MacroAssembler* masm, Register obj) {
   // Default implementation does nothing.
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/gc_interface/collectedHeap.hpp
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Fri Jan 16 13:10:10 2015 +0100
@@ -627,6 +627,9 @@
   // Shut down all GC workers and other GC related threads.
   virtual void shutdown();
 
+  // Accumulate additional statistics from GCLABs.
+  virtual void accumulate_statistics_all_gclabs();
+
   // Non product verification and debugging.
 #ifndef PRODUCT
   // Support for PromotionFailureALot.  Return true if it's time to cause a
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/memory/threadLocalAllocBuffer.cpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -54,6 +54,8 @@
     thread->tlab().initialize_statistics();
   }
 
+  Universe::heap()->accumulate_statistics_all_gclabs();
+
   // Publish new stats if some allocation occurred.
   if (global_stats()->allocation() != 0) {
     global_stats()->publish();
@@ -195,8 +197,7 @@
   invariants();
 }
 
-void ThreadLocalAllocBuffer::initialize(bool gclab) {
-  _gclab = gclab;
+void ThreadLocalAllocBuffer::initialize() {
   initialize(NULL,                    // start
              NULL,                    // top
              NULL);                   // end
@@ -293,15 +294,9 @@
 }
 
 Thread* ThreadLocalAllocBuffer::myThread() {
-  ByteSize gclab_offset = Thread::gclab_start_offset();
-  ByteSize tlab_offset = Thread::tlab_start_offset();
-  ByteSize offs = _gclab ? gclab_offset : tlab_offset;
-  Thread* thread = (Thread*)(((char *)this) +
-                   in_bytes(start_offset()) - in_bytes(offs));
-#ifdef ASSERT
-  assert(this == (_gclab ? &thread->gclab() : &thread->tlab()), "must be");
-#endif
-  return thread;
+  return (Thread*)(((char *)this) +
+                   in_bytes(start_offset()) -
+                   in_bytes(Thread::tlab_start_offset()));  // NOTE(review): assumes this buffer is the Thread's tlab member - confirm
 }
 
 size_t ThreadLocalAllocBuffer::end_reserve() {
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/memory/threadLocalAllocBuffer.hpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Fri Jan 16 13:10:10 2015 +0100
@@ -55,13 +55,9 @@
   unsigned  _slow_refill_waste;
   unsigned  _gc_waste;
   unsigned  _slow_allocations;
-  bool      _gclab;
 
   AdaptiveWeightedAverage _allocation_fraction;  // fraction of eden allocated in tlabs
 
-  void accumulate_statistics();
-  void initialize_statistics();
-
   void set_start(HeapWord* start)                { _start = start; }
   void set_end(HeapWord* end)                    { _end = end; }
   void set_top(HeapWord* top)                    { _top = top; }
@@ -79,9 +75,6 @@
   // Make parsable and release it.
   void reset();
 
-  // Resize based on amount of allocation, etc.
-  void resize();
-
   void invariants() const { assert(top() >= start() && top() <= end(), "invalid tlab"); }
 
   void initialize(HeapWord* start, HeapWord* top, HeapWord* end);
@@ -106,6 +99,12 @@
     // do nothing.  tlabs must be inited by initialize() calls
   }
 
+  // Resize based on amount of allocation, etc.
+  void resize();
+
+  void accumulate_statistics();
+  void initialize_statistics();
+
   static const size_t min_size()                 { return align_object_size(MinTLABSize / HeapWordSize); }
   static const size_t max_size()                 { assert(_max_size != 0, "max_size not set up"); return _max_size; }
   static void set_max_size(size_t max_size)      { _max_size = max_size; }
@@ -157,7 +156,7 @@
   static void resize_all_tlabs();
 
   void fill(HeapWord* start, HeapWord* top, size_t new_size);
-  void initialize(bool gclab = false);
+  void initialize();
 
   static size_t refill_waste_limit_increment()   { return TLABWasteIncrement; }
 
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/runtime/thread.cpp	Fri Jan 16 13:10:10 2015 +0100
@@ -302,18 +302,6 @@
            "bug in forced alignment of thread objects");
   }
 #endif /* ASSERT */
-
-  /*
-  if (UseShenandoahGC) {
-    tty->print_cr("is_Worker_thread: %d, is_Java_thread: %d", is_Worker_thread(), is_Java_thread());
-  }
-  */
-  if (UseShenandoahGC /* && (is_Worker_thread() || is_Java_thread()) */) {
-    gclab().initialize(true);
-    // We need to initialize all tlabs here, it'll be done again for JavaThreads later,
-    // but it shouldn't hurt.
-    tlab().initialize();
-  }
 }
 
 void Thread::initialize_thread_local_storage() {
@@ -1934,9 +1922,6 @@
   if (UseG1GC || UseShenandoahGC) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB) {
-    gclab().make_parsable(true);
-  }
 #endif // INCLUDE_ALL_GCS
 
   // Remove from list of active threads list, and notify VM thread if we are the last non-daemon thread
@@ -2003,9 +1988,6 @@
   if (UseG1GC || UseShenandoahGC) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB) {
-    gclab().make_parsable(true);
-  }
 #endif // INCLUDE_ALL_GCS
 
   Threads::remove(this);
diff -r ac5d21fb6715 -r 557818193d0e src/share/vm/runtime/thread.hpp
--- a/src/share/vm/runtime/thread.hpp	Fri Jan 16 12:30:35 2015 +0100
+++ b/src/share/vm/runtime/thread.hpp	Fri Jan 16 13:10:10 2015 +0100
@@ -256,7 +256,6 @@
   friend class GC_locker;
 
   ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
-  ThreadLocalAllocBuffer _gclab;                // Thread-local allocation buffer for GC (e.g. evacuation)
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
 
@@ -439,9 +438,6 @@
     }
   }
 
-  // Thread-Local GC Allocation Buffer (GCLAB) support
-  ThreadLocalAllocBuffer& gclab()                { return _gclab; }
-
   jlong allocated_bytes()               { return _allocated_bytes; }
   void set_allocated_bytes(jlong value) { _allocated_bytes = value; }
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
@@ -630,8 +626,6 @@
 
 #undef TLAB_FIELD_OFFSET
 
-  static ByteSize gclab_start_offset()         { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::start_offset(); }
-
   static ByteSize allocated_bytes_offset()       { return byte_offset_of(Thread, _allocated_bytes ); }
 
  public: