# HG changeset patch
# User tonyp
# Date 1237914592 14400
# Node ID b36996d65c38c62fc5ce72df18d71faca1998514
# Parent  c9ede57210d924c36c32c05ea66b3bb9a8efd27f
6817608: G1: backports of G1 CRs from HS15 to HS14
Summary: Backports of the following G1 CRs from HS15 to HS14: 6820321,
6815683, 6816154, 6817419, 6604422, 6728271, 6760309, 6814467, 6812428,
6810698, 6720309, 6720334, 6804746, 6700941, 6802413, 6484959, 6797754,
6793828, 6484956
Reviewed-by: apetrusenko, iveresov, jcoomes, jmasa, jrose, kvn

diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/collectionSetChooser.hpp
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -24,7 +24,7 @@

 // We need to sort heap regions by collection desirability.

-class CSetChooserCache {
+class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
     CacheLength = 16
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -145,14 +145,9 @@
   if (G1RSBarrierUseQueue) {
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
     dcqs.abandon_logs();
-    if (_cg1rThread->do_traversal()) {
-      _pya = PYA_restart;
-    } else {
-      _cg1rThread->set_do_traversal(true);
-      // Reset the post-yield actions.
-      _pya = PYA_continue;
-      _last_pya = PYA_continue;
-    }
+    // Reset the post-yield actions.
+    _pya = PYA_continue;
+    _last_pya = PYA_continue;
   } else {
     _pya = PYA_restart;
   }
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -33,7 +33,7 @@
   PYA_cancel   // It's been completed by somebody else: cancel.
 };

-class ConcurrentG1Refine {
+class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread* _cg1rThread;

   volatile jint _pya;
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -133,14 +133,12 @@
       _co_tracker.update(false);

       if (G1SmoothConcRefine) {
-        start_vtime_sec = os::elapsedVTime();
         prev_buffer_num = curr_buffer_num;
-
         _sts.leave();
         os::sleep(Thread::current(), (jlong) _interval_ms, false);
         _sts.join();
+        start_vtime_sec = os::elapsedVTime();
       }
-
       n_logs++;
     }
     // Make sure we harvest the PYA, if any.
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentMark.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -420,6 +420,10 @@
   _has_overflown(false),
   _concurrent(false),

+  _has_aborted(false),
+  _restart_for_overflow(false),
+  _concurrent_marking_in_progress(false),
+  _should_gray_objects(false),
+
   // _verbose_level set below

@@ -1228,7 +1232,16 @@
   if (!_final && _regions_done == 0)
     _start_vtime_sec = os::elapsedVTime();

-  if (hr->continuesHumongous()) return false;
+  if (hr->continuesHumongous()) {
+    HeapRegion* hum_start = hr->humongous_start_region();
+    // If the head region of the humongous region has been determined
+    // to be alive, then all the tail regions should also be marked
+    // as such.
+    if (_region_bm->at(hum_start->hrs_index())) {
+      _region_bm->par_at_put(hr->hrs_index(), 1);
+    }
+    return false;
+  }

   HeapWord* nextTop = hr->next_top_at_mark_start();
   HeapWord* start   = hr->top_at_conc_mark_count();
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentMark.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -30,7 +30,7 @@

 // A generic CM bit map. This is essentially a wrapper around the BitMap
 // class, with one bit per (1<<_shifter) HeapWords.
-class CMBitMapRO {
+class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
 protected:
   HeapWord* _bmStartWord;      // base address of range covered by map
   size_t    _bmWordSize;       // map size (in #HeapWords covered)
@@ -139,7 +139,7 @@

 // Represents a marking stack used by the CM collector.
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
-class CMMarkStack {
+class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
   oop*   _base;      // bottom of stack
   jint   _index;     // one more than last occupied index
@@ -237,7 +237,7 @@
   void oops_do(OopClosure* f);
 };

-class CMRegionStack {
+class CMRegionStack VALUE_OBJ_CLASS_SPEC {
   MemRegion* _base;
   jint _capacity;
   jint _index;
@@ -312,7 +312,7 @@

 class ConcurrentMarkThread;

-class ConcurrentMark {
+class ConcurrentMark: public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -107,7 +107,7 @@
         if (PrintGC) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
-          tty->print_cr("[GC concurrent-mark-start]");
+          gclog_or_tty->print_cr("[GC concurrent-mark-start]");
         }

         if (!g1_policy->in_young_gc_mode()) {
@@ -320,8 +320,6 @@
   set_in_progress();
   clear_started();
   if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
-
-  return;
 }

 // Note: this method, although exported by the ConcurrentMarkSweepThread,
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -78,8 +78,8 @@
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                    int max_completed_queue,
-                                   Mutex* lock) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+                                   Mutex* lock, PtrQueueSet* fl_owner) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
   set_buffer_size(DCQBarrierQueueBufferSize);
   set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);

diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -88,7 +88,7 @@

   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
                   int max_completed_queue = 0,
-                  Mutex* lock = NULL);
+                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);

   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -136,12 +136,20 @@
   int calls() { return _calls; }
 };

+class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
+public:
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    return true;
+  }
+};
+
 YoungList::YoungList(G1CollectedHeap* g1h)
   : _g1h(g1h), _head(NULL),
     _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
     _length(0), _scan_only_length(0),
     _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
 {
   guarantee( check_list_empty(false), "just making sure..." );
 }
@@ -159,16 +167,15 @@
 }

 void YoungList::add_survivor_region(HeapRegion* hr) {
-  assert(!hr->is_survivor(), "should not already be for survived");
+  assert(hr->is_survivor(), "should be flagged as survivor region");
   assert(hr->get_next_young_region() == NULL, "cause it should!");

   hr->set_next_young_region(_survivor_head);
   if (_survivor_head == NULL) {
-    _survivors_tail = hr;
+    _survivor_tail = hr;
   }
   _survivor_head = hr;

-  hr->set_survivor();
   ++_survivor_length;
 }

@@ -239,7 +246,7 @@

   empty_list(_survivor_head);
   _survivor_head = NULL;
-  _survivors_tail = NULL;
+  _survivor_tail = NULL;
   _survivor_length = 0;

   _last_sampled_rs_lengths = 0;
@@ -391,6 +398,7 @@

   // Add survivor regions to SurvRateGroup.
   _g1h->g1_policy()->note_start_adding_survivor_regions();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
   for (HeapRegion* curr = _survivor_head;
        curr != NULL;
        curr = curr->get_next_young_region()) {
@@ -401,7 +409,7 @@
   if (_survivor_head != NULL) {
     _head           = _survivor_head;
     _length         = _survivor_length + _scan_only_length;
-    _survivors_tail->set_next_young_region(_scan_only_head);
+    _survivor_tail->set_next_young_region(_scan_only_head);
   } else {
     _head           = _scan_only_head;
     _length         = _scan_only_length;
@@ -418,9 +426,9 @@
   _curr_scan_only   = NULL;

   _survivor_head    = NULL;
-  _survivors_tail   = NULL;
+  _survivor_tail    = NULL;
   _survivor_length  = 0;
-  _g1h->g1_policy()->finished_recalculating_age_indexes();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(false /* is_survivors */);

   assert(check_list_well_formed(), "young list should be well formed");
 }
@@ -553,7 +561,7 @@
     if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
       alloc_region = newAllocRegion_work(word_size, true, zero_filled);
       if (purpose == GCAllocForSurvived && alloc_region != NULL) {
-        _young_list->add_survivor_region(alloc_region);
+        alloc_region->set_survivor();
       }
       ++_gc_alloc_region_counts[purpose];
     } else {
@@ -778,6 +786,12 @@
   }
 }

+void G1CollectedHeap::abandon_gc_alloc_regions() {
+  // first, make sure that the GC alloc region list is empty (it should!)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+  release_gc_alloc_regions(true /* totally */);
+}
+
 class PostMCRemSetClearClosure: public HeapRegionClosure {
   ModRefBarrierSet* _mr_bs;
 public:
@@ -812,6 +826,40 @@
   }
 };

+class RebuildRSOutOfRegionClosure: public HeapRegionClosure {
+  G1CollectedHeap*   _g1h;
+  UpdateRSOopClosure _cl;
+  int                _worker_i;
+public:
+  RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
+    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
+    _worker_i(worker_i),
+    _g1h(g1)
+  { }
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _cl.set_from(r);
+      r->oop_iterate(&_cl);
+    }
+    return false;
+  }
+};
+
+class ParRebuildRSTask: public AbstractGangTask {
+  G1CollectedHeap* _g1;
+public:
+  ParRebuildRSTask(G1CollectedHeap* g1)
+    : AbstractGangTask("ParRebuildRSTask"),
+      _g1(g1)
+  { }
+
+  void work(int i) {
+    RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
+    _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+                                         HeapRegion::RebuildRSClaimValue);
+  }
+};
+
 void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
                                     size_t word_size) {
   ResourceMark rm;
@@ -872,6 +920,7 @@

     // Make sure we'll choose a new allocation region afterwards.
     abandon_cur_alloc_region();
+    abandon_gc_alloc_regions();
     assert(_cur_alloc_region == NULL, "Invariant.");
     g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
     tear_down_region_lists();
@@ -912,30 +961,42 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }
     NOT_PRODUCT(ref_processor()->verify_no_references_recorded());

     reset_gc_time_stamp();
     // Since everything potentially moved, we will clear all remembered
-    // sets, and clear all cards.  Later we will also cards in the used
-    // portion of the heap after the resizing (which could be a shrinking.)
-    // We will also reset the GC time stamps of the regions.
+    // sets, and clear all cards.  Later we will rebuild remembered
+    // sets. We will also reset the GC time stamps of the regions.
     PostMCRemSetClearClosure rs_clear(mr_bs());
     heap_region_iterate(&rs_clear);

     // Resize the heap if necessary.
     resize_if_necessary_after_full_collection(full ? 0 : word_size);

-    // Since everything potentially moved, we will clear all remembered
-    // sets, but also dirty all cards corresponding to used regions.
-    PostMCRemSetInvalidateClosure rs_invalidate(mr_bs());
-    heap_region_iterate(&rs_invalidate);
     if (_cg1r->use_cache()) {
       _cg1r->clear_and_record_card_counts();
       _cg1r->clear_hot_cache();
     }

+    // Rebuild remembered sets of all regions.
+    if (ParallelGCThreads > 0) {
+      ParRebuildRSTask rebuild_rs_task(this);
+      assert(check_heap_region_claim_values(
+             HeapRegion::InitialClaimValue), "sanity check");
+      set_par_threads(workers()->total_workers());
+      workers()->run_task(&rebuild_rs_task);
+      set_par_threads(0);
+      assert(check_heap_region_claim_values(
+             HeapRegion::RebuildRSClaimValue), "sanity check");
+      reset_heap_region_claim_values();
+    } else {
+      RebuildRSOutOfRegionClosure rebuild_rs(this);
+      heap_region_iterate(&rebuild_rs);
+    }
+
     if (PrintGC) {
       print_size_transition(gclog_or_tty,
                             g1h_prev_used, used(), capacity());
     }
@@ -957,7 +1018,8 @@
   // dirtied, so this should abandon those logs, and set "do_traversal"
   // to true.
   concurrent_g1_refine()->set_pya_restart();
-
+  assert(!G1DeferredRSUpdate
+         || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
   assert(regions_accounted_for(), "Region leakage!");
 }

@@ -1248,7 +1310,7 @@
 }

 void G1CollectedHeap::shrink(size_t shrink_bytes) {
-  release_gc_alloc_regions();
+  release_gc_alloc_regions(true /* totally */);
   tear_down_region_lists();  // We will rebuild them in a moment.
   shrink_helper(shrink_bytes);
   rebuild_region_lists();
@@ -1285,8 +1347,9 @@
   _unclean_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
-  _surviving_young_words(NULL)
-{
+  _surviving_young_words(NULL),
+  _in_cset_fast_test(NULL),
+  _in_cset_fast_test_base(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1311,9 +1374,19 @@
   }

   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-    _gc_alloc_regions[ap]       = NULL;
-    _gc_alloc_region_counts[ap] = 0;
-  }
+    _gc_alloc_regions[ap]          = NULL;
+    _gc_alloc_region_counts[ap]    = 0;
+    _retained_gc_alloc_regions[ap] = NULL;
+    // by default, we do not retain a GC alloc region for each ap;
+    // we'll override this, when appropriate, below
+    _retain_gc_alloc_region[ap]    = false;
+  }
+
+  // We will try to remember the last half-full tenured region we
+  // allocated to at the end of a collection so that we can re-use it
+  // during the next collection.
+  _retain_gc_alloc_region[GCAllocForTenured] = true;
+
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 }

@@ -1460,6 +1533,13 @@
                                       G1DirtyCardQueueMax,
                                       Shared_DirtyCardQ_lock);
   }
+  if (G1DeferredRSUpdate) {
+    dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                      DirtyCardQ_FL_lock,
+                                      0,
+                                      Shared_DirtyCardQ_lock,
+                                      &JavaThread::dirty_card_queue_set());
+  }
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
@@ -2052,15 +2132,7 @@
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
-    if (r->isHumongous()) {
-      if (r->startsHumongous()) {
-        // Verify the single H object.
-        oop(r->bottom())->verify();
-        size_t word_sz = oop(r->bottom())->size();
-        guarantee(r->top() == r->bottom() + word_sz,
-                  "Only one object in a humongous region");
-      }
-    } else {
+    if (!r->continuesHumongous()) {
       VerifyObjsInRegionClosure not_dead_yet_cl(r);
       r->verify(_allow_dirty);
       r->object_iterate(&not_dead_yet_cl);
@@ -2112,6 +2184,7 @@
     _g1h(g1h), _allow_dirty(allow_dirty) { }

   void work(int worker_i) {
+    HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                           HeapRegion::ParVerifyClaimValue);
@@ -2310,7 +2383,6 @@

 void G1CollectedHeap::checkConcurrentMark() {
     VerifyMarkedObjsClosure verifycl(this);
-    doConcurrentMark();
     //    MutexLockerEx x(getMarkBitMapLock(),
     //              Mutex::_no_safepoint_check_flag);
     object_iterate(&verifycl);
@@ -2485,6 +2557,19 @@
     g1_policy()->record_collection_pause_start(start_time_sec,
                                                start_used_bytes);

+    guarantee(_in_cset_fast_test == NULL, "invariant");
+    guarantee(_in_cset_fast_test_base == NULL, "invariant");
+    _in_cset_fast_test_length = max_regions();
+    _in_cset_fast_test_base =
+                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+    memset(_in_cset_fast_test_base, false,
+                                    _in_cset_fast_test_length * sizeof(bool));
+    // We're biasing _in_cset_fast_test to avoid subtracting the
+    // beginning of the heap every time we want to index; basically
+    // it's the same as what we do with the card table.
+    _in_cset_fast_test = _in_cset_fast_test_base -
+              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
+
 #if SCAN_ONLY_VERBOSE
     _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
@@ -2553,13 +2638,19 @@
       free_collection_set(g1_policy()->collection_set());
       g1_policy()->clear_collection_set();

+      FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+      // this is more for peace of mind; we're nulling them here and
+      // we're expecting them to be null at the beginning of the next GC
+      _in_cset_fast_test = NULL;
+      _in_cset_fast_test_base = NULL;
+
       if (popular_region != NULL) {
         // We have to wait until now, because we don't want the region to
         // be rescheduled for pop-evac during RS update.
         popular_region->set_popular_pending(false);
       }

-      release_gc_alloc_regions();
+      release_gc_alloc_regions(false /* totally */);

       cleanup_surviving_young_words();

@@ -2572,6 +2663,9 @@
         _young_list->print();
 #endif // SCAN_ONLY_VERBOSE

+        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+                                          _young_list->first_survivor_region(),
+                                          _young_list->last_survivor_region());
         _young_list->reset_auxilary_lists();
       }
     } else {
@@ -2598,7 +2692,8 @@
 #endif // SCAN_ONLY_VERBOSE

     double end_time_sec = os::elapsedTime();
-    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
+    g1_policy()->record_pause_time_ms(pause_time_ms);
     GCOverheadReporter::recordSTWEnd(end_time_sec);
     g1_policy()->record_collection_pause_end(popular_region != NULL,
                                              abandoned);
@@ -2608,6 +2703,7 @@
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
+      prepare_for_verify();
       Universe::verify(false);
     }

@@ -2641,6 +2737,10 @@
 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
   assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");

+  // make sure we don't call set_gc_alloc_region() multiple times on
+  // the same region
+  assert(r == NULL || !r->is_gc_alloc_region(),
+         "shouldn't already be a GC alloc region");
   HeapWord* original_top = NULL;
   if (r != NULL)
     original_top = r->top();
@@ -2730,9 +2830,22 @@
   while (_gc_alloc_region_list != NULL) {
     HeapRegion* r = _gc_alloc_region_list;
     assert(r->is_gc_alloc_region(), "Invariant.");
+    // We need HeapRegion::oops_on_card_seq_iterate_careful() to work on
+    // newly allocated data in order to be able to apply deferred updates
+    // before the GC is done for verification purposes (i.e. to allow
+    // G1HRRSFlushLogBuffersOnVerify). It's a safe thing to do after the
+    // collection.
+    r->ContiguousSpace::set_saved_mark();
     _gc_alloc_region_list = r->next_gc_alloc_region();
     r->set_next_gc_alloc_region(NULL);
     r->set_is_gc_alloc_region(false);
+    if (r->is_survivor()) {
+      if (r->is_empty()) {
+        r->set_not_young();
+      } else {
+        _young_list->add_survivor_region(r);
+      }
+    }
     if (r->is_empty()) {
       ++_free_regions;
     }
@@ -2750,23 +2863,55 @@
 }

 void G1CollectedHeap::get_gc_alloc_regions() {
+  // First, let's check that the GC alloc region list is empty (it should)
+  assert(_gc_alloc_region_list == NULL, "invariant");
+
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    assert(_gc_alloc_regions[ap] == NULL, "invariant");
+
     // Create new GC alloc regions.
-    HeapRegion* alloc_region = _gc_alloc_regions[ap];
-    // Clear this alloc region, so that in case it turns out to be
-    // unacceptable, we end up with no allocation region, rather than a bad
-    // one.
-    _gc_alloc_regions[ap] = NULL;
-    if (alloc_region == NULL || alloc_region->in_collection_set()) {
-      // Can't re-use old one.  Allocate a new one.
+    HeapRegion* alloc_region = _retained_gc_alloc_regions[ap];
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (alloc_region != NULL) {
+      assert(_retain_gc_alloc_region[ap], "only way to retain a GC region");
+
+      // let's make sure that the GC alloc region is not tagged as such
+      // outside a GC operation
+      assert(!alloc_region->is_gc_alloc_region(), "sanity");
+
+      if (alloc_region->in_collection_set() ||
+          alloc_region->top() == alloc_region->end() ||
+          alloc_region->top() == alloc_region->bottom()) {
+        // we will discard the current GC alloc region if it's in the
+        // collection set (it can happen!), if it's already full (no
+        // point in using it), or if it's empty (this means that it
+        // was emptied during a cleanup and it should be on the free
+        // list now).
+
+        alloc_region = NULL;
+      }
+    }
+
+    if (alloc_region == NULL) {
+      // we will get a new GC alloc region
       alloc_region = newAllocRegionWithExpansion(ap, 0);
     }
+
     if (alloc_region != NULL) {
+      assert(_gc_alloc_regions[ap] == NULL, "pre-condition");
       set_gc_alloc_region(ap, alloc_region);
     }
+
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap]->is_gc_alloc_region(),
+           "the GC alloc region should be tagged as such");
+    assert(_gc_alloc_regions[ap] == NULL ||
+           _gc_alloc_regions[ap] == _gc_alloc_region_list,
+           "the GC alloc region should be the same as the GC alloc list head");
   }
   // Set alternative regions for allocation purposes that have reached
-  // thier limit.
+  // their limit.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
     if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
@@ -2776,27 +2921,55 @@
   assert(check_gc_alloc_regions(), "alloc regions messed up");
 }

-void G1CollectedHeap::release_gc_alloc_regions() {
+void G1CollectedHeap::release_gc_alloc_regions(bool totally) {
   // We keep a separate list of all regions that have been alloc regions in
-  // the current collection pause. Forget that now.
+  // the current collection pause. Forget that now. This method will
+  // untag the GC alloc regions and tear down the GC alloc region
+  // list. It's desirable that no regions are tagged as GC alloc
+  // outside GCs.
   forget_alloc_region_list();

   // The current alloc regions contain objs that have survived
   // collection. Make them no longer GC alloc regions.
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
-    if (r != NULL && r->is_empty()) {
-      {
+    _retained_gc_alloc_regions[ap] = NULL;
+
+    if (r != NULL) {
+      // we retain nothing on _gc_alloc_regions between GCs
+      set_gc_alloc_region(ap, NULL);
+      _gc_alloc_region_counts[ap] = 0;
+
+      if (r->is_empty()) {
+        // we didn't actually allocate anything in it; let's just put
+        // it on the free list
         MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
         r->set_zero_fill_complete();
         put_free_region_on_list_locked(r);
+      } else if (_retain_gc_alloc_region[ap] && !totally) {
+        // retain it so that we can use it at the beginning of the next GC
+        _retained_gc_alloc_regions[ap] = r;
       }
     }
-    // set_gc_alloc_region will also NULLify all aliases to the region
-    set_gc_alloc_region(ap, NULL);
-    _gc_alloc_region_counts[ap] = 0;
-  }
-}
+  }
+}
+
+#ifndef PRODUCT
+// Useful for debugging
+
+void G1CollectedHeap::print_gc_alloc_regions() {
+  gclog_or_tty->print_cr("GC alloc regions");
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    HeapRegion* r = _gc_alloc_regions[ap];
+    if (r == NULL) {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT, ap, NULL);
+    } else {
+      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT" "SIZE_FORMAT,
+                             ap, r->bottom(), r->used());
+    }
+  }
+}
+#endif // PRODUCT

 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
@@ -2877,27 +3050,51 @@
   }
 };

-class RecreateRSetEntriesClosure: public OopClosure {
+class UpdateRSetImmediate : public OopsInHeapRegionClosure {
 private:
   G1CollectedHeap* _g1;
   G1RemSet* _g1_rem_set;
-  HeapRegion* _from;

 public:
-  RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) :
-    _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from)
-  {}
+  UpdateRSetImmediate(G1CollectedHeap* g1) :
+    _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}

   void do_oop(narrowOop* p) {
     guarantee(false, "NYI");
   }
   void do_oop(oop* p) {
     assert(_from->is_in_reserved(p), "paranoia");
-    if (*p != NULL) {
-      _g1_rem_set->write_ref(_from, p);
+    if (*p != NULL && !_from->is_survivor()) {
+      _g1_rem_set->par_write_ref(_from, p, 0);
     }
   }
 };

+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+  void do_oop(oop* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(*p) && !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+
+
 class RemoveSelfPointerClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
@@ -2905,11 +3102,11 @@
   HeapRegion* _hr;
   size_t _prev_marked_bytes;
   size_t _next_marked_bytes;
+  OopsInHeapRegionClosure *_cl;
 public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) :
-    _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr),
-    _prev_marked_bytes(0), _next_marked_bytes(0)
-  {}
+  RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) :
+    _g1(g1), _cm(_g1->concurrent_mark()), _prev_marked_bytes(0),
+    _next_marked_bytes(0), _cl(cl) {}

   size_t prev_marked_bytes() { return _prev_marked_bytes; }
   size_t next_marked_bytes() { return _next_marked_bytes; }
@@ -2947,8 +3144,7 @@
       // that, if evacuation fails, we might have remembered set
       // entries missing given that we skipped cards on the
       // collection set. So, we'll recreate such entries now.
-      RecreateRSetEntriesClosure cl(_g1, _hr);
-      obj->oop_iterate(&cl);
+      obj->oop_iterate(_cl);
       assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
       // The object has been either evacuated or is dead. Fill it with a
@@ -2961,14 +3157,23 @@
 };

 void G1CollectedHeap::remove_self_forwarding_pointers() {
+  UpdateRSetImmediate immediate_update(_g1h);
+  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+  UpdateRSetDeferred deferred_update(_g1h, &dcq);
+  OopsInHeapRegionClosure *cl;
+  if (G1DeferredRSUpdate) {
+    cl = &deferred_update;
+  } else {
+    cl = &immediate_update;
+  }
   HeapRegion* cur = g1_policy()->collection_set();
-
   while (cur != NULL) {
     assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");

+    RemoveSelfPointerClosure rspc(_g1h, cl);
     if (cur->evacuation_failed()) {
-      RemoveSelfPointerClosure rspc(_g1h, cur);
       assert(cur->in_collection_set(), "bad CS");
+      cl->set_region(cur);
       cur->object_iterate(&rspc);

       // A number of manipulations to make the TAMS be the current top,
@@ -3129,6 +3334,20 @@
   return block;
 }

+void G1CollectedHeap::retire_alloc_region(HeapRegion* alloc_region,
+                                          bool par) {
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+}
+
 HeapWord*
 G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
                                          HeapRegion*    alloc_region,
@@ -3146,16 +3365,7 @@
     // Otherwise, continue; this new region is empty, too.
   }
   assert(alloc_region != NULL, "We better have an allocation region");
-  // Another thread might have obtained alloc_region for the given
-  // purpose, and might be attempting to allocate in it, and might
-  // succeed.  Therefore, we can't do the "finalization" stuff on the
-  // region below until we're sure the last allocation has happened.
-  // We ensure this by allocating the remaining space with a garbage
-  // object.
-  if (par) par_allocate_remaining_space(alloc_region);
-  // Now we can do the post-GC stuff on the region.
-  alloc_region->note_end_of_copying();
-  g1_policy()->record_after_bytes(alloc_region->used());
+  retire_alloc_region(alloc_region, par);

   if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
     // Cannot allocate more regions for the given purpose.
@@ -3164,7 +3374,7 @@
     if (purpose != alt_purpose) {
       HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
       // Has not the alternative region been aliased?
-      if (alloc_region != alt_region) {
+      if (alloc_region != alt_region && alt_region != NULL) {
        // Try to allocate in the alternative region.
         if (par) {
           block = alt_region->par_allocate(word_size);
@@ -3173,9 +3383,10 @@
         }
         // Make an alias.
         _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
-      }
-      if (block != NULL) {
-        return block;
+        if (block != NULL) {
+          return block;
+        }
+        retire_alloc_region(alt_region, par);
       }
       // Both the allocation region and the alternative one are full
       // and aliased, replace them with a new allocation region.
@@ -3471,11 +3682,15 @@
 protected:
   G1CollectedHeap* _g1h;
   RefToScanQueue* _refs;
+  DirtyCardQueue _dcq;
+  CardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;

   typedef GrowableArray<oop*> OverflowQueue;
   OverflowQueue* _overflowed_refs;

   G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable _age_table;

   size_t _alloc_buffer_waste;
   size_t _undo_waste;
@@ -3511,12 +3726,37 @@
   void add_to_undo_waste(size_t waste) { _undo_waste += waste; }

+  DirtyCardQueue& dirty_card_queue() { return _dcq;  }
+  CardTableModRefBS* ctbs()          { return _ct_bs; }
+
+  void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
+    if (!from->is_survivor()) {
+      _g1_rem->par_write_ref(from, p, tid);
+    }
+  }
+
+  void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
+    // If the new value of the field points to the same region or
+    // is the to-space, we don't need to include it in the Rset updates.
+    if (!from->is_in_reserved(*p) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
 public:
   G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
     : _g1h(g1h),
       _refs(g1h->task_queue(queue_num)),
+      _dcq(&g1h->dirty_card_queue_set()),
+      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
+      _g1_rem(g1h->g1_rem_set()),
       _hash_seed(17), _queue_num(queue_num),
       _term_attempts(0),
+      _age_table(false),
 #if G1_DETAILED_STATS
       _pushes(0), _pops(0), _steals(0),
       _steal_attempts(0),  _overflow_pushes(0),
@@ -3551,8 +3791,9 @@

   RefToScanQueue* refs()            { return _refs;             }
   OverflowQueue* overflowed_refs()  { return _overflowed_refs;  }
-
-  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+  ageTable* age_table()             { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return &_alloc_buffers[purpose];
   }

@@ -3560,6 +3801,9 @@
   size_t undo_waste()               { return _undo_waste; }

   void push_on_queue(oop* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref), "invariant");
+
     if (!refs()->push(ref)) {
       overflowed_refs()->push(ref);
       IF_G1_DETAILED_STATS(note_overflow_push());
@@ -3572,6 +3816,10 @@
     if (!refs()->pop_local(ref)) {
       ref = NULL;
     } else {
+      assert(ref != NULL, "invariant");
+      assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref),
+             "invariant");
+
       IF_G1_DETAILED_STATS(note_pop());
     }
   }
@@ -3583,6 +3831,14 @@
   int refs_to_scan()            { return refs()->size();              }
   int overflowed_refs_to_scan() { return overflowed_refs()->length(); }

+  void update_rs(HeapRegion* from, oop* p, int tid) {
+    if (G1DeferredRSUpdate) {
+      deferred_rs_update(from, p, tid);
+    } else {
+      immediate_rs_update(from, p, tid);
+    }
+  }
+
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {

     HeapWord* obj = NULL;
@@ -3601,8 +3857,7 @@
       obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, "buffer was definitely big enough...");
-    }
-    else {
+    } else {
       obj = _g1h->par_allocate_during_gc(purpose, word_sz);
     }
     return obj;
@@ -3695,31 +3950,63 @@
     }
   }

+private:
+  void deal_with_reference(oop* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
   void trim_queue() {
+    // I've replicated the loop twice, first to drain the overflow
+    // queue, second to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
     while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
-      oop *ref_to_scan = NULL;
-      if (overflowed_refs_to_scan() == 0) {
-        pop_from_queue(ref_to_scan);
-      } else {
+      while (overflowed_refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
         pop_from_overflow_queue(ref_to_scan);
+        assert(ref_to_scan != NULL, "invariant");
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert(ref_to_scan != NULL, "sanity");
+        assert(has_partial_array_mask(ref_to_scan) ||
+               _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+        deal_with_reference(ref_to_scan);
       }
-      if (ref_to_scan != NULL) {
-        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
-          _partial_scan_cl->do_oop_nv(ref_to_scan);
-        } else {
-          // Note: we can use "raw" versions of "region_containing" because
-          // "obj_to_scan" is definitely in the heap, and is not in a
-          // humongous region.
-          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-          _evac_cl->set_region(r);
-          _evac_cl->do_oop_nv(ref_to_scan);
+
+      while (refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
+        pop_from_queue(ref_to_scan);
+
+        if (ref_to_scan != NULL) {
+          // We shouldn't have pushed it on the queue if it was not
+          // pointing into the CSet.
+          assert(has_partial_array_mask(ref_to_scan) ||
+                 _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+          deal_with_reference(ref_to_scan);
         }
       }
     }
   }
 };

-
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
                                      G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
@@ -3728,16 +4015,25 @@
 // Should probably be made inline and moved in g1OopClosures.inline.hpp.
 void G1ParScanClosure::do_oop_nv(oop* p) {
   oop obj = *p;
+
   if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        _par_scan_state->push_on_queue(p);
-        return;
-      }
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not going to even bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when we pop it from the queue
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == *p, "the value of *p should not have changed");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
-    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
   }
 }

@@ -3765,7 +4061,9 @@
          (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+                                           : m->age();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
   HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
   oop obj = oop(obj_ptr);
@@ -3777,13 +4075,39 @@
     return _g1->handle_evacuation_failure_par(cl, old);
   }

+  // We're going to allocate linearly, so might as well prefetch ahead.
+  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
+
   oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
-    obj->set_mark(m);
     if (g1p->track_object_age(alloc_purpose)) {
-      obj->incr_age();
+      // We could simply do obj->incr_age(). However, this causes a
+      // performance issue. obj->incr_age() will first check whether
+      // the object has a displaced mark by checking its mark word;
+      // getting the mark word from the new location of the object
+      // stalls. So, given that we already have the mark word and we
+      // are about to install it anyway, it's better to increase the
+      // age on the mark word, when the object does not have a
+      // displaced mark word. We're not expecting many objects to have
+      // a displaced mark word, so that case is not optimized
+      // further (it could be...) and we simply call obj->incr_age().
+
+      if (m->has_displaced_mark_helper()) {
+        // in this case, we have to install the mark word first,
+        // otherwise obj looks to be forwarded (the old mark word,
+        // which contains the forward pointer, was copied)
+        obj->set_mark(m);
+        obj->incr_age();
+      } else {
+        m = m->incr_age();
+        obj->set_mark(m);
+      }
+      _par_scan_state->age_table()->add(obj, word_sz);
+    } else {
+      obj->set_mark(m);
     }
+
     // preserve "next" mark bit
     if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
       if (!use_local_bitmaps ||
@@ -3805,9 +4129,11 @@

     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       arrayOop(old)->set_length(0);
-      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+      _par_scan_state->push_on_queue(set_partial_array_mask(old));
     } else {
-      _scanner->set_region(_g1->heap_region_containing(obj));
+      // No point in using the slower heap_region_containing() method,
+      // given that we know obj is in the heap.
+      _scanner->set_region(_g1->heap_region_containing_raw(obj));
       obj->oop_iterate_backwards(_scanner);
     }
   } else {
@@ -3817,47 +4143,55 @@
   return obj;
 }

-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
-void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
+void G1ParCopyClosure<do_gen_barrier, barrier,
+                      do_mark_forwardee, skip_cset_test>::do_oop_work(oop* p) {
   oop obj = *p;
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");

-  if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
+  // The only time we skip the cset test is when we're scanning
+  // references popped from the queue. And we only push on the queue
+  // references that we know point into the cset, so no point in
+  // checking again. But we'll leave an assert here for peace of mind.
+  assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
+
+  // here the null check is implicit in the cset_fast_test() test
+  if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
 #if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
-                             p, (void*) obj);
+    gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
+                           "into CS.", p, (void*) obj);
 #endif
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        *p = copy_to_survivor_space(obj);
-      }
-      // When scanning the RS, we only care about objs in CS.
-      if (barrier == G1BarrierRS) {
-        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-      }
+    if (obj->is_forwarded()) {
+      *p = obj->forwardee();
+    } else {
+      *p = copy_to_survivor_space(obj);
+    }
+    // When scanning the RS, we only care about objs in CS.
+    if (barrier == G1BarrierRS) {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     }
-    // When scanning moved objs, must look at all oops.
-    if (barrier == G1BarrierEvac) {
-      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-    }
-
-    if (do_gen_barrier) {
-      par_do_barrier(p);
-    }
-  }
-}
-
-template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
-
-template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  }
+
+  // When scanning moved objs, must look at all oops.
+  if (barrier == G1BarrierEvac && obj != NULL) {
+    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+  }
+
+  if (do_gen_barrier && obj != NULL) {
+    par_do_barrier(p);
+  }
+}
+
+template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
+
+template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
     oop obj, int start, int end) {
   // process our set of indices (include header in first chunk)
   assert(start < end, "invariant");
   T* const base       = (T*)objArrayOop(obj)->base();
-  T* const start_addr = base + start;
+  T* const start_addr = (start == 0) ? (T*) obj : base + start;
   T* const end_addr   = base + end;
   MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
   _scanner.set_region(_g1->heap_region_containing(obj));
@@ -3866,7 +4200,8 @@

 void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
   assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
-  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(has_partial_array_mask(p), "invariant");
+  oop old = clear_partial_array_mask(p);
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
@@ -3884,7 +4219,7 @@
     end = start + ParGCArrayScanChunk;
     arrayOop(old)->set_length(end);
     // Push remainder.
-    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+    _par_scan_state->push_on_queue(set_partial_array_mask(old));
   } else {
     // Restore length so that the heap remains parsable in
     // case of evacuation failure.
     arrayOop(old)->set_length(end);
   }

   // process our set of indices (include header in first chunk)
   process_array_chunk<oop>(obj, start, end);
-
-  oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr<oop>(start);
-  oop* end_addr   = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length
-  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
-  _scanner.set_region(_g1->heap_region_containing(obj));
-  obj->oop_iterate(&_scanner, mr);
 }

 int G1ScanAndBalanceClosure::_nq = 0;
@@ -3931,6 +4261,13 @@
                                  pss->hash_seed(),
                                  ref_to_scan)) {
           IF_G1_DETAILED_STATS(pss->note_steal());
+
+          // slightly paranoid tests; I'm trying to catch potential
+          // problems before we go into push_on_queue to know where the
+          // problem is coming from
+          assert(ref_to_scan != NULL, "invariant");
+          assert(has_partial_array_mask(ref_to_scan) ||
+                 _g1h->obj_in_cs(*ref_to_scan), "invariant");
           pss->push_on_queue(ref_to_scan);
           continue;
         }
@@ -3976,10 +4313,10 @@
     ResourceMark rm;
     HandleMark   hm;

-    G1ParScanThreadState pss(_g1h, i);
-    G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss);
-    G1ParScanHeapEvacClosure evac_failure_cl(_g1h, &pss);
-    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss);

     pss.set_evac_closure(&scan_evac_cl);
     pss.set_evac_failure_closure(&evac_failure_cl);
@@ -3988,6 +4325,7 @@
     G1ParScanExtRootClosure         only_scan_root_cl(_g1h, &pss);
     G1ParScanPermClosure            only_scan_perm_cl(_g1h, &pss);
     G1ParScanHeapRSClosure          only_scan_heap_rs_cl(_g1h, &pss);
+
     G1ParScanAndMarkExtRootClosure  scan_mark_root_cl(_g1h, &pss);
     G1ParScanAndMarkPermClosure     scan_mark_perm_cl(_g1h, &pss);
     G1ParScanAndMarkHeapRSClosure   scan_mark_heap_rs_cl(_g1h, &pss);
@@ -4024,6 +4362,9 @@
       _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
       _g1h->g1_policy()->record_termination_time(i, term_ms);
     }
+    if (G1UseSurvivorSpace) {
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+    }
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);

     // Clean up any par-expanded rem sets.
@@ -4240,7 +4581,6 @@
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-
   set_par_threads(n_workers);
   G1ParTask g1_par_task(this, n_workers, _task_queues);

@@ -4248,8 +4588,9 @@
   change_strong_roots_parity();  // In preparation for parallel strong roots.
   rem_set()->prepare_for_younger_refs_iterate(true);
+
+  assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
   double start_par = os::elapsedTime();
-
   if (ParallelGCThreads > 0) {
     // The individual threads will set their evac-failure closures.
     workers()->run_task(&g1_par_task);
@@ -4263,14 +4604,14 @@
   // Is this the right thing to do here?  We don't save marks
   // on individual heap regions when we allocate from
   // them in parallel, so this seems like the correct place for this.
-  all_alloc_regions_note_end_of_copying();
+  retire_all_alloc_regions();
   {
     G1IsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
     JNIHandles::weak_oops_do(&is_alive, &keep_alive);
   }
-  g1_rem_set()->cleanup_after_oops_into_collection_set_do();
+
   concurrent_g1_refine()->set_use_cache(true);

   finalize_for_evac_failure();

@@ -4281,7 +4622,6 @@

   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
-
     if (PrintGCDetails) {
       gclog_or_tty->print(" (evacuation failed)");
     } else if (PrintGC) {
@@ -4289,6 +4629,14 @@
     }
   }

+  if (G1DeferredRSUpdate) {
+    RedirtyLoggedCardTableEntryFastClosure redirty;
+    dirty_card_queue_set().set_closure(&redirty);
+    dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
+    assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+  }
+
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }

@@ -4903,7 +5251,7 @@
   return no_allocs;
 }

-void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+void G1CollectedHeap::retire_all_alloc_regions() {
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
     if (r != NULL) {
@@ -4916,8 +5264,7 @@
       }
     }
     if (!has_processed_alias) {
-      r->note_end_of_copying();
-      g1_policy()->record_after_bytes(r->used());
+      retire_alloc_region(r, false /* par */);
     }
   }
 }
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -90,7 +90,7 @@
   HeapRegion* _curr_scan_only;

   HeapRegion* _survivor_head;
-  HeapRegion* _survivors_tail;
+  HeapRegion* _survivor_tail;
   size_t      _survivor_length;

   void          empty_list(HeapRegion* list);
@@ -105,6 +105,7 @@
   bool          is_empty() { return _length == 0; }
   size_t        length() { return _length; }
   size_t        scan_only_length() { return _scan_only_length; }
+  size_t        survivor_length() { return _survivor_length; }

   void rs_length_sampling_init();
   bool rs_length_sampling_more();
@@ -120,6 +121,7 @@
   HeapRegion* first_region() { return _head; }
   HeapRegion* first_scan_only_region() { return _scan_only_head; }
   HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* last_survivor_region() { return _survivor_tail; }
   HeapRegion* par_get_next_scan_only_region() {
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
     HeapRegion* ret = _curr_scan_only;
@@ -170,7 +172,6 @@
     NumAPIs = HeapRegion::MaxAge
   };

-
   // The one and only G1CollectedHeap, so static functions can find it.
   static G1CollectedHeap* _g1h;

@@ -215,11 +216,20 @@
   // Postcondition: cur_alloc_region == NULL.
   void abandon_cur_alloc_region();
+  void abandon_gc_alloc_regions();

   // The to-space memory regions into which objects are being copied during
   // a GC.
   HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
-  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+  size_t _gc_alloc_region_counts[GCAllocPurposeCount];
+  // These are the regions, one per GCAllocPurpose, that are half-full
+  // at the end of a collection and that we want to reuse during the
+  // next collection.
+  HeapRegion* _retained_gc_alloc_regions[GCAllocPurposeCount];
+  // This specifies whether we will keep the last half-full region at
+  // the end of a collection so that it can be reused during the next
+  // collection (this is specified per GCAllocPurpose)
+  bool _retain_gc_alloc_region[GCAllocPurposeCount];

   // A list of the regions that have been set to be alloc regions in the
   // current collection.
@@ -247,6 +257,27 @@
   NumberSeq _pop_obj_rc_at_copy;
   void print_popularity_summary_info() const;

+  // This is used for a quick test on whether a reference points into
+  // the collection set or not. Basically, we have an array, with one
+  // byte per region, and that byte denotes whether the corresponding
+  // region is in the collection set or not. The entry corresponding to
+  // the bottom of the heap, i.e., region 0, is pointed to by
+  // _in_cset_fast_test_base. The _in_cset_fast_test field has been
+  // biased so that it actually points to address 0 of the address
+  // space, to make the test as fast as possible (we can simply shift
+  // the address to address into it, instead of having to subtract the
+  // bottom of the heap from the address before shifting it; basically
+  // it works in the same way the card table works).
+  bool* _in_cset_fast_test;
+
+  // The allocated array used for the fast test on whether a reference
+  // points into the collection set or not. This field is also used to
+  // free the array.
+  bool* _in_cset_fast_test_base;
+
+  // The length of the _in_cset_fast_test_base array.
+  size_t _in_cset_fast_test_length;
+
   volatile unsigned _gc_time_stamp;

   size_t* _surviving_young_words;
@@ -260,8 +291,8 @@
   // Returns "true" iff none of the gc alloc regions have any allocations
   // since the last call to "save_marks".
   bool all_alloc_regions_no_allocs_since_save_marks();
-  // Calls "note_end_of_copying on all gc alloc_regions.
-  void all_alloc_regions_note_end_of_copying();
+  // Perform finalization stuff on all allocation regions.
+  void retire_all_alloc_regions();

   // The number of regions allocated to hold humongous objects.
   int _num_humongous_regions;
@@ -330,6 +361,10 @@
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);

+  // Retires an allocation region when it is full or at the end of a
+  // GC pause.
+  void retire_alloc_region(HeapRegion* alloc_region, bool par);
+
   // Helper function for two callbacks below.
   // "full", if true, indicates that the GC is for a System.gc() request,
   // and should collect the entire heap. If "clear_all_soft_refs" is true,
@@ -368,6 +403,38 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);

+  // We register a region with the fast "in collection set" test. We
+  // simply set to true the array slot corresponding to this region.
+  void register_region_with_in_cset_fast_test(HeapRegion* r) {
+    assert(_in_cset_fast_test_base != NULL, "sanity");
+    assert(r->in_collection_set(), "invariant");
+    int index = r->hrs_index();
+    assert(0 <= (size_t) index && (size_t) index < _in_cset_fast_test_length,
+           "invariant");
+    assert(!_in_cset_fast_test_base[index], "invariant");
+    _in_cset_fast_test_base[index] = true;
+  }
+
+  // This is a fast test on whether a reference points into the
+  // collection set or not. It does not assume that the reference
+  // points into the heap; if it doesn't, it will return false.
+  bool in_cset_fast_test(oop obj) {
+    assert(_in_cset_fast_test != NULL, "sanity");
+    if (_g1_committed.contains((HeapWord*) obj)) {
+      // no need to subtract the bottom of the heap from obj,
+      // _in_cset_fast_test is biased
+      size_t index = ((size_t) obj) >> HeapRegion::LogOfHRGrainBytes;
+      bool ret = _in_cset_fast_test[index];
+      // let's make sure the result is consistent with what the slower
+      // test returns
+      assert( ret || !obj_in_cs(obj), "sanity");
+      assert(!ret ||  obj_in_cs(obj), "sanity");
+      return ret;
+    } else {
+      return false;
+    }
+  }
+
 protected:

   // Shrink the garbage-first heap by at most the given size (in bytes!).
@@ -398,6 +465,10 @@
   // And it's mod ref barrier set, used to track updates for the above.
   ModRefBarrierSet* _mr_bs;

+  // A set of cards that cover the objects for which the Rsets should be updated
+  // concurrently after the collection.
+  DirtyCardQueueSet _dirty_card_queue_set;
+
   // The Heap Region Rem Set Iterator.
   HeapRegionRemSetIterator** _rem_set_iterator;

@@ -526,8 +597,21 @@
   // Ensure that the relevant gc_alloc regions are set.
   void get_gc_alloc_regions();

-  // We're done with GC alloc regions; release them, as appropriate.
-  void release_gc_alloc_regions();
+  // We're done with GC alloc regions. We are going to tear down the
+  // gc alloc list and remove the gc alloc tag from all the regions on
+  // that list. However, we will also retain the last (i.e., the one
+  // that is half-full) GC alloc region, per GCAllocPurpose, for
+  // possible reuse during the next collection, provided
+  // _retain_gc_alloc_region[] indicates that it should be the
+  // case. Said regions are kept in the _retained_gc_alloc_regions[]
+  // array. If the parameter totally is set, we will not retain any
+  // regions, irrespective of what _retain_gc_alloc_region[]
+  // indicates.
+  void release_gc_alloc_regions(bool totally);
+#ifndef PRODUCT
+  // Useful for debugging.
+  void print_gc_alloc_regions();
+#endif // !PRODUCT

   // ("Weak") Reference processing support
   ReferenceProcessor* _ref_processor;
@@ -607,6 +691,9 @@

   RefToScanQueue *task_queue(int i);

+  // A set of cards where updates happened during the GC
+  DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
+
   // Create a G1CollectedHeap with the specified policy.
   // Must call the initialize method afterwards.
   // May not return if something goes wrong.
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Mar 24 13:09:52 2009 -0400
@@ -36,8 +36,11 @@

 inline HeapRegion*
 G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
-  HeapRegion* res = _hrs->addr_to_region(addr);
-  assert(res != NULL, "addr outside of heap?");
+  assert(_g1_reserved.contains(addr), "invariant");
+  size_t index = ((intptr_t) addr - (intptr_t) _g1_reserved.start())
+                                              >> HeapRegion::LogOfHRGrainBytes;
+  HeapRegion* res = _hrs->at(index);
+  assert(res == _hrs->addr_to_region(addr), "sanity");
   return res;
 }

diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 24 13:09:52 2009 -0400
@@ -196,8 +196,13 @@
   _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
                                                  G1YoungSurvRateNumRegionsSummary)),
   _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
-                                              G1YoungSurvRateNumRegionsSummary))
+                                              G1YoungSurvRateNumRegionsSummary)),
   // add here any more surv rate groups
+  _recorded_survivor_regions(0),
+  _recorded_survivor_head(NULL),
+  _recorded_survivor_tail(NULL),
+  _survivors_age_table(true)
+
 {
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
@@ -272,6 +277,15 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
   _tenuring_threshold = MaxTenuringThreshold;

+  if (G1UseSurvivorSpace) {
+    // if G1FixedSurvivorSpaceSize is 0 which means the size is not
+    // fixed, then _max_survivor_regions will be calculated at
+    // calculate_young_list_target_config during initialization
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  } else {
+    _max_survivor_regions = 0;
+  }
+
   initialize_all();
 }

@@ -283,6 +297,9 @@
 void G1CollectorPolicy::initialize_flags() {
   set_min_alignment(HeapRegion::GrainBytes);
   set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  if (SurvivorRatio < 1) {
+    vm_exit_during_initialization("Invalid survivor ratio specified");
+  }
   CollectorPolicy::initialize_flags();
 }

@@ -301,6 +318,8 @@
            "-XX:+UseConcMarkSweepGC.");
   }

+  initialize_gc_policy_counters();
+
   if (G1Gen) {
     _in_young_gc_mode = true;

@@ -322,6 +341,12 @@
   }
 }

+// Create the jstat counters for the policy.
+void G1CollectorPolicy::initialize_gc_policy_counters() +{ + _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen); +} + void G1CollectorPolicy::calculate_young_list_min_length() { _young_list_min_length = 0; @@ -352,6 +377,7 @@ guarantee( so_length < _young_list_target_length, "invariant" ); _young_list_so_prefix_length = so_length; } + calculate_survivors_policy(); } // This method calculates the optimal scan-only set for a fixed young @@ -448,6 +474,9 @@ if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { // we are in fully-young mode and there are free regions in the heap + double survivor_regions_evac_time = + predict_survivor_regions_evac_time(); + size_t min_so_length = 0; size_t max_so_length = 0; @@ -497,9 +526,8 @@ scanned_cards = predict_non_young_card_num(adj_rs_lengths); // calculate this once, so that we don't have to recalculate it in // the innermost loop - double base_time_ms = predict_base_elapsed_time_ms(pending_cards, - scanned_cards); - + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards) + + survivor_regions_evac_time; // the result size_t final_young_length = 0; size_t final_so_length = 0; @@ -548,14 +576,14 @@ bool done = false; // this is the outermost loop while (!done) { -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT ", incr " SIZE_FORMAT ", pass %s", from_so_length, to_so_length, so_length_incr, (pass == pass_type_coarse) ? "coarse" : (pass == pass_type_fine) ? "fine" : "final"); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG size_t so_length = from_so_length; size_t init_free_regions = @@ -651,11 +679,11 @@ guarantee( so_length_incr == so_coarse_increments, "invariant" ); guarantee( final_so_length >= min_so_length, "invariant" ); -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr(" coarse pass: SO length " SIZE_FORMAT, final_so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG from_so_length = (final_so_length - min_so_length > so_coarse_increments) ? @@ -687,12 +715,12 @@ // of the optimal size_t new_so_length = 950 * final_so_length / 1000; -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr(" fine pass: SO length " SIZE_FORMAT ", setting it to " SIZE_FORMAT, final_so_length, new_so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG from_so_length = new_so_length; to_so_length = new_so_length; @@ -719,7 +747,8 @@ } // we should have at least one region in the target young length - _young_list_target_length = MAX2((size_t) 1, final_young_length); + _young_list_target_length = + MAX2((size_t) 1, final_young_length + _recorded_survivor_regions); if (final_so_length >= final_young_length) // and we need to ensure that the S-O length is not greater than // the target young length (this is being a bit careful) @@ -734,7 +763,7 @@ double end_time_sec = os::elapsedTime(); double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0; -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT ", SO = " SIZE_FORMAT ", " @@ -747,9 +776,9 @@ calculations, full_young_gcs() ? "full" : "partial", should_initiate_conc_mark() ?
" i-m" : "", - in_marking_window(), - in_marking_window_im()); -#endif // 0 + _in_marking_window, + _in_marking_window_im); +#endif // TRACE_CALC_YOUNG_CONFIG if (_young_list_target_length < _young_list_min_length) { // bummer; this means that, if we do a pause when the optimal @@ -768,14 +797,14 @@ // S-O length so_length = calculate_optimal_so_length(_young_list_min_length); -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("adjusted target length from " SIZE_FORMAT " to " SIZE_FORMAT ", SO " SIZE_FORMAT, _young_list_target_length, _young_list_min_length, so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG _young_list_target_length = MAX2(_young_list_min_length, (size_t)1); @@ -785,12 +814,12 @@ // we are in a partially-young mode or we've run out of regions (due // to evacuation failure) -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT ", SO " SIZE_FORMAT, _young_list_min_length, 0); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG // we'll do the pause as soon as possible and with no S-O prefix // (see above for the reasons behind the latter) @@ -884,6 +913,16 @@ return true; } +double G1CollectorPolicy::predict_survivor_regions_evac_time() { + double survivor_regions_evac_time = 0.0; + for (HeapRegion * r = _recorded_survivor_head; + r != NULL && r != _recorded_survivor_tail->get_next_young_region(); + r = r->get_next_young_region()) { + survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true); + } + return survivor_regions_evac_time; +} + void G1CollectorPolicy::check_prediction_validity() { guarantee( adaptive_young_list_length(), "should not call this otherwise" ); @@ -975,7 +1014,7 @@ _all_full_gc_times_ms->add(full_gc_time_ms); - update_recent_gc_times(end_sec, full_gc_time_sec); + update_recent_gc_times(end_sec, full_gc_time_ms); _g1->clear_full_collection(); @@ -995,11 +1034,15 @@ _short_lived_surv_rate_group->start_adding_regions(); // also call this on any additional surv rate groups + record_survivor_regions(0, NULL, NULL); + _prev_region_num_young = _region_num_young; _prev_region_num_tenured = _region_num_tenured; _free_regions_at_end_of_collection = _g1->free_regions(); _scan_only_regions_at_end_of_collection = 0; + // Reset survivors SurvRateGroup. 
+ _survivor_surv_rate_group->reset(); calculate_young_list_min_length(); calculate_young_list_target_config(); } @@ -1044,6 +1087,7 @@ assert(_g1->used_regions() == _g1->recalculate_used_regions(), "sanity"); + assert(_g1->used() == _g1->recalculate_used(), "sanity"); double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; _all_stop_world_times_ms->add(s_w_t_ms); @@ -1104,6 +1148,10 @@ _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length); tag_scan_only(short_lived_so_length); + if (G1UseSurvivorSpace) { + _survivors_age_table.clear(); + } + assert( verify_young_ages(), "region age verification" ); } @@ -1428,6 +1476,7 @@ size_t cur_used_bytes = _g1->used(); assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); bool last_pause_included_initial_mark = false; + bool update_stats = !abandoned && !_g1->evacuation_failed(); #ifndef PRODUCT if (G1YoungSurvRateVerbose) { @@ -1488,7 +1537,7 @@ _n_pauses++; - if (!abandoned) { + if (update_stats) { _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); _recent_evac_times_ms->add(evac_ms); @@ -1588,8 +1637,10 @@ double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); double termination_time = avg_value(_par_last_termination_times_ms); - double parallel_other_time; - if (!abandoned) { + double parallel_other_time = _cur_collection_par_time_ms - + (update_rs_time + ext_root_scan_time + mark_stack_scan_time + + scan_only_time + scan_rs_time + obj_copy_time + termination_time); + if (update_stats) { MainBodySummary* body_summary = summary->main_body_summary(); guarantee(body_summary != NULL, "should not be null!"); @@ -1607,9 +1658,6 @@ body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); body_summary->record_termination_time_ms(termination_time); - parallel_other_time = _cur_collection_par_time_ms - - (update_rs_time + ext_root_scan_time + mark_stack_scan_time + - scan_only_time + scan_rs_time + obj_copy_time + termination_time); body_summary->record_parallel_other_time_ms(parallel_other_time); } body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); @@ -1754,8 +1802,10 @@ gclog_or_tty->print_cr("]"); _all_pause_times_ms->add(elapsed_ms); - summary->record_total_time_ms(elapsed_ms); - summary->record_other_time_ms(other_time_ms); + if (update_stats) { + summary->record_total_time_ms(elapsed_ms); + summary->record_other_time_ms(other_time_ms); + } for (int i = 0; i < _aux_num; ++i) if (_cur_aux_times_set[i]) _all_aux_times_ms[i].add(_cur_aux_times_ms[i]); @@ -1805,7 +1855,7 @@ // - if (!popular && !abandoned) { + if (!popular && update_stats) { double pause_time_ms = elapsed_ms; size_t diff = 0; @@ -1965,9 +2015,6 @@ // _target_pause_time_ms = -1.0; - - // TODO: calculate tenuring threshold - _tenuring_threshold = MaxTenuringThreshold; } // @@ -2058,7 +2105,7 @@ guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1, "invariant" ); int age = hr->age_in_surv_rate_group(); - double yg_surv_rate = predict_yg_surv_rate(age); + double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group()); bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate); } @@ -2091,7 +2138,7 @@ } #if PREDICTIONS_VERBOSE if (young) { - _recorded_young_bytes += hr->asSpace()->used(); + _recorded_young_bytes += hr->used(); } else { _recorded_marked_bytes += hr->max_live_bytes(); } @@ -2119,11 +2166,6 @@ 
predict_non_young_card_num(_predicted_rs_lengths); _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; - _predicted_young_survival_ratio = 0.0; - for (int i = 0; i < _recorded_young_regions; ++i) - _predicted_young_survival_ratio += predict_yg_surv_rate(i); - _predicted_young_survival_ratio /= (double) _recorded_young_regions; - _predicted_scan_only_scan_time_ms = predict_scan_only_time_ms(_recorded_scan_only_regions); _predicted_rs_update_time_ms = @@ -2673,8 +2715,11 @@ assert(in_young_gc_mode(), "should be in young GC mode"); bool ret; size_t young_list_length = _g1->young_list_length(); - - if (young_list_length < _young_list_target_length) { + size_t young_list_max_length = _young_list_target_length; + if (G1FixedEdenSize) { + young_list_max_length -= _max_survivor_regions; + } + if (young_list_length < young_list_max_length) { ret = true; ++_region_num_young; } else { @@ -2710,17 +2755,39 @@ } -uint G1CollectorPolicy::max_regions(int purpose) { +size_t G1CollectorPolicy::max_regions(int purpose) { switch (purpose) { case GCAllocForSurvived: - return G1MaxSurvivorRegions; + return _max_survivor_regions; case GCAllocForTenured: - return UINT_MAX; + return REGIONS_UNLIMITED; default: - return UINT_MAX; + ShouldNotReachHere(); + return REGIONS_UNLIMITED; }; } +// Calculates survivor space parameters. +void G1CollectorPolicy::calculate_survivors_policy() +{ + if (!G1UseSurvivorSpace) { + return; + } + if (G1FixedSurvivorSpaceSize == 0) { + _max_survivor_regions = _young_list_target_length / SurvivorRatio; + } else { + _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes; + } + + if (G1FixedTenuringThreshold) { + _tenuring_threshold = MaxTenuringThreshold; + } else { + _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold( + HeapRegion::GrainWords * _max_survivor_regions); + } +} + + void G1CollectorPolicy_BestRegionsFirst:: set_single_region_collection_set(HeapRegion* hr) { @@ -2743,7 +2810,11 @@ double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; size_t young_list_length = _g1->young_list_length(); - bool reached_target_length = young_list_length >= _young_list_target_length; + size_t young_list_max_length = _young_list_target_length; + if (G1FixedEdenSize) { + young_list_max_length -= _max_survivor_regions; + } + bool reached_target_length = young_list_length >= young_list_max_length; if (in_young_gc_mode()) { if (reached_target_length) { @@ -2985,6 +3056,7 @@ _collection_set = hr; _collection_set_size++; _collection_set_bytes_used_before += hr->used(); + _g1->register_region_with_in_cset_fast_test(hr); } void diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -49,7 +49,7 @@ class MainBodySummary; class PopPreambleSummary; -class PauseSummary { +class PauseSummary: public CHeapObj { define_num_seq(total) define_num_seq(other) @@ -58,7 +58,7 @@ virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } }; -class MainBodySummary { +class MainBodySummary: public CHeapObj { define_num_seq(satb_drain) // optional define_num_seq(parallel) // parallel only define_num_seq(ext_root_scan) @@ -75,7 +75,7 @@ define_num_seq(clear_ct) // parallel only }; -class PopPreambleSummary { +class PopPreambleSummary: public CHeapObj { define_num_seq(pop_preamble) define_num_seq(pop_update_rs) 
define_num_seq(pop_scan_rs) @@ -557,6 +557,8 @@ return get_new_neg_prediction(_young_gc_eff_seq); } + double predict_survivor_regions_evac_time(); + // public: @@ -599,8 +601,8 @@ // Returns an estimate of the survival rate of the region at yg-age // "yg_age". - double predict_yg_surv_rate(int age) { - TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age); + double predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) { + TruncatedSeq* seq = surv_rate_group->get_seq(age); if (seq->num() == 0) gclog_or_tty->print("BARF! age is %d", age); guarantee( seq->num() > 0, "invariant" ); @@ -610,6 +612,10 @@ return pred; } + double predict_yg_surv_rate(int age) { + return predict_yg_surv_rate(age, _short_lived_surv_rate_group); + } + double accum_yg_surv_rate_pred(int age) { return _short_lived_surv_rate_group->accum_surv_rate_pred(age); } @@ -822,6 +828,9 @@ virtual void init(); + // Create jstat counters for the policy. + virtual void initialize_gc_policy_counters(); + virtual HeapWord* mem_allocate_work(size_t size, bool is_tlab, bool* gc_overhead_limit_was_exceeded); @@ -957,7 +966,7 @@ record_termination_time(0, ms); } - void record_pause_time(double ms) { + void record_pause_time_ms(double ms) { _last_pause_time_ms = ms; } @@ -1047,8 +1056,12 @@ // Print stats on young survival ratio void print_yg_surv_rate_info() const; - void finished_recalculating_age_indexes() { - _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + void finished_recalculating_age_indexes(bool is_survivors) { + if (is_survivors) { + _survivor_surv_rate_group->finished_recalculating_age_indexes(); + } else { + _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + } // do that for any other surv rate groups } @@ -1097,6 +1110,17 @@ // maximum number of survivor regions. int _tenuring_threshold; + // The limit on the number of regions allocated for survivors. + size_t _max_survivor_regions; + + // The number of survivor regions after a collection. + size_t _recorded_survivor_regions; + // List of survivor regions. + HeapRegion* _recorded_survivor_head; + HeapRegion* _recorded_survivor_tail; + + ageTable _survivors_age_table; + public: inline GCAllocPurpose @@ -1116,7 +1140,9 @@ return GCAllocForTenured; } - uint max_regions(int purpose); + static const size_t REGIONS_UNLIMITED = ~(size_t)0; + + size_t max_regions(int purpose); // The limit on regions for a particular purpose is reached. void note_alloc_region_limit_reached(int purpose) { @@ -1132,6 +1158,23 @@ void note_stop_adding_survivor_regions() { _survivor_surv_rate_group->stop_adding_regions(); } + + void record_survivor_regions(size_t regions, + HeapRegion* head, + HeapRegion* tail) { + _recorded_survivor_regions = regions; + _recorded_survivor_head = head; + _recorded_survivor_tail = tail; + } + + void record_thread_age_table(ageTable* age_table) + { + _survivors_age_table.merge_par(age_table); + } + + // Calculates survivor space parameters. + void calculate_survivors_policy(); + }; // This encapsulates a particular strategy for a g1 Collector. diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1MMUTracker.hpp --- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -28,7 +28,7 @@ /***** ALL TIMES ARE IN SECS!!!!!!!
*****/ // this is the "interface" -class G1MMUTracker { +class G1MMUTracker: public CHeapObj { protected: double _time_slice; double _max_gc_time; // this is per time slice @@ -67,7 +67,7 @@ } }; -class G1MMUTrackerQueueElem { +class G1MMUTrackerQueueElem VALUE_OBJ_CLASS_SPEC { private: double _start_time; double _end_time; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -77,6 +77,18 @@ #define G1_PARTIAL_ARRAY_MASK 1 +inline bool has_partial_array_mask(oop* ref) { + return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK; +} + +inline oop* set_partial_array_mask(oop obj) { + return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK); +} + +inline oop clear_partial_array_mask(oop* ref) { + return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK); +} + class G1ParScanPartialArrayClosure : public G1ParClosureSuper { G1ParScanClosure _scanner; template void process_array_chunk(oop obj, int start, int end); @@ -101,7 +113,8 @@ G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { } }; -template +template class G1ParCopyClosure : public G1ParCopyHelper { G1ParScanClosure _scanner; void do_oop_work(oop* p); @@ -119,14 +132,22 @@ virtual void do_oop(narrowOop* p) { do_oop_nv(p); } }; -typedef G1ParCopyClosure G1ParScanExtRootClosure; -typedef G1ParCopyClosure G1ParScanPermClosure; -typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; -typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; -typedef G1ParCopyClosure G1ParScanHeapRSClosure; -typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; -typedef G1ParCopyClosure G1ParScanHeapEvacClosure; - +typedef G1ParCopyClosure G1ParScanExtRootClosure; +typedef G1ParCopyClosure G1ParScanPermClosure; +typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; +typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; +typedef G1ParCopyClosure G1ParScanHeapRSClosure; +typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; +// This is the only case when we set skip_cset_test. Basically, this +// closure is (should?) only be called directly while we're draining +// the overflow and task queues. In that case we know that the +// reference in question points into the collection set, otherwise we +// would not have pushed it on the queue. +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; +// We need a separate closure to handle references during evacuation +// failure processing, as it cannot assume that the reference already +// points into the collection set (like G1ParScanHeapEvacClosure does).
+typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; class FilterIntoCSClosure: public OopClosure { G1CollectedHeap* _g1; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -105,33 +105,6 @@ _g1->heap_region_iterate(&rc); } -class UpdateRSOopClosure: public OopClosure { - HeapRegion* _from; - HRInto_G1RemSet* _rs; - int _worker_i; -public: - UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : - _from(NULL), _rs(rs), _worker_i(worker_i) { - guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); - } - - void set_from(HeapRegion* from) { - assert(from != NULL, "from region must be non-NULL"); - _from = from; - } - - virtual void do_oop(narrowOop* p) { - guarantee(false, "NYI"); - } - virtual void do_oop(oop* p) { - assert(_from != NULL, "from region must be non-NULL"); - _rs->par_write_ref(_from, p, _worker_i); - } - // Override: this closure is idempotent. - // bool idempotent() { return true; } - bool apply_to_weak_ref_discovered_field() { return true; } -}; - class UpdateRSOutOfRegionClosure: public HeapRegionClosure { G1CollectedHeap* _g1h; ModRefBarrierSet* _mr_bs; @@ -177,11 +150,19 @@ _cards_scanned(NULL), _total_cards_scanned(0) { _seq_task = new SubTasksDone(NumSeqTasks); - _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); + guarantee(n_workers() > 0, "There should be some workers"); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, n_workers()); + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + } } HRInto_G1RemSet::~HRInto_G1RemSet() { delete _seq_task; + for (uint i = 0; i < n_workers(); i++) { + delete _new_refs[i]; + } + FREE_C_HEAP_ARRAY(GrowableArray*, _new_refs); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -281,8 +262,9 @@ if (!_ct_bs->is_card_claimed(card_index) && !_ct_bs->is_card_dirty(card_index)) { assert(_ct_bs->is_card_clean(card_index) || - _ct_bs->is_card_claimed(card_index), - "Card is either dirty, clean, or claimed"); + _ct_bs->is_card_claimed(card_index) || + _ct_bs->is_card_deferred(card_index), + "Card is either clean, claimed or deferred"); if (_ct_bs->claim_card(card_index)) scanCard(card_index, card_region); } @@ -338,14 +320,12 @@ _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - if (ParallelGCThreads > 0) { - // In this case, we called scanNewRefsRS and recorded the corresponding - // time. 
- double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); - if (scan_new_refs_time_ms > 0.0) { - closure_app_time_ms += scan_new_refs_time_ms; - } + + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); } @@ -469,8 +449,8 @@ double scan_new_refs_start_sec = os::elapsedTime(); G1CollectedHeap* g1h = G1CollectedHeap::heap(); CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); - while (_new_refs[worker_i]->is_nonempty()) { - oop* p = _new_refs[worker_i]->pop(); + for (int i = 0; i < _new_refs[worker_i]->length(); i++) { + oop* p = _new_refs[worker_i]->at(i); oop obj = *p; // *p was in the collection set when p was pushed on "_new_refs", but // another thread may have processed this location from an RS, so it @@ -480,10 +460,6 @@ HeapRegion* r = g1h->heap_region_containing(p); DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); - assert(ParallelGCThreads > 1 - || to->rem_set()->contains_reference(p), - "Invariant: pushed after being added." - "(Not reliable in parallel code.)"); oc->set_region(r); // If "p" has already been processed concurrently, this is // idempotent. @@ -526,20 +502,31 @@ } if (ParallelGCThreads > 0) { - // This is a temporary change to serialize the update and scanning - // of remembered sets. There are some race conditions when this is - // done in parallel and they are causing failures. When we resolve - // said race conditions, we'll revert back to parallel remembered - // set updating and scanning. See CRs 6677707 and 6677708. - if (worker_i == 0) { + // The two flags below were introduced temporarily to serialize + // the updating and scanning of remembered sets. There are some + // race conditions when these two operations are done in parallel + // and they are causing failures. When we resolve said race + // conditions, we'll revert back to parallel remembered set + // updating and scanning. See CRs 6677707 and 6677708. 
+ if (G1EnableParallelRSetUpdating || (worker_i == 0)) { updateRS(worker_i); scanNewRefsRS(oc, worker_i); + } else { + _g1p->record_update_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_update_rs_processed_buffers(worker_i, 0.0); + _g1p->record_update_rs_time(worker_i, 0.0); + _g1p->record_scan_new_refs_time(worker_i, 0.0); + } + if (G1EnableParallelRSetScanning || (worker_i == 0)) { scanRS(oc, worker_i); + } else { + _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_scan_rs_time(worker_i, 0.0); } } else { assert(worker_i == 0, "invariant"); - updateRS(0); + scanNewRefsRS(oc, 0); scanRS(oc, 0); } } @@ -559,11 +546,7 @@ assert(!_par_traversal_in_progress, "Invariant between iterations."); if (ParallelGCThreads > 0) { set_par_traversal(true); - int n_workers = _g1->workers()->total_workers(); - _seq_task->set_par_threads(n_workers); - for (uint i = 0; i < ParallelGCThreads; i++) - _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); - + _seq_task->set_par_threads((int)n_workers()); if (cg1r->do_traversal()) { updateRS(0); // Have to do this again after updaters @@ -572,6 +555,9 @@ } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); + for (uint i = 0; i < n_workers(); ++i) { + _cards_scanned[i] = 0; + } _total_cards_scanned = 0; } @@ -584,6 +570,53 @@ } }; +class UpdateRSetOopsIntoCSImmediate : public OopClosure { + G1CollectedHeap* _g1; +public: + UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + HeapRegion* to = _g1->heap_region_containing(*p); + if (to->in_collection_set()) { + if (to->rem_set()->add_reference(p, 0)) { + _g1->schedule_popular_region_evac(to); + } + } + } +}; + +class UpdateRSetOopsIntoCSDeferred : public OopClosure { + G1CollectedHeap* _g1; + CardTableModRefBS* _ct_bs; + DirtyCardQueue* _dcq; +public: + UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + +void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) { + for (size_t i = 0; i < n_workers(); i++) { + for (int j = 0; j < _new_refs[i]->length(); j++) { + oop* p = _new_refs[i]->at(j); + cl->do_oop(p); + } + } +} + void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { guarantee( _cards_scanned != NULL, "invariant" ); _total_cards_scanned = 0; @@ -606,11 +639,25 @@ if (cg1r->do_traversal()) { cg1r->cg1rThread()->set_do_traversal(false); } - for (uint i = 0; i < ParallelGCThreads; i++) { - delete _new_refs[i]; - } set_par_traversal(false); } + + if (_g1->evacuation_failed()) { + // Restore remembered sets for the regions pointing into + // the collection set. 
+ if (G1DeferredRSUpdate) { + DirtyCardQueue dcq(&_g1->dirty_card_queue_set()); + UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq); + new_refs_iterate(&deferred_update); + } else { + UpdateRSetOopsIntoCSImmediate immediate_update(_g1); + new_refs_iterate(&immediate_update); + } + } + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i]->clear(); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); } @@ -988,7 +1035,9 @@ } } void HRInto_G1RemSet::prepare_for_verify() { - if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) { + if (G1HRRSFlushLogBuffersOnVerify && + (VerifyBeforeGC || VerifyAfterGC) + && !_g1->full_collection()) { cleanupHRRS(); _g1->set_refine_cte_cl_concurrency(false); if (SafepointSynchronize::is_at_safepoint()) { @@ -999,5 +1048,7 @@ _cg1r->set_use_cache(false); updateRS(0); _cg1r->set_use_cache(cg1r_use_cache); + + assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); } } diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -30,7 +30,7 @@ class HRInto_G1RemSet; class ConcurrentG1Refine; -class G1RemSet { +class G1RemSet: public CHeapObj { protected: G1CollectedHeap* _g1; @@ -155,6 +155,7 @@ bool _par_traversal_in_progress; void set_par_traversal(bool b); GrowableArray** _new_refs; + void new_refs_iterate(OopClosure* cl); public: // This is called to reset dual hash tables after the gc pause @@ -214,3 +215,27 @@ int n() { return _n; }; HeapWord* start_first() { return _start_first; } }; + +class UpdateRSOopClosure: public OopClosure { + HeapRegion* _from; + HRInto_G1RemSet* _rs; + int _worker_i; +public: + UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : + _from(NULL), _rs(rs), _worker_i(worker_i) { + guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); + } + + void set_from(HeapRegion* from) { + assert(from != NULL, "from region must be non-NULL"); + _from = from; + } + + virtual void do_oop(narrowOop* p); + virtual void do_oop(oop* p); + + // Override: this closure is idempotent. + // bool idempotent() { return true; } + bool apply_to_weak_ref_discovered_field() { return true; } +}; + diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -31,24 +31,7 @@ } inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { - oop obj = *p; - assert(from != NULL && from->is_in_reserved(p), - "p is not in a from"); - HeapRegion* to = _g1->heap_region_containing(obj); - if (from != to && to != NULL) { - if (!to->popular() && !from->is_survivor()) { -#if G1_REM_SET_LOGGING - gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" - " for region [" PTR_FORMAT ", " PTR_FORMAT ")", - p, obj, - to->bottom(), to->end()); -#endif - assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); - if (to->rem_set()->add_reference(p)) { - _g1->schedule_popular_region_evac(to); - } - } - } + par_write_ref(from, p, 0); } inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { @@ -82,7 +65,22 @@ HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. 
if (to != NULL && from != NULL && from != to) { - if (!to->popular() && !from->is_survivor()) { + bool update_delayed = false; + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + // The _par_traversal_in_progress flag is true during the collection pause, + // false during the evacuation failure handling. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + // Deferred updates to the Cset are either discarded (in the normal case), + // or processed (if an evacuation failure occurs) at the end + // of the collection. + // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). + update_delayed = true; + } + + if (!to->popular() && !update_delayed) { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" " for region [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -94,11 +92,14 @@ _g1->schedule_popular_region_evac(to); } } - // There is a tricky infinite loop if we keep pushing - // self forwarding pointers onto our _new_refs list. - if (_par_traversal_in_progress && - to->in_collection_set() && !self_forwarded(obj)) { - _new_refs[tid]->push(p); - } } } + +inline void UpdateRSOopClosure::do_oop(narrowOop* p) { + guarantee(false, "NYI"); +} + +inline void UpdateRSOopClosure::do_oop(oop* p) { + assert(_from != NULL, "from region must be non-NULL"); + _rs->par_write_ref(_from, p, _worker_i); +} diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -28,7 +28,7 @@ #define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \ \ - product(intx, ParallelGCG1AllocBufferSize, 4*K, \ + product(intx, ParallelGCG1AllocBufferSize, 8*K, \ "Size of parallel G1 allocation buffers in to-space.") \ \ product(intx, G1TimeSliceMS, 500, \ @@ -172,6 +172,9 @@ develop(bool, G1RSBarrierUseQueue, true, \ "If true, use queueing RS barrier") \ \ + develop(bool, G1DeferredRSUpdate, true, \ "If true, use deferred RS updates") \ \ develop(bool, G1RSLogCheckCardTable, false, \ "If true, verify that no dirty cards remain after RS log " \ "processing.") \ @@ -281,7 +284,25 @@ develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \ "Forces flushing of log buffers before verification.") \ \ - product(intx, G1MaxSurvivorRegions, 0, \ - "The maximum number of survivor regions") + product(bool, G1UseSurvivorSpace, true, \ "When true, use survivor space.") \ \ + product(bool, G1FixedTenuringThreshold, false, \ "When set, G1 will not adjust the tenuring threshold") \ \ + product(bool, G1FixedEdenSize, false, \ "When set, G1 will not allocate unused survivor space regions") \ \ + product(uintx, G1FixedSurvivorSpaceSize, 0, \ "If non-0 is the size of the G1 survivor space, " \ "otherwise SurvivorRatio is used to determine the size") \ \ + experimental(bool, G1EnableParallelRSetUpdating, false, \ "Enables the parallelization of remembered set updating " \ "during evacuation pauses") \ \ + experimental(bool, G1EnableParallelRSetScanning, false, \ "Enables the parallelization of remembered set scanning " \ "during evacuation pauses") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG,
DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -32,11 +32,13 @@ G1BarrierNone, G1BarrierRS, G1BarrierEvac }; -template +template class G1ParCopyClosure; class G1ParScanClosure; -typedef G1ParCopyClosure G1ParScanHeapEvacClosure; +typedef G1ParCopyClosure + G1ParScanHeapEvacClosure; class FilterIntoCSClosure; class FilterOutOfRegionClosure; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -318,7 +318,8 @@ FinalCountClaimValue = 1, NoteEndClaimValue = 2, ScrubRemSetClaimValue = 3, - ParVerifyClaimValue = 4 + ParVerifyClaimValue = 4, + RebuildRSClaimValue = 5 }; // Concurrent refinement requires contiguous heap regions (in which TLABs @@ -566,7 +567,11 @@ void note_end_of_copying() { assert(top() >= _next_top_at_mark_start, "Increase only"); - _next_top_at_mark_start = top(); + // Survivor regions will be scanned at the start of concurrent + // marking. + if (!is_survivor()) { + _next_top_at_mark_start = top(); + } } // Returns "false" iff no object in the region was allocated when the @@ -829,7 +834,7 @@ // A linked list of heap regions. It leaves the "next" field // unspecified; that's up to subtypes. -class RegionList { +class RegionList VALUE_OBJ_CLASS_SPEC { protected: virtual HeapRegion* get_next(HeapRegion* chr) = 0; virtual void set_next(HeapRegion* chr, diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -65,9 +65,11 @@ // We need access in order to union things into the base table. BitMap* bm() { return &_bm; } +#if PRT_COUNT_OCCUPIED void recount_occupied() { _occupied = (jint) bm()->count_one_bits(); } +#endif PerRegionTable(HeapRegion* hr) : _hr(hr), @@ -1144,7 +1146,9 @@ size_t i = _outgoing_region_map.get_next_one_offset(0); while (i < _outgoing_region_map.size()) { HeapRegion* to_region = g1h->region_at(i); - to_region->rem_set()->clear_incoming_entry(hr()); + if (!to_region->in_collection_set()) { + to_region->rem_set()->clear_incoming_entry(hr()); + } i = _outgoing_region_map.get_next_one_offset(i+1); } } diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -58,7 +58,7 @@ // is represented. If a deleted PRT is re-used, a thread adding a bit, // thinking the PRT is for a different region, does no harm.
-class OtherRegionsTable: public CHeapObj { +class OtherRegionsTable VALUE_OBJ_CLASS_SPEC { friend class HeapRegionRemSetIterator; G1CollectedHeap* _g1h; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -91,15 +91,17 @@ _n_completed_buffers(0), _process_completed_threshold(0), _process_completed(false), _buf_free_list(NULL), _buf_free_list_sz(0) -{} +{ + _fl_owner = this; +} void** PtrQueueSet::allocate_buffer() { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void** res = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - _buf_free_list_sz--; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + if (_fl_owner->_buf_free_list != NULL) { + void** res = _fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + _fl_owner->_buf_free_list_sz--; // Just override the next pointer with NULL, just in case we scan this part // of the buffer. res[0] = NULL; @@ -111,10 +113,10 @@ void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_buf_free_list; - _buf_free_list = buf; - _buf_free_list_sz++; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = buf; + _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { @@ -207,3 +209,58 @@ void PtrQueueSet::set_process_completed_threshold(size_t sz) { _process_completed_threshold = sz; } + +// Merge lists of buffers. Notify waiting threads if the length of the list +// exceeds threshold. The source queue is emptied as a result. The queues +// must share the monitor. +void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well formedness"); + if (src->_completed_buffers_head != NULL) { + _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } + } + _n_completed_buffers += src->_n_completed_buffers; + + src->_n_completed_buffers = 0; + src->_completed_buffers_head = NULL; + src->_completed_buffers_tail = NULL; + + assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || + _completed_buffers_head != NULL && _completed_buffers_tail != NULL, + "Sanity"); + + if (!_process_completed && + _n_completed_buffers >= _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } +} + +// Merge free lists of the two queues. The free list of the source +// queue is emptied as a result. The queues must share the same +// mutex that guards free lists. 
+void PtrQueueSet::merge_freelists(PtrQueueSet* src) { + assert(_fl_lock == src->_fl_lock, "Should share the same lock"); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void **p = _buf_free_list; + while (*p != NULL) { + p = (void**)*p; + } + *p = src->_buf_free_list; + } else { + _buf_free_list = src->_buf_free_list; + } + _buf_free_list_sz += src->_buf_free_list_sz; + src->_buf_free_list = NULL; + src->_buf_free_list_sz = 0; +} diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/ptrQueue.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -29,7 +29,7 @@ class PtrQueueSet; -class PtrQueue: public CHeapObj { +class PtrQueue VALUE_OBJ_CLASS_SPEC { protected: // The ptr queue set to which this queue belongs. @@ -130,7 +130,7 @@ // In particular, the individual queues allocate buffers from this shared // set, and return completed buffers to the set. // All these variables are protected by the TLOQ_CBL_mon. XXX ??? -class PtrQueueSet: public CHeapObj { +class PtrQueueSet VALUE_OBJ_CLASS_SPEC { protected: @@ -155,6 +155,9 @@ Mutex* _fl_lock; void** _buf_free_list; size_t _buf_free_list_sz; + // Queue sets can share a freelist. The _fl_owner variable + // specifies the owner. It is set to "this" by default. + PtrQueueSet* _fl_owner; // The size of all buffers in the set. size_t _sz; @@ -188,10 +191,13 @@ // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0) { + int max_completed_queue = 0, + PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); - _cbl_mon = cbl_mon; _fl_lock = fl_lock; + _cbl_mon = cbl_mon; + _fl_lock = fl_lock; + _fl_owner = (fl_owner != NULL) ? fl_owner : this; } // Return an empty oop array of size _sz (required to be non-zero). @@ -228,4 +234,7 @@ void reduce_free_list(); size_t completed_buffers_num() { return _n_completed_buffers; } + + void merge_bufferlists(PtrQueueSet* src); + void merge_freelists(PtrQueueSet* src); }; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -504,6 +504,7 @@ // Make sure that the current and next tables agree. (Another mechanism // takes care of deleting now-unused tables.) _cur = _next; + set_expanded(false); } void SparsePRT::expand() { diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -33,7 +33,7 @@ // old versions synchronously. -class SparsePRTEntry { +class SparsePRTEntry: public CHeapObj { public: enum SomePublicConstants { CardsPerEntry = (short)4, @@ -167,7 +167,7 @@ }; // ValueObj because it will be embedded in HRRS iterator.
-class RSHashTableIter: public CHeapObj { +class RSHashTableIter VALUE_OBJ_CLASS_SPEC { short _tbl_ind; short _bl_ind; short _card_ind; @@ -213,7 +213,7 @@ class SparsePRTIter; -class SparsePRT : public CHeapObj { +class SparsePRT VALUE_OBJ_CLASS_SPEC { // Iterations are done on the _cur hash table, since they only need to // see entries visible at the start of a collection pause. // All other operations are done using the _next hash table. @@ -274,7 +274,7 @@ // Clean up all tables on the expanded list. Called single threaded. static void cleanup_all(); - RSHashTable* next() const { return _next; } + RSHashTable* cur() const { return _cur; } void init_iterator(SparsePRTIter* sprt_iter); @@ -300,7 +300,7 @@ {} void init(const SparsePRT* sprt) { - RSHashTableIter::init(sprt->next()); + RSHashTableIter::init(sprt->cur()); } bool has_next(size_t& card_index) { return RSHashTableIter::has_next(card_index); diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/survRateGroup.cpp --- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -29,23 +29,14 @@ const char* name, size_t summary_surv_rates_len) : _g1p(g1p), _name(name), - _all_regions_allocated(0), - _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0), - _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL), - _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0), _summary_surv_rates_len(summary_surv_rates_len), _summary_surv_rates_max_len(0), - _summary_surv_rates(NULL) { - - // the following will set up the arrays with length 1 - _curr_length = 1; - stop_adding_regions(); - guarantee( _array_length == 1, "invariant" ); - guarantee( _surv_rate_pred[0] != NULL, "invariant" ); - _surv_rate_pred[0]->add(0.4); - all_surviving_words_recorded(false); - _curr_length = 0; - + _summary_surv_rates(NULL), + _surv_rate(NULL), + _accum_surv_rate_pred(NULL), + _surv_rate_pred(NULL) +{ + reset(); if (summary_surv_rates_len > 0) { size_t length = summary_surv_rates_len; _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length); @@ -60,61 +51,80 @@ start_adding_regions(); } + +void SurvRateGroup::reset() +{ + _all_regions_allocated = 0; + _scan_only_prefix = 0; + _setup_seq_num = 0; + _stats_arrays_length = 0; + _accum_surv_rate = 0.0; + _last_pred = 0.0; + // the following will set up the arrays with length 1 + _region_num = 1; + stop_adding_regions(); + guarantee( _stats_arrays_length == 1, "invariant" ); + guarantee( _surv_rate_pred[0] != NULL, "invariant" ); + _surv_rate_pred[0]->add(0.4); + all_surviving_words_recorded(false); + _region_num = 0; +} + + void SurvRateGroup::start_adding_regions() { - _setup_seq_num = _array_length; - _curr_length = _scan_only_prefix; + _setup_seq_num = _stats_arrays_length; + _region_num = _scan_only_prefix; _accum_surv_rate = 0.0; #if 0 - gclog_or_tty->print_cr("start adding regions, seq num %d, length %d", - _setup_seq_num, _curr_length); + gclog_or_tty->print_cr("[%s] start adding regions, seq num %d, length %d", + _name, _setup_seq_num, _region_num); #endif // 0 } void SurvRateGroup::stop_adding_regions() { - size_t length = _curr_length; #if 0 - gclog_or_tty->print_cr("stop adding regions, length %d", length); + gclog_or_tty->print_cr("[%s] stop adding regions, length %d", _name, _region_num); #endif // 0 - if (length > _array_length) { + if (_region_num > _stats_arrays_length) { double* old_surv_rate = _surv_rate; double* old_accum_surv_rate_pred = 
_accum_surv_rate_pred; TruncatedSeq** old_surv_rate_pred = _surv_rate_pred; - _surv_rate = NEW_C_HEAP_ARRAY(double, length); + _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num); if (_surv_rate == NULL) { - vm_exit_out_of_memory(sizeof(double) * length, + vm_exit_out_of_memory(sizeof(double) * _region_num, "Not enough space for surv rate array."); } - _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length); + _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num); if (_accum_surv_rate_pred == NULL) { - vm_exit_out_of_memory(sizeof(double) * length, + vm_exit_out_of_memory(sizeof(double) * _region_num, "Not enough space for accum surv rate pred array."); } - _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length); + _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num); if (_surv_rate == NULL) { - vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length, + vm_exit_out_of_memory(sizeof(TruncatedSeq*) * _region_num, "Not enough space for surv rate pred array."); } - for (size_t i = 0; i < _array_length; ++i) + for (size_t i = 0; i < _stats_arrays_length; ++i) _surv_rate_pred[i] = old_surv_rate_pred[i]; #if 0 - gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d", - _array_length, length - 1); + gclog_or_tty->print_cr("[%s] stop adding regions, new seqs %d to %d", + _name, _array_length, _region_num - 1); #endif // 0 - for (size_t i = _array_length; i < length; ++i) { + for (size_t i = _stats_arrays_length; i < _region_num; ++i) { _surv_rate_pred[i] = new TruncatedSeq(10); // _surv_rate_pred[i]->add(last_pred); } - _array_length = length; + _stats_arrays_length = _region_num; if (old_surv_rate != NULL) FREE_C_HEAP_ARRAY(double, old_surv_rate); @@ -124,7 +134,7 @@ FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred); } - for (size_t i = 0; i < _array_length; ++i) + for (size_t i = 0; i < _stats_arrays_length; ++i) _surv_rate[i] = 0.0; } @@ -135,7 +145,7 @@ double ret = _accum_surv_rate; if (adjustment > 0) { - TruncatedSeq* seq = get_seq(_curr_length+1); + TruncatedSeq* seq = get_seq(_region_num+1); double surv_rate = _g1p->get_new_prediction(seq); ret += surv_rate; } @@ -145,23 +155,23 @@ int SurvRateGroup::next_age_index() { - TruncatedSeq* seq = get_seq(_curr_length); + TruncatedSeq* seq = get_seq(_region_num); double surv_rate = _g1p->get_new_prediction(seq); _accum_surv_rate += surv_rate; - ++_curr_length; + ++_region_num; return (int) ++_all_regions_allocated; } void SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) { - guarantee( scan_only_prefix <= _curr_length, "pre-condition" ); + guarantee( scan_only_prefix <= _region_num, "pre-condition" ); _scan_only_prefix = scan_only_prefix; } void SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) { - guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length, + guarantee( 0 <= age_in_group && (size_t) age_in_group < _region_num, "pre-condition" ); guarantee( _surv_rate[age_in_group] <= 0.00001, "should only update each slot once" ); @@ -178,15 +188,15 @@ void SurvRateGroup::all_surviving_words_recorded(bool propagate) { - if (propagate && _curr_length > 0) { // conservative - double surv_rate = _surv_rate_pred[_curr_length-1]->last(); + if (propagate && _region_num > 0) { // conservative + double surv_rate = _surv_rate_pred[_region_num-1]->last(); #if 0 gclog_or_tty->print_cr("propagating %1.2lf from %d to %d", surv_rate, _curr_length, _array_length - 1); #endif // 0 - for (size_t i = _curr_length; i < _array_length; ++i) { + for (size_t i = _region_num; i < 
_stats_arrays_length; ++i) { guarantee( _surv_rate[i] <= 0.00001, "the slot should not have been updated" ); _surv_rate_pred[i]->add(surv_rate); @@ -195,7 +205,7 @@ double accum = 0.0; double pred = 0.0; - for (size_t i = 0; i < _array_length; ++i) { + for (size_t i = 0; i < _stats_arrays_length; ++i) { pred = _g1p->get_new_prediction(_surv_rate_pred[i]); if (pred > 1.0) pred = 1.0; accum += pred; @@ -209,8 +219,8 @@ void SurvRateGroup::print() { gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)", - _name, _curr_length, _scan_only_prefix); - for (size_t i = 0; i < _curr_length; ++i) { + _name, _region_num, _scan_only_prefix); + for (size_t i = 0; i < _region_num; ++i) { gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s", i, _surv_rate[i] * 100.0, _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0, diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/g1/survRateGroup.hpp --- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp Tue Mar 24 13:09:52 2009 -0400 @@ -29,7 +29,7 @@ G1CollectorPolicy* _g1p; const char* _name; - size_t _array_length; + size_t _stats_arrays_length; double* _surv_rate; double* _accum_surv_rate_pred; double _last_pred; @@ -40,7 +40,7 @@ size_t _summary_surv_rates_max_len; int _all_regions_allocated; - size_t _curr_length; + size_t _region_num; size_t _scan_only_prefix; size_t _setup_seq_num; @@ -48,6 +48,7 @@ SurvRateGroup(G1CollectorPolicy* g1p, const char* name, size_t summary_surv_rates_len); + void reset(); void start_adding_regions(); void stop_adding_regions(); void record_scan_only_prefix(size_t scan_only_prefix); @@ -55,22 +56,21 @@ void all_surviving_words_recorded(bool propagate); const char* name() { return _name; } - size_t region_num() { return _curr_length; } + size_t region_num() { return _region_num; } size_t scan_only_length() { return _scan_only_prefix; } double accum_surv_rate_pred(int age) { assert(age >= 0, "must be"); - if ((size_t)age < _array_length) + if ((size_t)age < _stats_arrays_length) return _accum_surv_rate_pred[age]; else { - double diff = (double) (age - _array_length + 1); - return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred; + double diff = (double) (age - _stats_arrays_length + 1); + return _accum_surv_rate_pred[_stats_arrays_length-1] + diff * _last_pred; } } double accum_surv_rate(size_t adjustment); TruncatedSeq* get_seq(size_t age) { - guarantee( 0 <= age, "pre-condition" ); if (age >= _setup_seq_num) { guarantee( _setup_seq_num > 0, "invariant" ); age = _setup_seq_num-1; diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/includeDB_gc_g1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Tue Mar 24 13:09:52 2009 -0400 @@ -31,7 +31,7 @@ cardTableRS.cpp concurrentMark.hpp cardTableRS.cpp g1SATBCardTableModRefBS.hpp -collectionSetChooser.cpp g1CollectedHeap.hpp +collectionSetChooser.cpp g1CollectedHeap.inline.hpp collectionSetChooser.cpp g1CollectorPolicy.hpp collectionSetChooser.cpp collectionSetChooser.hpp @@ -42,14 +42,15 @@ concurrentG1Refine.cpp concurrentG1Refine.hpp concurrentG1Refine.cpp concurrentG1RefineThread.hpp concurrentG1Refine.cpp copy.hpp -concurrentG1Refine.cpp g1CollectedHeap.hpp +concurrentG1Refine.cpp g1CollectedHeap.inline.hpp concurrentG1Refine.cpp g1RemSet.hpp concurrentG1Refine.hpp globalDefinitions.hpp +concurrentG1Refine.hpp allocation.hpp 
concurrentG1RefineThread.cpp concurrentG1Refine.hpp concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp -concurrentG1RefineThread.cpp g1CollectedHeap.hpp +concurrentG1RefineThread.cpp g1CollectedHeap.inline.hpp concurrentG1RefineThread.cpp g1CollectorPolicy.hpp concurrentG1RefineThread.cpp handles.inline.hpp concurrentG1RefineThread.cpp mutexLocker.hpp @@ -166,10 +167,11 @@ g1CollectorPolicy.cpp concurrentMarkThread.inline.hpp g1CollectorPolicy.cpp debug.hpp g1CollectorPolicy.cpp java.hpp -g1CollectorPolicy.cpp g1CollectedHeap.hpp +g1CollectorPolicy.cpp g1CollectedHeap.inline.hpp g1CollectorPolicy.cpp g1CollectorPolicy.hpp g1CollectorPolicy.cpp heapRegionRemSet.hpp g1CollectorPolicy.cpp mutexLocker.hpp +g1CollectorPolicy.cpp gcPolicyCounters.hpp g1CollectorPolicy.hpp collectorPolicy.hpp g1CollectorPolicy.hpp collectionSetChooser.hpp @@ -187,7 +189,7 @@ g1MarkSweep.cpp codeCache.hpp g1MarkSweep.cpp events.hpp g1MarkSweep.cpp fprofiler.hpp -g1MarkSweep.hpp g1CollectedHeap.hpp +g1MarkSweep.hpp g1CollectedHeap.inline.hpp g1MarkSweep.cpp g1MarkSweep.hpp g1MarkSweep.cpp gcLocker.hpp g1MarkSweep.cpp genCollectedHeap.hpp @@ -226,7 +228,7 @@ g1MMUTracker.cpp mutexLocker.hpp g1MMUTracker.hpp debug.hpp - +g1MMUTracker.hpp allocation.hpp g1RemSet.cpp bufferingOopClosure.hpp g1RemSet.cpp concurrentG1Refine.hpp g1RemSet.cpp concurrentG1RefineThread.hpp @@ -270,6 +272,7 @@ heapRegion.hpp watermark.hpp heapRegion.hpp g1_specialized_oop_closures.hpp heapRegion.hpp survRateGroup.hpp +heapRegion.hpp ageTable.hpp heapRegionRemSet.hpp sparsePRT.hpp @@ -283,7 +286,7 @@ heapRegionRemSet.cpp space.inline.hpp heapRegionSeq.cpp allocation.hpp -heapRegionSeq.cpp g1CollectedHeap.hpp +heapRegionSeq.cpp g1CollectedHeap.inline.hpp heapRegionSeq.cpp heapRegionSeq.hpp heapRegionSeq.hpp growableArray.hpp @@ -334,18 +337,18 @@ survRateGroup.hpp numberSeq.hpp survRateGroup.cpp allocation.hpp -survRateGroup.cpp g1CollectedHeap.hpp +survRateGroup.cpp g1CollectedHeap.inline.hpp survRateGroup.cpp g1CollectorPolicy.hpp survRateGroup.cpp heapRegion.hpp survRateGroup.cpp survRateGroup.hpp thread.cpp concurrentMarkThread.inline.hpp -universe.cpp g1CollectedHeap.hpp +universe.cpp g1CollectedHeap.inline.hpp universe.cpp g1CollectorPolicy.hpp vm_operations_g1.hpp vmGCOperations.hpp vm_operations_g1.cpp vm_operations_g1.hpp -vm_operations_g1.cpp g1CollectedHeap.hpp +vm_operations_g1.cpp g1CollectedHeap.inline.hpp vm_operations_g1.cpp isGCActiveMark.hpp diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/includeDB_gc_shared --- a/src/share/vm/gc_implementation/includeDB_gc_shared Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_shared Tue Mar 24 13:09:52 2009 -0400 @@ -100,4 +100,4 @@ spaceCounters.hpp perfData.hpp spaceCounters.hpp generationCounters.hpp -vmGCOperations.cpp g1CollectedHeap.hpp +vmGCOperations.cpp g1CollectedHeap.inline.hpp diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/shared/ageTable.cpp --- a/src/share/vm/gc_implementation/shared/ageTable.cpp Fri Mar 20 12:17:54 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/ageTable.cpp Tue Mar 24 13:09:52 2009 -0400 @@ -67,6 +67,12 @@ } } +void ageTable::merge_par(ageTable* subTable) { + for (int i = 0; i < table_size; i++) { + Atomic::add_ptr(subTable->sizes[i], &sizes[i]); + } +} + int ageTable::compute_tenuring_threshold(size_t survivor_capacity) { size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100); size_t total = 0; diff -r c9ede57210d9 -r b36996d65c38 
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/gc_implementation/shared/ageTable.hpp
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/gc_implementation/shared/ageTable.hpp Tue Mar 24 13:09:52 2009 -0400
@@ -56,6 +56,7 @@
   // Merge another age table with the current one. Used
   // for parallel young generation gc.
   void merge(ageTable* subTable);
+  void merge_par(ageTable* subTable);

   // calculate new tenuring threshold based on age information
   int compute_tenuring_threshold(size_t survivor_capacity);
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/memory/cardTableModRefBS.cpp
--- a/src/share/vm/memory/cardTableModRefBS.cpp Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp Tue Mar 24 13:09:52 2009 -0400
@@ -343,18 +343,62 @@
   inline_write_ref_field(field, newVal);
 }

+/*
+   Claimed and deferred bits are used together in G1 during the evacuation
+   pause. These bits can have the following state transitions:
+   1. The claimed bit can be set over any other card state, except that
+      the "dirty -> dirty and claimed" transition is checked for in
+      G1 code and is not used.
+   2. The deferred bit can be set only if the previous state of the card
+      was either clean or claimed. mark_card_deferred() is wait-free.
+      We do not care whether the operation succeeds: if it does not,
+      the only consequence is a duplicate entry in the update buffer
+      (a "cache miss"), so it is not worth spinning.
+ */
+
 bool CardTableModRefBS::claim_card(size_t card_index) {
   jbyte val = _byte_map[card_index];
-  if (val != claimed_card_val()) {
-    jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val);
-    if (res == val)
+  assert(val != dirty_card_val(), "Shouldn't claim a dirty card");
+  while (val == clean_card_val() ||
+         (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) {
+    jbyte new_val = val;
+    if (val == clean_card_val()) {
+      new_val = (jbyte)claimed_card_val();
+    } else {
+      new_val = val | (jbyte)claimed_card_val();
+    }
+    jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+    if (res == val) {
       return true;
-    else return false;
+    }
+    val = res;
   }
   return false;
 }

+bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
+  jbyte val = _byte_map[card_index];
+  // It's already processed
+  if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
+    return false;
+  }
+  // The deferred bit can be installed either on a clean card or on a claimed card.
+  jbyte new_val = val;
+  if (val == clean_card_val()) {
+    new_val = (jbyte)deferred_card_val();
+  } else {
+    if (val & claimed_card_val()) {
+      new_val = val | (jbyte)deferred_card_val();
+    }
+  }
+  if (new_val != val) {
+    Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
+  }
+  return true;
+}
+
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
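To make the new protocol concrete, here is a hedged standalone sketch of the claim_card() loop, with std::atomic replacing HotSpot's Atomic::cmpxchg and the card values mirroring the enum in the cardTableModRefBS.hpp hunk that follows; the function signature is an illustrative assumption.

    #include <atomic>
    #include <cstdint>

    // Card values as in the patched enum: claimed and deferred occupy
    // distinct low bits, and clean_card_mask (-32 == 0xE0) has zeros
    // wherever any other card value has bits set.
    enum : int8_t {
      clean_card      = -1,
      dirty_card      =  0,
      precleaned_card =  1,
      claimed_card    =  2,
      deferred_card   =  4,
      clean_card_mask = clean_card - 31
    };

    // Sketch of claim_card(): overwrite a clean card with 'claimed', or
    // OR the claimed bit into any other unclaimed state; return false
    // if another thread claims the card first.
    bool claim_card(std::atomic<int8_t>& card) {
      int8_t val = card.load();
      while (val == clean_card ||
             (val & (clean_card_mask | claimed_card)) != claimed_card) {
        int8_t new_val = (val == clean_card)
                           ? (int8_t)claimed_card
                           : (int8_t)(val | claimed_card);
        if (card.compare_exchange_strong(val, new_val)) {
          return true;   // we installed the claimed bit
        }
        // CAS failure reloads 'val'; the loop re-tests the new state.
      }
      return false;      // card was already claimed by someone else
    }

The one transition deliberately left unexercised is dirty -> dirty-and-claimed, which is what the assert in the patched claim_card() enforces.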
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/memory/cardTableModRefBS.hpp
--- a/src/share/vm/memory/cardTableModRefBS.hpp Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp Tue Mar 24 13:09:52 2009 -0400
@@ -52,11 +52,15 @@

   enum CardValues {
     clean_card             = -1,
+    // The mask contains zeros in places for all other values.
+    clean_card_mask        = clean_card - 31,
+
     dirty_card             =  0,
     precleaned_card        =  1,
-    claimed_card           =  3,
-    last_card              =  4,
-    CT_MR_BS_last_reserved = 10
+    claimed_card           =  2,
+    deferred_card          =  4,
+    last_card              =  8,
+    CT_MR_BS_last_reserved = 16
   };

   // dirty and precleaned are equivalent wrt younger_refs_iter.
@@ -254,9 +258,11 @@
   };

   static int clean_card_val()      { return clean_card; }
+  static int clean_card_mask_val() { return clean_card_mask; }
   static int dirty_card_val()      { return dirty_card; }
   static int claimed_card_val()    { return claimed_card; }
   static int precleaned_card_val() { return precleaned_card; }
+  static int deferred_card_val()   { return deferred_card; }

   // For RTTI simulation.
   bool is_a(BarrierSet::Name bsn) {
@@ -329,7 +335,8 @@
   }

   bool is_card_claimed(size_t card_index) {
-    return _byte_map[card_index] == claimed_card_val();
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
   }

   bool claim_card(size_t card_index);
@@ -338,6 +345,13 @@
     return _byte_map[card_index] == clean_card_val();
   }

+  bool is_card_deferred(size_t card_index) {
+    jbyte val = _byte_map[card_index];
+    return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val();
+  }
+
+  bool mark_card_deferred(size_t card_index);
+
   // Card marking array base (adjusted for heap low boundary)
   // This would be the 0th element of _byte_map, if the heap started at 0x0.
   // But since the heap starts at some higher address, this points to somewhere
@@ -434,6 +448,10 @@
     return byte_for(p) - _byte_map;
   }

+  const jbyte* byte_for_index(const size_t card_index) const {
+    return _byte_map + card_index;
+  }
+
   void verify();
   void verify_guard();
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/opto/graphKit.cpp
--- a/src/share/vm/opto/graphKit.cpp Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/opto/graphKit.cpp Tue Mar 24 13:09:52 2009 -0400
@@ -3233,12 +3233,11 @@

   // Now some of the values

-  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
-  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+  Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);

   // if (!marking)
   __ if_then(marking, BoolTest::ne, zero); {
+    Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);

     const Type* t1 = adr->bottom_type();
     const Type* t2 = val->bottom_type();
@@ -3246,6 +3245,7 @@
     Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);

     // if (orig != NULL)
     __ if_then(orig, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);

       // load original value
       // alias_idx correct??
diff -r c9ede57210d9 -r b36996d65c38 src/share/vm/utilities/workgroup.hpp
--- a/src/share/vm/utilities/workgroup.hpp Fri Mar 20 12:17:54 2009 -0700
+++ b/src/share/vm/utilities/workgroup.hpp Tue Mar 24 13:09:52 2009 -0400
@@ -32,7 +32,7 @@

 // An abstract task to be worked on by a gang.
 // You subclass this to supply your own work() method
-class AbstractGangTask: public CHeapObj {
+class AbstractGangTask VALUE_OBJ_CLASS_SPEC {
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
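The graphKit.cpp hunk above sinks the loads of the SATB queue index and buffer into the branches that consume them, so the compiled G1 pre-barrier only touches those per-thread fields when marking is active and the old field value is non-NULL. A rough source-level sketch of the barrier shape this produces follows; every name in it is an illustrative stand-in, not HotSpot's.

    #include <cstddef>

    typedef void* oop;   // illustrative stand-in for a heap reference

    struct SatbThread {  // assumed per-thread SATB queue state
      bool   marking;    // the flag the barrier tests first
      size_t index;      // slots remaining in the enqueue buffer
      oop*   buffer;     // the enqueue buffer itself
    };

    static void satb_enqueue_slow(SatbThread* t, oop orig) {
      // Runtime call: flush the buffer and enqueue orig. Elided here.
      (void)t; (void)orig;
    }

    // After the change: 'index' is loaded only under the marking check,
    // and 'buffer' only once a non-NULL previous value must be recorded.
    void pre_barrier(SatbThread* t, oop* field) {
      if (t->marking) {
        size_t index = t->index;
        oop orig = *field;               // previous value of the field
        if (orig != NULL) {
          oop* buffer = t->buffer;
          if (index != 0) {
            index -= 1;
            buffer[index] = orig;        // fast path: record old value
            t->index = index;
          } else {
            satb_enqueue_slow(t, orig);  // buffer full: slow path
          }
        }
      }
    }

On the common path, with marking off, the barrier now costs a single flag load.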