# HG changeset patch # User kvn # Date 1302299782 25200 # Node ID 3af54845df9820197f7d14ae436e5437e509c436 # Parent 3f49d30f8184c508e72c4ff0b12b3664aded8824 7004555: Add new policy for one iteration loops Summary: Add new policy for one iteration loops (mostly formal pre- loops). Reviewed-by: never diff -r 3f49d30f8184 -r 3af54845df98 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Thu Apr 07 21:32:23 2011 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Fri Apr 08 14:56:22 2011 -0700 @@ -63,6 +63,46 @@ } } +//------------------------------compute_exact_trip_count----------------------- +// Compute loop exact trip count if possible. Do not recalculate trip count for +// split loops (pre-main-post) which have their limits and inits behind Opaque node. +void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) { + if (!_head->as_Loop()->is_valid_counted_loop()) { + return; + } + CountedLoopNode* cl = _head->as_CountedLoop(); + // Trip count may become nonexact for iteration split loops since + // RCE modifies limits. Note, _trip_count value is not reset since + // it is used to limit unrolling of main loop. + cl->set_nonexact_trip_count(); + + // Loop's test should be part of loop. + if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) + return; // Infinite loop + +#ifdef ASSERT + BoolTest::mask bt = cl->loopexit()->test_trip(); + assert(bt == BoolTest::lt || bt == BoolTest::gt || + bt == BoolTest::ne, "canonical test is expected"); +#endif + + Node* init_n = cl->init_trip(); + Node* limit_n = cl->limit(); + if (init_n != NULL && init_n->is_Con() && + limit_n != NULL && limit_n->is_Con()) { + // Use longs to avoid integer overflow. + int stride_con = cl->stride_con(); + long init_con = cl->init_trip()->get_int(); + long limit_con = cl->limit()->get_int(); + int stride_m = stride_con - (stride_con > 0 ? 1 : -1); + long trip_count = (limit_con - init_con + stride_m)/stride_con; + if (trip_count > 0 && (julong)trip_count < (julong)max_juint) { + // Set exact trip count. + cl->set_exact_trip_count((uint)trip_count); + } + } +} + //------------------------------compute_profile_trip_cnt---------------------------- // Compute loop trip count from profile data as // (backedge_count + loop_exit_count) / loop_exit_count @@ -533,29 +573,15 @@ if (!cl->is_valid_counted_loop()) return false; // Malformed counted loop - Node *init_n = cl->init_trip(); - Node *limit_n = cl->limit(); - - // Non-constant bounds - if (init_n == NULL || !init_n->is_Con() || - limit_n == NULL || !limit_n->is_Con()) { + if (!cl->has_exact_trip_count()) { + // Trip count is not exact. return false; } - // Use longs to avoid integer overflow. - int stride_con = cl->stride_con(); - long init_con = cl->init_trip()->get_int(); - long limit_con = cl->limit()->get_int(); - int stride_m = stride_con - (stride_con > 0 ? 1 : -1); - long trip_cnt = (limit_con - init_con + stride_m)/stride_con; - // Note, max_jint is used to indicate unknown trip count. - if (trip_cnt <= 0 || trip_cnt >= (long)max_jint) { - // return a false (no maximally unroll) and the regular unroll/peel - // route will make a small mess which CCP will fold away. - return false; - } - uint trip_count = (uint)trip_cnt; - cl->set_trip_count(trip_count); + uint trip_count = cl->trip_count(); + // Note, max_juint is used to indicate unknown trip count. + assert(trip_count > 1, "one iteration loop should be optimized out already"); + assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); // Real policy: if we maximally unroll, does it get too big? // Allow the unrolled mess to get larger than standard loop @@ -1096,7 +1122,7 @@ tty->print("Unrolling "); loop->dump_head(); } else if (TraceLoopOpts) { - if (loop_head->trip_count() < LoopUnrollLimit) { + if (loop_head->trip_count() < (uint)LoopUnrollLimit) { tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); } else { tty->print("Unroll %d ", loop_head->unrolled_count()*2); @@ -1802,6 +1828,17 @@ // main and post loops have explicitly created zero trip guard bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop(); if (needs_guard) { + // Skip guard if values not overlap. + const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int(); + const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int(); + int stride_con = cl->stride_con(); + if (stride_con > 0) { + needs_guard = (init_t->_hi >= limit_t->_lo); + } else { + needs_guard = (init_t->_lo <= limit_t->_hi); + } + } + if (needs_guard) { // Check for an obvious zero trip guard. Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); if (inctrl->Opcode() == Op_IfTrue) { @@ -1846,12 +1883,49 @@ return true; } +//------------------------------policy_do_one_iteration_loop------------------- +// Convert one iteration loop into normal code. +bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) { + if (!_head->as_Loop()->is_valid_counted_loop()) + return false; // Only for counted loop + + CountedLoopNode *cl = _head->as_CountedLoop(); + if (!cl->has_exact_trip_count() || cl->trip_count() != 1) { + return false; + } + +#ifndef PRODUCT + if(TraceLoopOpts) { + tty->print("OneIteration "); + this->dump_head(); + } +#endif + + Node *init_n = cl->init_trip(); +#ifdef ASSERT + // Loop boundaries should be constant since trip count is exact. + assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration"); +#endif + // Replace the phi at loop head with the value of the init_trip. + // Then the CountedLoopEnd will collapse (backedge will not be taken) + // and all loop-invariant uses of the exit values will be correct. + phase->_igvn.replace_node(cl->phi(), cl->init_trip()); + phase->C->set_major_progress(); + return true; +} //============================================================================= //------------------------------iteration_split_impl--------------------------- bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { + // Compute exact loop trip count if possible. + compute_exact_trip_count(phase); + + // Convert one iteration loop into normal code. + if (policy_do_one_iteration_loop(phase)) + return true; + // Check and remove empty loops (spam micro-benchmarks) - if( policy_do_remove_empty_loop(phase) ) + if (policy_do_remove_empty_loop(phase)) return true; // Here we removed an empty loop bool should_peel = policy_peeling(phase); // Should we peel? @@ -1860,40 +1934,40 @@ // Non-counted loops may be peeled; exactly 1 iteration is peeled. // This removes loop-invariant tests (usually null checks). - if( !_head->is_CountedLoop() ) { // Non-counted loop + if (!_head->is_CountedLoop()) { // Non-counted loop if (PartialPeelLoop && phase->partial_peel(this, old_new)) { // Partial peel succeeded so terminate this round of loop opts return false; } - if( should_peel ) { // Should we peel? + if (should_peel) { // Should we peel? #ifndef PRODUCT if (PrintOpto) tty->print_cr("should_peel"); #endif phase->do_peeling(this,old_new); - } else if( should_unswitch ) { + } else if (should_unswitch) { phase->do_unswitching(this, old_new); } return true; } CountedLoopNode *cl = _head->as_CountedLoop(); - if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops + if (!cl->loopexit()) return true; // Ignore various kinds of broken loops // Do nothing special to pre- and post- loops - if( cl->is_pre_loop() || cl->is_post_loop() ) return true; + if (cl->is_pre_loop() || cl->is_post_loop()) return true; // Compute loop trip count from profile data compute_profile_trip_cnt(phase); // Before attempting fancy unrolling, RCE or alignment, see if we want // to completely unroll this loop or do loop unswitching. - if( cl->is_normal_loop() ) { + if (cl->is_normal_loop()) { if (should_unswitch) { phase->do_unswitching(this, old_new); return true; } bool should_maximally_unroll = policy_maximally_unroll(phase); - if( should_maximally_unroll ) { + if (should_maximally_unroll) { // Here we did some unrolling and peeling. Eventually we will // completely unroll this loop and it will no longer be a loop. phase->do_maximally_unroll(this,old_new); @@ -1937,14 +2011,14 @@ // If we have any of these conditions (RCE, alignment, unrolling) met, then // we switch to the pre-/main-/post-loop model. This model also covers // peeling. - if( should_rce || should_align || should_unroll ) { - if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops + if (should_rce || should_align || should_unroll) { + if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops phase->insert_pre_post_loops(this,old_new, !may_rce_align); // Adjust the pre- and main-loop limits to let the pre and post loops run // with full checks, but the main-loop with no checks. Remove said // checks from the main body. - if( should_rce ) + if (should_rce) phase->do_range_check(this,old_new); // Double loop body for unrolling. Adjust the minimum-trip test (will do @@ -1952,16 +2026,16 @@ // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. - if( should_unroll && !should_peel ) - phase->do_unroll(this,old_new, true); + if (should_unroll && !should_peel) + phase->do_unroll(this,old_new, true); // Adjust the pre-loop limits to align the main body // iterations. - if( should_align ) + if (should_align) Unimplemented(); } else { // Else we have an unchanged counted loop - if( should_peel ) // Might want to peel but do nothing else + if (should_peel) // Might want to peel but do nothing else phase->do_peeling(this,old_new); } return true; diff -r 3f49d30f8184 -r 3af54845df98 src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Thu Apr 07 21:32:23 2011 -0700 +++ b/src/share/vm/opto/loopnode.cpp Fri Apr 08 14:56:22 2011 -0700 @@ -1450,6 +1450,21 @@ if (_head->is_CountedLoop()) { CountedLoopNode *cl = _head->as_CountedLoop(); tty->print(" counted"); + + Node* init_n = cl->init_trip(); + if (init_n != NULL && init_n->is_Con()) + tty->print(" [%d,", cl->init_trip()->get_int()); + else + tty->print(" [int,"); + Node* limit_n = cl->limit(); + if (limit_n != NULL && limit_n->is_Con()) + tty->print("%d),", cl->limit()->get_int()); + else + tty->print("int),"); + int stride_con = cl->stride_con(); + if (stride_con > 0) tty->print("+"); + tty->print("%d", stride_con); + if (cl->is_pre_loop ()) tty->print(" pre" ); if (cl->is_main_loop()) tty->print(" main"); if (cl->is_post_loop()) tty->print(" post"); diff -r 3f49d30f8184 -r 3af54845df98 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Thu Apr 07 21:32:23 2011 -0700 +++ b/src/share/vm/opto/loopnode.hpp Fri Apr 08 14:56:22 2011 -0700 @@ -57,7 +57,12 @@ protected: short _loop_flags; // Names for flag bitfields - enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 }; + enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3, + MainHasNoPreLoop=4, + HasExactTripCount=8, + InnerLoop=16, + PartialPeelLoop=32, + PartialPeelFailed=64 }; char _unswitch_count; enum { _unswitch_max=3 }; @@ -65,13 +70,13 @@ // Names for edge indices enum { Self=0, EntryControl, LoopBackControl }; - int is_inner_loop() const { return _loop_flags & inner_loop; } - void set_inner_loop() { _loop_flags |= inner_loop; } + int is_inner_loop() const { return _loop_flags & InnerLoop; } + void set_inner_loop() { _loop_flags |= InnerLoop; } - int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; } - void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; } - int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; } - void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; } + int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } + void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; } + int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } + void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } int unswitch_max() { return _unswitch_max; } int unswitch_count() { return _unswitch_count; } @@ -137,8 +142,8 @@ // the Main CountedLoop. Used to assert that we understand the graph shape. node_idx_t _main_idx; - // Known trip count calculated by policy_maximally_unroll - int _trip_count; + // Known trip count calculated by compute_exact_trip_count() + uint _trip_count; // Expected trip count from profile data float _profile_trip_cnt; @@ -152,7 +157,7 @@ public: CountedLoopNode( Node *entry, Node *backedge ) - : LoopNode(entry, backedge), _trip_count(max_jint), + : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint), _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0), _node_count_before_unroll(0) { init_class_id(Class_CountedLoop); @@ -194,13 +199,12 @@ // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or // Aligned, may be missing it's pre-loop. - enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 }; - int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; } - int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; } - int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; } - int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; } - int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } - void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } + int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } + int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } + int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } + int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } + int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } + void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } int main_idx() const { return _main_idx; } @@ -208,10 +212,19 @@ void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } - void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; } + void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; } + + void set_trip_count(uint tc) { _trip_count = tc; } + uint trip_count() { return _trip_count; } - void set_trip_count(int tc) { _trip_count = tc; } - int trip_count() { return _trip_count; } + bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; } + void set_exact_trip_count(uint tc) { + _trip_count = tc; + _loop_flags |= HasExactTripCount; + } + void set_nonexact_trip_count() { + _loop_flags &= ~HasExactTripCount; + } void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } float profile_trip_cnt() { return _profile_trip_cnt; } @@ -384,6 +397,9 @@ // Micro-benchmark spamming. Remove empty loops. bool policy_do_remove_empty_loop( PhaseIdealLoop *phase ); + // Convert one iteration loop into normal code. + bool policy_do_one_iteration_loop( PhaseIdealLoop *phase ); + // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can // make some loop-invariant test (usually a null-check) happen before the // loop. @@ -412,6 +428,9 @@ // Return TRUE if "iff" is a range check. bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; + // Compute loop exact trip count if possible + void compute_exact_trip_count( PhaseIdealLoop *phase ); + // Compute loop trip count from profile data void compute_profile_trip_cnt( PhaseIdealLoop *phase );