# HG changeset patch # User dcherepanov # Date 1589378357 -7200 # Node ID e692f9f04b16a59a0959521d697bccff1a744160 # Parent 16b8606e7a356ba7b472d3e177b621fbeeef3bcd 8241114: Better range handling Reviewed-by: kvn, vlivanov, rhalade, ahgross, bae, yan diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/addnode.cpp --- a/src/share/vm/opto/addnode.cpp Fri Dec 14 11:22:26 2018 +0100 +++ b/src/share/vm/opto/addnode.cpp Wed May 13 15:59:17 2020 +0200 @@ -844,6 +844,14 @@ return TypeInt::make( MAX2(r0->_lo,r1->_lo), MAX2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) ); } +// Check if addition of an integer with type 't' and a constant 'c' can overflow +static bool can_overflow(const TypeInt* t, jint c) { + jint t_lo = t->_lo; + jint t_hi = t->_hi; + return ((c < 0 && (java_add(t_lo, c) > t_lo)) || + (c > 0 && (java_add(t_hi, c) < t_hi))); +} + //============================================================================= //------------------------------Idealize--------------------------------------- // MINs show up in range-check loop limit calculations. Look for @@ -866,7 +874,7 @@ // Get left input & constant Node *x = l; - int x_off = 0; + jint x_off = 0; if( x->Opcode() == Op_AddI && // Check for "x+c0" and collect constant x->in(2)->is_Con() ) { const Type *t = x->in(2)->bottom_type(); @@ -877,7 +885,7 @@ // Scan a right-spline-tree for MINs Node *y = r; - int y_off = 0; + jint y_off = 0; // Check final part of MIN tree if( y->Opcode() == Op_AddI && // Check for "y+c1" and collect constant y->in(2)->is_Con() ) { @@ -891,6 +899,7 @@ return this; } + const TypeInt* tx = phase->type(x)->isa_int(); if( r->Opcode() == Op_MinI ) { assert( r != r->in(2), "dead loop in MinINode::Ideal" ); @@ -907,18 +916,23 @@ if( x->_idx > y->_idx ) return new (phase->C) MinINode(r->in(1),phase->transform(new (phase->C) MinINode(l,r->in(2)))); - // See if covers: MIN2(x+c0,MIN2(y+c1,z)) - if( !phase->eqv(x,y) ) return NULL; - // If (y == x) transform MIN2(x+c0, MIN2(x+c1,z)) into - // MIN2(x+c0 or x+c1 which less, z). - return new (phase->C) MinINode(phase->transform(new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off)))),r->in(2)); + // Transform MIN2(x + c0, MIN2(x + c1, z)) into MIN2(x + MIN2(c0, c1), z) + // if x == y and the additions can't overflow. + if (phase->eqv(x,y) && + !can_overflow(tx, x_off) && + !can_overflow(tx, y_off)) { + return new (phase->C) MinINode(phase->transform(new (phase->C) AddINode(x, phase->intcon(MIN2(x_off, y_off)))), r->in(2)); + } } else { - // See if covers: MIN2(x+c0,y+c1) - if( !phase->eqv(x,y) ) return NULL; - // If (y == x) transform MIN2(x+c0,x+c1) into x+c0 or x+c1 which less. - return new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off))); + // Transform MIN2(x + c0, y + c1) into x + MIN2(c0, c1) + // if x == y and the additions can't overflow. + if (phase->eqv(x,y) && + !can_overflow(tx, x_off) && + !can_overflow(tx, y_off)) { + return new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off))); + } } - + return NULL; } //------------------------------add_ring--------------------------------------- diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Fri Dec 14 11:22:26 2018 +0100 +++ b/src/share/vm/opto/loopTransform.cpp Wed May 13 15:59:17 2020 +0200 @@ -1517,65 +1517,78 @@ } //------------------------------adjust_limit----------------------------------- -// Helper function for add_constraint(). -Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) { - // Compute "I :: (limit-offset)/scale" - Node *con = new (C) SubINode(rc_limit, offset); - register_new_node(con, pre_ctrl); - Node *X = new (C) DivINode(0, con, scale); - register_new_node(X, pre_ctrl); +// Helper function that computes new loop limit as (rc_limit-offset)/scale +Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) { + Node* sub = new (C) SubLNode(rc_limit, offset); + register_new_node(sub, pre_ctrl); + Node* limit = new (C) DivLNode(NULL, sub, scale); + register_new_node(limit, pre_ctrl); - // When the absolute value of scale is greater than one, the integer - // division may round limit down so add one to the limit. - if (round_up) { - X = new (C) AddINode(X, _igvn.intcon(1)); - register_new_node(X, pre_ctrl); + // When the absolute value of scale is greater than one, the division + // may round limit down/up, so add/sub one to/from the limit. + if (round) { + limit = new (C) AddLNode(limit, _igvn.longcon(is_positive_stride ? -1 : 1)); + register_new_node(limit, pre_ctrl); } - // Adjust loop limit - loop_limit = (stride_con > 0) - ? (Node*)(new (C) MinINode(loop_limit, X)) - : (Node*)(new (C) MaxINode(loop_limit, X)); - register_new_node(loop_limit, pre_ctrl); - return loop_limit; + // Clamp the limit to handle integer under-/overflows. + // When reducing the limit, clamp to [min_jint, old_limit]: + // MIN(old_limit, MAX(limit, min_jint)) + // When increasing the limit, clamp to [old_limit, max_jint]: + // MAX(old_limit, MIN(limit, max_jint)) + Node* cmp = new (C) CmpLNode(limit, _igvn.longcon(is_positive_stride ? min_jint : max_jint)); + register_new_node(cmp, pre_ctrl); + Node* bol = new (C) BoolNode(cmp, is_positive_stride ? BoolTest::lt : BoolTest::gt); + register_new_node(bol, pre_ctrl); + limit = new (C) ConvL2INode(limit); + register_new_node(limit, pre_ctrl); + limit = new (C) CMoveINode(bol, limit, _igvn.intcon(is_positive_stride ? min_jint : max_jint), TypeInt::INT); + register_new_node(limit, pre_ctrl); + + limit = is_positive_stride ? (Node*)(new (C) MinINode(old_limit, limit)) + : (Node*)(new (C) MaxINode(old_limit, limit)); + register_new_node(limit, pre_ctrl); + return limit; } //------------------------------add_constraint--------------------------------- // Constrain the main loop iterations so the conditions: -// low_limit <= scale_con * I + offset < upper_limit -// always holds true. That is, either increase the number of iterations in -// the pre-loop or the post-loop until the condition holds true in the main -// loop. Stride, scale, offset and limit are all loop invariant. Further, -// stride and scale are constants (offset and limit often are). -void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { - // For positive stride, the pre-loop limit always uses a MAX function - // and the main loop a MIN function. For negative stride these are - // reversed. +// low_limit <= scale_con*I + offset < upper_limit +// always hold true. That is, either increase the number of iterations in the +// pre-loop or reduce the number of iterations in the main-loop until the condition +// holds true in the main-loop. Stride, scale, offset and limit are all loop +// invariant. Further, stride and scale are constants (offset and limit often are). +void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit) { + assert(_igvn.type(offset)->isa_long() != NULL && _igvn.type(low_limit)->isa_long() != NULL && + _igvn.type(upper_limit)->isa_long() != NULL, "arguments should be long values"); - // Also for positive stride*scale the affine function is increasing, so the - // pre-loop must check for underflow and the post-loop for overflow. - // Negative stride*scale reverses this; pre-loop checks for overflow and - // post-loop for underflow. + // For a positive stride, we need to reduce the main-loop limit and + // increase the pre-loop limit. This is reversed for a negative stride. + bool is_positive_stride = (stride_con > 0); - Node *scale = _igvn.intcon(scale_con); + // If the absolute scale value is greater one, division in 'adjust_limit' may require + // rounding. Make sure the ABS method correctly handles min_jint. + // Only do this for the pre-loop, one less iteration of the main loop doesn't hurt. + bool round = ABS(scale_con) > 1; + + Node* scale = _igvn.longcon(scale_con); set_ctrl(scale, C->root()); if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow + // Positive stride*scale: the affine function is increasing, + // the pre-loop checks for underflow and the post-loop for overflow. + // The overflow limit: scale*I+offset < upper_limit - // For main-loop compute + // For the main-loop limit compute: // ( if (scale > 0) /* and stride > 0 */ // I < (upper_limit-offset)/scale // else /* scale < 0 and stride < 0 */ // I > (upper_limit-offset)/scale // ) - // - // (upper_limit-offset) may overflow or underflow. - // But it is fine since main loop will either have - // less iterations or will be skipped in such case. - *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false); + *main_limit = adjust_limit(is_positive_stride, scale, offset, upper_limit, *main_limit, pre_ctrl, false); - // The underflow limit: low_limit <= scale*I+offset. - // For pre-loop compute + // The underflow limit: low_limit <= scale*I+offset + // For the pre-loop limit compute: // NOT(scale*I+offset >= low_limit) // scale*I+offset < low_limit // ( if (scale > 0) /* and stride > 0 */ @@ -1583,40 +1596,13 @@ // else /* scale < 0 and stride < 0 */ // I > (low_limit-offset)/scale // ) + *pre_limit = adjust_limit(!is_positive_stride, scale, offset, low_limit, *pre_limit, pre_ctrl, round); + } else { + // Negative stride*scale: the affine function is decreasing, + // the pre-loop checks for overflow and the post-loop for underflow. - if (low_limit->get_int() == -max_jint) { - if (!RangeLimitCheck) return; - // We need this guard when scale*pre_limit+offset >= limit - // due to underflow. So we need execute pre-loop until - // scale*I+offset >= min_int. But (min_int-offset) will - // underflow when offset > 0 and X will be > original_limit - // when stride > 0. To avoid it we replace positive offset with 0. - // - // Also (min_int+1 == -max_int) is used instead of min_int here - // to avoid problem with scale == -1 (min_int/(-1) == min_int). - Node* shift = _igvn.intcon(31); - set_ctrl(shift, C->root()); - Node* sign = new (C) RShiftINode(offset, shift); - register_new_node(sign, pre_ctrl); - offset = new (C) AndINode(offset, sign); - register_new_node(offset, pre_ctrl); - } else { - assert(low_limit->get_int() == 0, "wrong low limit for range check"); - // The only problem we have here when offset == min_int - // since (0-min_int) == min_int. It may be fine for stride > 0 - // but for stride < 0 X will be < original_limit. To avoid it - // max(pre_limit, original_limit) is used in do_range_check(). - } - // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); - *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl, - scale_con > 1 && stride_con > 0); - - } else { // stride_con*scale_con < 0 - // For negative stride*scale pre-loop checks for overflow and - // post-loop for underflow. - // // The overflow limit: scale*I+offset < upper_limit - // For pre-loop compute + // For the pre-loop limit compute: // NOT(scale*I+offset < upper_limit) // scale*I+offset >= upper_limit // scale*I+offset+1 > upper_limit @@ -1625,58 +1611,24 @@ // else /* scale > 0 and stride < 0 */ // I > (upper_limit-(offset+1))/scale // ) - // - // (upper_limit-offset-1) may underflow or overflow. - // To avoid it min(pre_limit, original_limit) is used - // in do_range_check() for stride > 0 and max() for < 0. - Node *one = _igvn.intcon(1); + Node* one = _igvn.longcon(1); set_ctrl(one, C->root()); - - Node *plus_one = new (C) AddINode(offset, one); + Node* plus_one = new (C) AddLNode(offset, one); register_new_node( plus_one, pre_ctrl ); - // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); - *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl, - scale_con < -1 && stride_con > 0); + *pre_limit = adjust_limit(!is_positive_stride, scale, plus_one, upper_limit, *pre_limit, pre_ctrl, round); - if (low_limit->get_int() == -max_jint) { - if (!RangeLimitCheck) return; - // We need this guard when scale*main_limit+offset >= limit - // due to underflow. So we need execute main-loop while - // scale*I+offset+1 > min_int. But (min_int-offset-1) will - // underflow when (offset+1) > 0 and X will be < main_limit - // when scale < 0 (and stride > 0). To avoid it we replace - // positive (offset+1) with 0. - // - // Also (min_int+1 == -max_int) is used instead of min_int here - // to avoid problem with scale == -1 (min_int/(-1) == min_int). - Node* shift = _igvn.intcon(31); - set_ctrl(shift, C->root()); - Node* sign = new (C) RShiftINode(plus_one, shift); - register_new_node(sign, pre_ctrl); - plus_one = new (C) AndINode(plus_one, sign); - register_new_node(plus_one, pre_ctrl); - } else { - assert(low_limit->get_int() == 0, "wrong low limit for range check"); - // The only problem we have here when offset == max_int - // since (max_int+1) == min_int and (0-min_int) == min_int. - // But it is fine since main loop will either have - // less iterations or will be skipped in such case. - } - // The underflow limit: low_limit <= scale*I+offset. - // For main-loop compute + // The underflow limit: low_limit <= scale*I+offset + // For the main-loop limit compute: // scale*I+offset+1 > low_limit // ( if (scale < 0) /* and stride > 0 */ // I < (low_limit-(offset+1))/scale // else /* scale > 0 and stride < 0 */ // I > (low_limit-(offset+1))/scale // ) - - *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl, - false); + *main_limit = adjust_limit(is_positive_stride, scale, plus_one, low_limit, *main_limit, pre_ctrl, false); } } - //------------------------------is_scaled_iv--------------------------------- // Return true if exp is a constant times an induction var bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) { @@ -1836,22 +1788,14 @@ // Must know if its a count-up or count-down loop int stride_con = cl->stride_con(); - Node *zero = _igvn.intcon(0); - Node *one = _igvn.intcon(1); + Node* zero = _igvn.longcon(0); + Node* one = _igvn.longcon(1); // Use symmetrical int range [-max_jint,max_jint] - Node *mini = _igvn.intcon(-max_jint); + Node* mini = _igvn.longcon(-max_jint); set_ctrl(zero, C->root()); set_ctrl(one, C->root()); set_ctrl(mini, C->root()); - // Range checks that do not dominate the loop backedge (ie. - // conditionally executed) can lengthen the pre loop limit beyond - // the original loop limit. To prevent this, the pre limit is - // (for stride > 0) MINed with the original loop limit (MAXed - // stride < 0) when some range_check (rc) is conditionally - // executed. - bool conditional_rc = false; - // Check loop body for tests of trip-counter plus loop-invariant vs // loop-invariant. for( uint i = 0; i < loop->_body.size(); i++ ) { @@ -1930,15 +1874,20 @@ // stride_con and scale_con can be negative which will flip about the // sense of the test. + // Perform the limit computations in jlong to avoid overflow + jlong lscale_con = scale_con; + Node* int_offset = offset; + offset = new (C) ConvI2LNode(offset); + register_new_node(offset, pre_ctrl); + Node* int_limit = limit; + limit = new (C) ConvI2LNode(limit); + register_new_node(limit, pre_ctrl); + // Adjust pre and main loop limits to guard the correct iteration set if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests if( b_test._test == BoolTest::lt ) { // Range checks always use lt // The underflow and overflow limits: 0 <= scale*I+offset < limit - add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); - if (!conditional_rc) { - // (0-offset)/scale could be outside of loop iterations range. - conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck; - } + add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit); } else { #ifndef PRODUCT if( PrintOpto ) @@ -1952,16 +1901,16 @@ // Fall into GE case case BoolTest::ge: // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit - scale_con = -scale_con; - offset = new (C) SubINode( zero, offset ); + lscale_con = -lscale_con; + offset = new (C) SubLNode(zero, offset); register_new_node( offset, pre_ctrl ); - limit = new (C) SubINode( zero, limit ); + limit = new (C) SubLNode(zero, limit); register_new_node( limit, pre_ctrl ); // Fall into LE case case BoolTest::le: if (b_test._test != BoolTest::gt) { // Convert X <= Y to X < Y+1 - limit = new (C) AddINode( limit, one ); + limit = new (C) AddLNode(limit, one); register_new_node( limit, pre_ctrl ); } // Fall into LT case @@ -1969,13 +1918,7 @@ // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT. - add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit ); - if (!conditional_rc) { - // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range. - // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could - // still be outside of loop range. - conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck; - } + add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit); break; default: #ifndef PRODUCT @@ -2011,7 +1954,8 @@ } // Update loop limits - if (conditional_rc) { + if (pre_limit != orig_limit) { + // Computed pre-loop limit can be outside of loop iterations range. pre_limit = (stride_con > 0) ? (Node*)new (C) MinINode(pre_limit, orig_limit) : (Node*)new (C) MaxINode(pre_limit, orig_limit); register_new_node(pre_limit, pre_ctrl); diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Fri Dec 14 11:22:26 2018 +0100 +++ b/src/share/vm/opto/loopnode.hpp Wed May 13 15:59:17 2020 +0200 @@ -945,9 +945,9 @@ // always holds true. That is, either increase the number of iterations in // the pre-loop or the post-loop until the condition holds true in the main // loop. Scale_con, offset and limit are all loop invariant. - void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); + void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit); // Helper function for add_constraint(). - Node* adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up); + Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round); // Partially peel loop up through last_peel node. bool partial_peel( IdealLoopTree *loop, Node_List &old_new );