# HG changeset patch
# User dcherepanov
# Date 1589378357 -7200
# Node ID e692f9f04b16a59a0959521d697bccff1a744160
# Parent  16b8606e7a356ba7b472d3e177b621fbeeef3bcd
8241114: Better range handling
Reviewed-by: kvn, vlivanov, rhalade, ahgross, bae, yan

diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/addnode.cpp
--- a/src/share/vm/opto/addnode.cpp	Fri Dec 14 11:22:26 2018 +0100
+++ b/src/share/vm/opto/addnode.cpp	Wed May 13 15:59:17 2020 +0200
@@ -844,6 +844,14 @@
   return TypeInt::make( MAX2(r0->_lo,r1->_lo), MAX2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
 }
 
+// Check if addition of an integer with type 't' and a constant 'c' can overflow
+static bool can_overflow(const TypeInt* t, jint c) {
+  jint t_lo = t->_lo;
+  jint t_hi = t->_hi;
+  return ((c < 0 && (java_add(t_lo, c) > t_lo)) ||
+          (c > 0 && (java_add(t_hi, c) < t_hi)));
+}
+
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 // MINs show up in range-check loop limit calculations.  Look for
@@ -866,7 +874,7 @@
 
   // Get left input & constant
   Node *x = l;
-  int x_off = 0;
+  jint x_off = 0;
   if( x->Opcode() == Op_AddI && // Check for "x+c0" and collect constant
       x->in(2)->is_Con() ) {
     const Type *t = x->in(2)->bottom_type();
@@ -877,7 +885,7 @@
 
   // Scan a right-spline-tree for MINs
   Node *y = r;
-  int y_off = 0;
+  jint y_off = 0;
   // Check final part of MIN tree
   if( y->Opcode() == Op_AddI && // Check for "y+c1" and collect constant
       y->in(2)->is_Con() ) {
@@ -891,6 +899,7 @@
     return this;
   }
 
+  const TypeInt* tx = phase->type(x)->isa_int();
 
   if( r->Opcode() == Op_MinI ) {
     assert( r != r->in(2), "dead loop in MinINode::Ideal" );
@@ -907,18 +916,23 @@
     if( x->_idx > y->_idx )
       return new (phase->C) MinINode(r->in(1),phase->transform(new (phase->C) MinINode(l,r->in(2))));
 
-    // See if covers: MIN2(x+c0,MIN2(y+c1,z))
-    if( !phase->eqv(x,y) ) return NULL;
-    // If (y == x) transform MIN2(x+c0, MIN2(x+c1,z)) into
-    // MIN2(x+c0 or x+c1 which less, z).
-    return new (phase->C) MinINode(phase->transform(new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off)))),r->in(2));
+    // Transform MIN2(x + c0, MIN2(x + c1, z)) into MIN2(x + MIN2(c0, c1), z)
+    // if x == y and the additions can't overflow.
+    if (phase->eqv(x,y) &&
+        !can_overflow(tx, x_off) &&
+        !can_overflow(tx, y_off)) {
+      return new (phase->C) MinINode(phase->transform(new (phase->C) AddINode(x, phase->intcon(MIN2(x_off, y_off)))), r->in(2));
+    }
   } else {
-    // See if covers: MIN2(x+c0,y+c1)
-    if( !phase->eqv(x,y) ) return NULL;
-    // If (y == x) transform MIN2(x+c0,x+c1) into x+c0 or x+c1 which less.
-    return new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off)));
+    // Transform MIN2(x + c0, y + c1) into x + MIN2(c0, c1)
+    // if x == y and the additions can't overflow.
+    if (phase->eqv(x,y) &&
+        !can_overflow(tx, x_off) &&
+        !can_overflow(tx, y_off)) {
+      return new (phase->C) AddINode(x,phase->intcon(MIN2(x_off,y_off)));
+    }
   }
-
+  return NULL;
 }
 
 //------------------------------add_ring---------------------------------------
diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/loopTransform.cpp
--- a/src/share/vm/opto/loopTransform.cpp	Fri Dec 14 11:22:26 2018 +0100
+++ b/src/share/vm/opto/loopTransform.cpp	Wed May 13 15:59:17 2020 +0200
@@ -1517,65 +1517,78 @@
 }
 
 //------------------------------adjust_limit-----------------------------------
-// Helper function for add_constraint().
-Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) {
-  // Compute "I :: (limit-offset)/scale"
-  Node *con = new (C) SubINode(rc_limit, offset);
-  register_new_node(con, pre_ctrl);
-  Node *X = new (C) DivINode(0, con, scale);
-  register_new_node(X, pre_ctrl);
+// Helper function that computes new loop limit as (rc_limit-offset)/scale
+Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
+  Node* sub = new (C) SubLNode(rc_limit, offset);
+  register_new_node(sub, pre_ctrl);
+  Node* limit = new (C) DivLNode(NULL, sub, scale);
+  register_new_node(limit, pre_ctrl);
 
-  // When the absolute value of scale is greater than one, the integer
-  // division may round limit down so add one to the limit.
-  if (round_up) {
-    X = new (C) AddINode(X, _igvn.intcon(1));
-    register_new_node(X, pre_ctrl);
+  // When the absolute value of scale is greater than one, the division
+  // may round limit down/up, so add/sub one to/from the limit.
+  if (round) {
+    limit = new (C) AddLNode(limit, _igvn.longcon(is_positive_stride ? -1 : 1));
+    register_new_node(limit, pre_ctrl);
   }
 
-  // Adjust loop limit
-  loop_limit = (stride_con > 0)
-               ? (Node*)(new (C) MinINode(loop_limit, X))
-               : (Node*)(new (C) MaxINode(loop_limit, X));
-  register_new_node(loop_limit, pre_ctrl);
-  return loop_limit;
+  // Clamp the limit to handle integer under-/overflows.
+  // When reducing the limit, clamp to [min_jint, old_limit]:
+  //   MIN(old_limit, MAX(limit, min_jint))
+  // When increasing the limit, clamp to [old_limit, max_jint]:
+  //   MAX(old_limit, MIN(limit, max_jint))
+  Node* cmp = new (C) CmpLNode(limit, _igvn.longcon(is_positive_stride ? min_jint : max_jint));
+  register_new_node(cmp, pre_ctrl);
+  Node* bol = new (C) BoolNode(cmp, is_positive_stride ? BoolTest::lt : BoolTest::gt);
+  register_new_node(bol, pre_ctrl);
+  limit = new (C) ConvL2INode(limit);
+  register_new_node(limit, pre_ctrl);
+  limit = new (C) CMoveINode(bol, limit, _igvn.intcon(is_positive_stride ? min_jint : max_jint), TypeInt::INT);
+  register_new_node(limit, pre_ctrl);
+
+  limit = is_positive_stride ? (Node*)(new (C) MinINode(old_limit, limit))
+                             : (Node*)(new (C) MaxINode(old_limit, limit));
+  register_new_node(limit, pre_ctrl);
+  return limit;
 }
 
 //------------------------------add_constraint---------------------------------
 // Constrain the main loop iterations so the conditions:
-//    low_limit <= scale_con * I + offset  <  upper_limit
-// always holds true.  That is, either increase the number of iterations in
-// the pre-loop or the post-loop until the condition holds true in the main
-// loop.  Stride, scale, offset and limit are all loop invariant.  Further,
-// stride and scale are constants (offset and limit often are).
-void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
-  // For positive stride, the pre-loop limit always uses a MAX function
-  // and the main loop a MIN function.  For negative stride these are
-  // reversed.
+//    low_limit <= scale_con*I + offset < upper_limit
+// always hold true. That is, either increase the number of iterations in the
+// pre-loop or reduce the number of iterations in the main-loop until the condition
+// holds true in the main-loop. Stride, scale, offset and limit are all loop
+// invariant. Further, stride and scale are constants (offset and limit often are).
+void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit) {
+  assert(_igvn.type(offset)->isa_long() != NULL && _igvn.type(low_limit)->isa_long() != NULL &&
+         _igvn.type(upper_limit)->isa_long() != NULL, "arguments should be long values");
 
-  // Also for positive stride*scale the affine function is increasing, so the
-  // pre-loop must check for underflow and the post-loop for overflow.
-  // Negative stride*scale reverses this; pre-loop checks for overflow and
-  // post-loop for underflow.
+  // For a positive stride, we need to reduce the main-loop limit and
+  // increase the pre-loop limit. This is reversed for a negative stride.
+  bool is_positive_stride = (stride_con > 0);
 
-  Node *scale = _igvn.intcon(scale_con);
+  // If the absolute scale value is greater one, division in 'adjust_limit' may require
+  // rounding. Make sure the ABS method correctly handles min_jint.
+  // Only do this for the pre-loop, one less iteration of the main loop doesn't hurt.
+  bool round = ABS(scale_con) > 1;
+
+  Node* scale = _igvn.longcon(scale_con);
   set_ctrl(scale, C->root());
 
   if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow
+    // Positive stride*scale: the affine function is increasing,
+    // the pre-loop checks for underflow and the post-loop for overflow.
+
     // The overflow limit: scale*I+offset < upper_limit
-    // For main-loop compute
+    // For the main-loop limit compute:
     //   ( if (scale > 0) /* and stride > 0 */
     //       I < (upper_limit-offset)/scale
     //     else /* scale < 0 and stride < 0 */
     //       I > (upper_limit-offset)/scale
     //   )
-    //
-    // (upper_limit-offset) may overflow or underflow.
-    // But it is fine since main loop will either have
-    // less iterations or will be skipped in such case.
-    *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
+    *main_limit = adjust_limit(is_positive_stride, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
 
-    // The underflow limit: low_limit <= scale*I+offset.
-    // For pre-loop compute
+    // The underflow limit: low_limit <= scale*I+offset
+    // For the pre-loop limit compute:
     //   NOT(scale*I+offset >= low_limit)
     //   scale*I+offset < low_limit
     //   ( if (scale > 0) /* and stride > 0 */
@@ -1583,40 +1596,13 @@
     //     else /* scale < 0 and stride < 0 */
     //       I > (low_limit-offset)/scale
     //   )
+    *pre_limit = adjust_limit(!is_positive_stride, scale, offset, low_limit, *pre_limit, pre_ctrl, round);
+  } else {
+    // Negative stride*scale: the affine function is decreasing,
+    // the pre-loop checks for overflow and the post-loop for underflow.
 
-    if (low_limit->get_int() == -max_jint) {
-      if (!RangeLimitCheck) return;
-      // We need this guard when scale*pre_limit+offset >= limit
-      // due to underflow. So we need execute pre-loop until
-      // scale*I+offset >= min_int. But (min_int-offset) will
-      // underflow when offset > 0 and X will be > original_limit
-      // when stride > 0. To avoid it we replace positive offset with 0.
-      //
-      // Also (min_int+1 == -max_int) is used instead of min_int here
-      // to avoid problem with scale == -1 (min_int/(-1) == min_int).
-      Node* shift = _igvn.intcon(31);
-      set_ctrl(shift, C->root());
-      Node* sign = new (C) RShiftINode(offset, shift);
-      register_new_node(sign, pre_ctrl);
-      offset = new (C) AndINode(offset, sign);
-      register_new_node(offset, pre_ctrl);
-    } else {
-      assert(low_limit->get_int() == 0, "wrong low limit for range check");
-      // The only problem we have here when offset == min_int
-      // since (0-min_int) == min_int. It may be fine for stride > 0
-      // but for stride < 0 X will be < original_limit. To avoid it
-      // max(pre_limit, original_limit) is used in do_range_check().
-    }
-    // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
-    *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl,
-                              scale_con > 1 && stride_con > 0);
-
-  } else { // stride_con*scale_con < 0
-    // For negative stride*scale pre-loop checks for overflow and
-    // post-loop for underflow.
-    //
     // The overflow limit: scale*I+offset < upper_limit
-    // For pre-loop compute
+    // For the pre-loop limit compute:
     //   NOT(scale*I+offset < upper_limit)
     //   scale*I+offset >= upper_limit
     //   scale*I+offset+1 > upper_limit
@@ -1625,58 +1611,24 @@
     //     else /* scale > 0 and stride < 0 */
     //       I > (upper_limit-(offset+1))/scale
     //   )
-    //
-    // (upper_limit-offset-1) may underflow or overflow.
-    // To avoid it min(pre_limit, original_limit) is used
-    // in do_range_check() for stride > 0 and max() for < 0.
-    Node *one  = _igvn.intcon(1);
+    Node* one = _igvn.longcon(1);
     set_ctrl(one, C->root());
-
-    Node *plus_one = new (C) AddINode(offset, one);
+    Node* plus_one = new (C) AddLNode(offset, one);
     register_new_node( plus_one, pre_ctrl );
-    // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
-    *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl,
-                              scale_con < -1 && stride_con > 0);
+    *pre_limit = adjust_limit(!is_positive_stride, scale, plus_one, upper_limit, *pre_limit, pre_ctrl, round);
 
-    if (low_limit->get_int() == -max_jint) {
-      if (!RangeLimitCheck) return;
-      // We need this guard when scale*main_limit+offset >= limit
-      // due to underflow. So we need execute main-loop while
-      // scale*I+offset+1 > min_int. But (min_int-offset-1) will
-      // underflow when (offset+1) > 0 and X will be < main_limit
-      // when scale < 0 (and stride > 0). To avoid it we replace
-      // positive (offset+1) with 0.
-      //
-      // Also (min_int+1 == -max_int) is used instead of min_int here
-      // to avoid problem with scale == -1 (min_int/(-1) == min_int).
-      Node* shift = _igvn.intcon(31);
-      set_ctrl(shift, C->root());
-      Node* sign = new (C) RShiftINode(plus_one, shift);
-      register_new_node(sign, pre_ctrl);
-      plus_one = new (C) AndINode(plus_one, sign);
-      register_new_node(plus_one, pre_ctrl);
-    } else {
-      assert(low_limit->get_int() == 0, "wrong low limit for range check");
-      // The only problem we have here when offset == max_int
-      // since (max_int+1) == min_int and (0-min_int) == min_int.
-      // But it is fine since main loop will either have
-      // less iterations or will be skipped in such case.
-    }
-    // The underflow limit: low_limit <= scale*I+offset.
-    // For main-loop compute
+    // The underflow limit: low_limit <= scale*I+offset
+    // For the main-loop limit compute:
     //   scale*I+offset+1 > low_limit
     //   ( if (scale < 0) /* and stride > 0 */
     //       I < (low_limit-(offset+1))/scale
     //     else /* scale > 0 and stride < 0 */
     //       I > (low_limit-(offset+1))/scale
     //   )
-
-    *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl,
-                               false);
+    *main_limit = adjust_limit(is_positive_stride, scale, plus_one, low_limit, *main_limit, pre_ctrl, false);
   }
 }
 
-
 //------------------------------is_scaled_iv---------------------------------
 // Return true if exp is a constant times an induction var
 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
@@ -1836,22 +1788,14 @@
   // Must know if its a count-up or count-down loop
 
   int stride_con = cl->stride_con();
-  Node *zero = _igvn.intcon(0);
-  Node *one  = _igvn.intcon(1);
+  Node* zero = _igvn.longcon(0);
+  Node* one  = _igvn.longcon(1);
   // Use symmetrical int range [-max_jint,max_jint]
-  Node *mini = _igvn.intcon(-max_jint);
+  Node* mini = _igvn.longcon(-max_jint);
   set_ctrl(zero, C->root());
   set_ctrl(one,  C->root());
   set_ctrl(mini, C->root());
 
-  // Range checks that do not dominate the loop backedge (ie.
-  // conditionally executed) can lengthen the pre loop limit beyond
-  // the original loop limit. To prevent this, the pre limit is
-  // (for stride > 0) MINed with the original loop limit (MAXed
-  // stride < 0) when some range_check (rc) is conditionally
-  // executed.
-  bool conditional_rc = false;
-
   // Check loop body for tests of trip-counter plus loop-invariant vs
   // loop-invariant.
   for( uint i = 0; i < loop->_body.size(); i++ ) {
@@ -1930,15 +1874,20 @@
       // stride_con and scale_con can be negative which will flip about the
       // sense of the test.
 
+      // Perform the limit computations in jlong to avoid overflow
+      jlong lscale_con = scale_con;
+      Node* int_offset = offset;
+      offset = new (C) ConvI2LNode(offset);
+      register_new_node(offset, pre_ctrl);
+      Node* int_limit = limit;
+      limit = new (C) ConvI2LNode(limit);
+      register_new_node(limit, pre_ctrl);
+
       // Adjust pre and main loop limits to guard the correct iteration set
       if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
         if( b_test._test == BoolTest::lt ) { // Range checks always use lt
           // The underflow and overflow limits: 0 <= scale*I+offset < limit
-          add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
-          if (!conditional_rc) {
-            // (0-offset)/scale could be outside of loop iterations range.
-            conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
-          }
+          add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
         } else {
 #ifndef PRODUCT
           if( PrintOpto )
@@ -1952,16 +1901,16 @@
           // Fall into GE case
         case BoolTest::ge:
           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
-          scale_con = -scale_con;
-          offset = new (C) SubINode( zero, offset );
+          lscale_con = -lscale_con;
+          offset = new (C) SubLNode(zero, offset);
           register_new_node( offset, pre_ctrl );
-          limit  = new (C) SubINode( zero, limit  );
+          limit  = new (C) SubLNode(zero, limit);
           register_new_node( limit, pre_ctrl );
           // Fall into LE case
         case BoolTest::le:
           if (b_test._test != BoolTest::gt) {
             // Convert X <= Y to X < Y+1
-            limit = new (C) AddINode( limit, one );
+            limit = new (C) AddLNode(limit, one);
             register_new_node( limit, pre_ctrl );
           }
           // Fall into LT case
@@ -1969,13 +1918,7 @@
           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
-          add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
-          if (!conditional_rc) {
-            // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
-            // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
-            // still be outside of loop range.
-            conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck;
-          }
+          add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
           break;
         default:
 #ifndef PRODUCT
@@ -2011,7 +1954,8 @@
   }
 
   // Update loop limits
-  if (conditional_rc) {
+  if (pre_limit != orig_limit) {
+    // Computed pre-loop limit can be outside of loop iterations range.
     pre_limit = (stride_con > 0) ? (Node*)new (C) MinINode(pre_limit, orig_limit)
                                  : (Node*)new (C) MaxINode(pre_limit, orig_limit);
     register_new_node(pre_limit, pre_ctrl);
diff -r 16b8606e7a35 -r e692f9f04b16 src/share/vm/opto/loopnode.hpp
--- a/src/share/vm/opto/loopnode.hpp	Fri Dec 14 11:22:26 2018 +0100
+++ b/src/share/vm/opto/loopnode.hpp	Wed May 13 15:59:17 2020 +0200
@@ -945,9 +945,9 @@
   // always holds true.  That is, either increase the number of iterations in
   // the pre-loop or the post-loop until the condition holds true in the main
   // loop.  Scale_con, offset and limit are all loop invariant.
-  void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
+  void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
   // Helper function for add_constraint().
-  Node* adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up);
+  Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
 
   // Partially peel loop up through last_peel node.
   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );