Mercurial > hg > jdk9-shenandoah > hotspot

--- a/.hgtags	Mon Aug 17 10:43:11 2015 +0200
+++ b/.hgtags	Mon Aug 17 10:53:11 2015 +0000
@@ -479,3 +479,4 @@
 fff6b54e9770ac4c12c2fb4cab5aa7672affa4bd jdk9-b74
 2f354281e9915275693c4e519a959b8a6f22d3a3 jdk9-b75
 0bc8d1656d6f2b1fdfe803c1305a108bb9939f35 jdk9-b76
+e66c3813789debfc06f206afde1bf7a84cb08451 jdk9-b77
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -416,7 +416,8 @@
   int jmp_off = __ offset();
   __ jmp(_patch_site_entry);
   // Add enough nops so deoptimization can overwrite the jmp above with a call
-  // and not destroy the world.
+  // and not destroy the world. We cannot use fat nops here, since the concurrent
+  // code rewrite may transiently create an illegal instruction sequence.
   for (int j = __ offset() ; j < jmp_off + 5 ; j++ ) {
     __ nop();
   }
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -345,9 +345,7 @@
   const bool do_post_padding = VerifyOops || UseCompressedClassPointers;
   if (!do_post_padding) {
     // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
-    while ((__ offset() + ic_cmp_size) % CodeEntryAlignment != 0) {
-      __ nop();
-    }
+    __ align(CodeEntryAlignment, __ offset() + ic_cmp_size);
   }
   int offset = __ offset();
   __ inline_cache_check(receiver, IC_Klass);
@@ -2861,9 +2859,7 @@
       case lir_virtual_call:  // currently, sparc-specific for niagara
       default: ShouldNotReachHere();
     }
-    while (offset++ % BytesPerWord != 0) {
-      __ nop();
-    }
+    __ align(BytesPerWord, offset);
   }
 }

@@ -2902,10 +2898,7 @@
   int start = __ offset();
   if (os::is_MP()) {
     // make sure that the displacement word of the call ends up word aligned
-    int offset = __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset;
-    while (offset++ % BytesPerWord != 0) {
-      __ nop();
-    }
+    __ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset);
   }
   __ relocate(static_stub_Relocation::spec(call_pc));
   __ mov_metadata(rbx, (Metadata*)NULL);
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -970,8 +970,12 @@
 }

 void MacroAssembler::align(int modulus) {
-  if (offset() % modulus != 0) {
-    nop(modulus - (offset() % modulus));
+  align(modulus, offset());
+}
+
+void MacroAssembler::align(int modulus, int target) {
+  if (target % modulus != 0) {
+    nop(modulus - (target % modulus));
   }
 }
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Mon Aug 17 10:53:11 2015 +0000
@@ -192,6 +192,7 @@

   // Alignment
   void align(int modulus);
+  void align(int modulus, int target);

   // A 5 byte nop that is safe for patching (see patch_verified_entry)
   void fat_nop();
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -33,7 +33,9 @@
 #include "runtime/os.hpp"

 void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_code, Register obj, CodeEmitInfo* info) {
-  // we must have enough patching space so that call can be inserted
+  // We must have enough patching space so that a call can be inserted.
+  // We cannot use fat nops here, since the concurrent code rewrite may transiently
+  // create an illegal instruction sequence.
   while ((intx) _masm->pc() - (intx) patch->pc_start() < NativeCall::instruction_size) {
     _masm->nop();
   }
@@ -592,9 +594,7 @@
 void LIR_Assembler::emit_op0(LIR_Op0* op) {
   switch (op->code()) {
     case lir_word_align: {
-      while (code_offset() % BytesPerWord != 0) {
-        _masm->nop();
-      }
+      _masm->align(BytesPerWord);
       break;
     }
--- a/src/share/vm/opto/block.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/block.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -393,7 +393,7 @@
   VectorSet visited(a);

   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(a, C->unique() >> 1);
+  Node_Stack nstack(a, C->live_nodes() >> 1);
   nstack.push(_root, 0);
   uint sum = 0;                 // Counter for blocks
--- a/src/share/vm/opto/cfgnode.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/cfgnode.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -802,7 +802,7 @@
   Compile *C = igvn->C;
   Arena *a = Thread::current()->resource_area();
   Node_Array node_map = new Node_Array(a);
-  Node_Stack stack(a, C->unique() >> 4);
+  Node_Stack stack(a, C->live_nodes() >> 4);
   PhiNode *nphi = slice_memory(at);
   igvn->register_new_node_with_optimizer( nphi );
   node_map.map(_idx, nphi);
--- a/src/share/vm/opto/compile.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/compile.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -3315,7 +3315,7 @@

   // Visit everybody reachable!
   // Allocate stack of size C->unique()/2 to avoid frequent realloc
-  Node_Stack nstack(unique() >> 1);
+  Node_Stack nstack(live_nodes() >> 1);
   final_graph_reshaping_walk(nstack, root(), frc);

   // Check for unreachable (from below) code (i.e., infinite loops).
--- a/src/share/vm/opto/domgraph.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/domgraph.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -507,7 +507,7 @@
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
 int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
   // Allocate stack of size C->unique()/8 to avoid frequent realloc
-  GrowableArray <Node *> dfstack(pil->C->unique() >> 3);
+  GrowableArray <Node *> dfstack(pil->C->live_nodes() >> 3);
   Node *b = pil->C->root();
   int dfsnum = 1;
   dfsorder[b->_idx] = dfsnum; // Cache parent's dfsnum for a later use
--- a/src/share/vm/opto/gcm.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/gcm.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -107,8 +107,8 @@
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
-  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique() + 8);
+  // Allocate node stack of size C->live_nodes()+8 to avoid frequent realloc
+  GrowableArray <Node *> spstack(C->live_nodes() + 8);
   spstack.push(_root);
   while (spstack.is_nonempty()) {
     Node* node = spstack.pop();
@@ -1310,7 +1310,7 @@
   visited.Clear();
   Node_List stack(arena);
   // Pre-grow the list
-  stack.map((C->unique() >> 1) + 16, NULL);
+  stack.map((C->live_nodes() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
--- a/src/share/vm/opto/loopTransform.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/loopTransform.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -1282,7 +1282,7 @@

   if (C->do_vector_loop() && (PrintOpto && VerifyLoopOptimizations || TraceLoopOpts)) {
     Arena* arena = Thread::current()->resource_area();
-    Node_Stack stack(arena, C->unique() >> 2);
+    Node_Stack stack(arena, C->live_nodes() >> 2);
     Node_List rpo_list;
     VectorSet visited(arena);
     visited.set(loop_head->_idx);
--- a/src/share/vm/opto/loopnode.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/loopnode.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -2231,7 +2231,7 @@
   // _nodes array holds the earliest legal controlling CFG node.

   // Allocate stack with enough space to avoid frequent realloc
-  int stack_size = (C->unique() >> 1) + 16; // (unique>>1)+16 from Java2D stats
+  int stack_size = (C->live_nodes() >> 1) + 16; // (live_nodes>>1)+16 from Java2D stats
   Node_Stack nstack( a, stack_size );

   visited.Clear();
@@ -2691,7 +2691,7 @@
     }
   }
   if (_dom_stk == NULL) {
-    uint init_size = C->unique() / 100; // Guess that 1/100 is a reasonable initial size.
+    uint init_size = C->live_nodes() / 100; // Guess that 1/100 is a reasonable initial size.
     if (init_size < 10) init_size = 10;
     _dom_stk = new GrowableArray<uint>(init_size);
   }
@@ -2781,8 +2781,8 @@
 // The sort is of size number-of-control-children, which generally limits
 // it to size 2 (i.e., I just choose between my 2 target loops).
 void PhaseIdealLoop::build_loop_tree() {
-  // Allocate stack of size C->unique()/2 to avoid frequent realloc
-  GrowableArray <Node *> bltstack(C->unique() >> 1);
+  // Allocate stack of size C->live_nodes()/2 to avoid frequent realloc
+  GrowableArray <Node *> bltstack(C->live_nodes() >> 1);
   Node *n = C->root();
   bltstack.push(n);
   int pre_order = 1;
@@ -3672,7 +3672,7 @@
 void PhaseIdealLoop::dump( ) const {
   ResourceMark rm;
   Arena* arena = Thread::current()->resource_area();
-  Node_Stack stack(arena, C->unique() >> 2);
+  Node_Stack stack(arena, C->live_nodes() >> 2);
   Node_List rpo_list;
   VectorSet visited(arena);
   visited.set(C->top()->_idx);
--- a/src/share/vm/opto/matcher.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/matcher.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -2050,7 +2050,7 @@
 // Set bits if Node is shared or otherwise a root
 void Matcher::find_shared( Node *n ) {
   // Allocate stack of size C->unique() * 2 to avoid frequent realloc
-  MStack mstack(C->unique() * 2);
+  MStack mstack(C->live_nodes() * 2);
   // Mark nodes as address_visited if they are inputs to an address expression
   VectorSet address_visited(Thread::current()->resource_area());
   mstack.push(n, Visit);     // Don't need to pre-visit root node
--- a/src/share/vm/opto/node.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/node.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -1799,7 +1799,7 @@
 static void dump_nodes(const Node* start, int d, bool only_ctrl) {
   if (NotANode(start)) return;

-  GrowableArray <Node *> nstack(Compile::current()->unique());
+  GrowableArray <Node *> nstack(Compile::current()->live_nodes());
   collect_nodes_i(&nstack, start, d, (uint) ABS(d), true, only_ctrl, false);

   int end = nstack.length();
--- a/src/share/vm/opto/phaseX.cpp	Mon Aug 17 10:43:11 2015 +0200
+++ b/src/share/vm/opto/phaseX.cpp	Mon Aug 17 10:53:11 2015 +0000
@@ -791,7 +791,7 @@
 //------------------------------PhaseIterGVN-----------------------------------
 // Initialize hash table to fresh and clean for +VerifyOpto
 PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ) : PhaseGVN(igvn,dummy), _worklist( ),
-                                                                      _stack(C->unique() >> 1),
+                                                                      _stack(C->live_nodes() >> 1),
                                                                       _delay_transform(false) {
 }

@@ -808,7 +808,11 @@
 // Initialize with previous PhaseGVN info from Parser
 PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
                                               _worklist(*C->for_igvn()),
-                                              _stack(C->unique() >> 1),
+// TODO: Before incremental inlining it was allocated only once and it was fine. Now that
+//       the constructor is used in incremental inlining, this consumes too much memory:
+//                                            _stack(C->live_nodes() >> 1),
+//       So, as a band-aid, we replace this by:
+                                              _stack(C->comp_arena(), 32),
                                               _delay_transform(false)
 {
   uint max;
@@ -1638,7 +1642,7 @@
   _nodes.map( n->_idx, new_node );  // Flag as having been cloned

   // Allocate stack of size _nodes.Size()/2 to avoid frequent realloc
-  GrowableArray <Node *> trstack(C->unique() >> 1);
+  GrowableArray <Node *> trstack(C->live_nodes() >> 1);

   trstack.push(new_node);           // Process children of cloned node
   while ( trstack.is_nonempty() ) {