# HG changeset patch # User adlertz # Date 1493048973 -3600 # Node ID 34d9f1ce747b1c8ecf479250201220f58332606c # Parent 1c3719302f465663d7c0b6c430a0da6a229f95bf 8023003, PR3209: Cleanup the public interface to PhaseCFG Summary: public methods that don't need to be public should be private. Reviewed-by: kvn, twisti diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/block.cpp --- a/src/share/vm/opto/block.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/block.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -35,10 +35,6 @@ #include "opto/rootnode.hpp" #include "utilities/copy.hpp" -// Optimization - Graph Style - - -//----------------------------------------------------------------------------- void Block_Array::grow( uint i ) { assert(i >= Max(), "must be an overflow"); debug_only(_limit = i+1); @@ -54,7 +50,6 @@ Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) ); } -//============================================================================= void Block_List::remove(uint i) { assert(i < _cnt, "index out of bounds"); Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*))); @@ -76,8 +71,6 @@ } #endif -//============================================================================= - uint Block::code_alignment() { // Check for Root block if (_pre_order == 0) return CodeEntryAlignment; @@ -113,7 +106,6 @@ return unit_sz; // no particular alignment } -//----------------------------------------------------------------------------- // Compute the size of first 'inst_cnt' instructions in this block. // Return the number of instructions left to compute if the block has // less then 'inst_cnt' instructions. Stop, and return 0 if sum_size @@ -138,7 +130,6 @@ return inst_cnt; } -//----------------------------------------------------------------------------- uint Block::find_node( const Node *n ) const { for( uint i = 0; i < _nodes.size(); i++ ) { if( _nodes[i] == n ) @@ -161,7 +152,6 @@ return false; } -//------------------------------is_Empty--------------------------------------- // Return empty status of a block. Empty blocks contain only the head, other // ideal nodes, and an optional trailing goto. int Block::is_Empty() const { @@ -200,7 +190,6 @@ return not_empty; } -//------------------------------has_uncommon_code------------------------------ // Return true if the block's code implies that it is likely to be // executed infrequently. Check to see if the block ends in a Halt or // a low probability call. @@ -226,7 +215,6 @@ return op == Op_Halt; } -//------------------------------is_uncommon------------------------------------ // True if block is low enough frequency or guarded by a test which // mostly does not go here. bool Block::is_uncommon(PhaseCFG* cfg) const { @@ -279,7 +267,6 @@ return false; } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void Block::dump_bidx(const Block* orig, outputStream* st) const { if (_pre_order) st->print("B%d",_pre_order); @@ -372,13 +359,12 @@ } #endif -//============================================================================= -//------------------------------PhaseCFG--------------------------------------- PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher) : Phase(CFG) , _block_arena(arena) +, _root(root) +, _matcher(matcher) , _node_to_block_mapping(arena) -, _root(root) , _node_latency(NULL) #ifndef PRODUCT , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining")) @@ -398,11 +384,10 @@ _goto->set_req(0,_goto); // Build the CFG in Reverse Post Order - _num_blocks = build_cfg(); - _broot = get_block_for_node(_root); + _number_of_blocks = build_cfg(); + _root_block = get_block_for_node(_root); } -//------------------------------build_cfg-------------------------------------- // Build a proper looking CFG. Make every block begin with either a StartNode // or a RegionNode. Make every block end with either a Goto, If or Return. // The RootNode both starts and ends it's own block. Do this with a recursive @@ -504,13 +489,12 @@ return sum; } -//------------------------------insert_goto_at--------------------------------- // Inserts a goto & corresponding basic block between // block[block_no] and its succ_no'th successor block void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) { // get block with block_no - assert(block_no < _num_blocks, "illegal block number"); - Block* in = _blocks[block_no]; + assert(block_no < number_of_blocks(), "illegal block number"); + Block* in = get_block(block_no); // get successor block succ_no assert(succ_no < in->_num_succs, "illegal successor number"); Block* out = in->_succs[succ_no]; @@ -545,11 +529,9 @@ // Set the frequency of the new block block->_freq = freq; // add new basic block to basic block list - _blocks.insert(block_no + 1, block); - _num_blocks++; + add_block_at(block_no + 1, block); } -//------------------------------no_flip_branch--------------------------------- // Does this block end in a multiway branch that cannot have the default case // flipped for another case? static bool no_flip_branch(Block *b) { @@ -577,7 +559,6 @@ return false; } -//------------------------------convert_NeverBranch_to_Goto-------------------- // Check for NeverBranch at block end. This needs to become a GOTO to the // true target. NeverBranch are treated as a conditional branch that always // goes the same direction for most of the optimizer and are used to give a @@ -615,7 +596,6 @@ dead->_nodes[k]->del_req(j); } -//------------------------------move_to_next----------------------------------- // Helper function to move block bx to the slot following b_index. Return // true if the move is successful, otherwise false bool PhaseCFG::move_to_next(Block* bx, uint b_index) { @@ -623,20 +603,22 @@ // Return false if bx is already scheduled. uint bx_index = bx->_pre_order; - if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) { + if ((bx_index <= b_index) && (get_block(bx_index) == bx)) { return false; } // Find the current index of block bx on the block list bx_index = b_index + 1; - while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++; - assert(_blocks[bx_index] == bx, "block not found"); + while (bx_index < number_of_blocks() && get_block(bx_index) != bx) { + bx_index++; + } + assert(get_block(bx_index) == bx, "block not found"); // If the previous block conditionally falls into bx, return false, // because moving bx will create an extra jump. for(uint k = 1; k < bx->num_preds(); k++ ) { Block* pred = get_block_for_node(bx->pred(k)); - if (pred == _blocks[bx_index-1]) { + if (pred == get_block(bx_index - 1)) { if (pred->_num_succs != 1) { return false; } @@ -649,7 +631,6 @@ return true; } -//------------------------------move_to_end------------------------------------ // Move empty and uncommon blocks to the end. void PhaseCFG::move_to_end(Block *b, uint i) { int e = b->is_Empty(); @@ -667,31 +648,31 @@ _blocks.push(b); } -//---------------------------set_loop_alignment-------------------------------- // Set loop alignment for every block void PhaseCFG::set_loop_alignment() { - uint last = _num_blocks; - assert( _blocks[0] == _broot, "" ); + uint last = number_of_blocks(); + assert(get_block(0) == get_root_block(), ""); - for (uint i = 1; i < last; i++ ) { - Block *b = _blocks[i]; - if (b->head()->is_Loop()) { - b->set_loop_alignment(b); + for (uint i = 1; i < last; i++) { + Block* block = get_block(i); + if (block->head()->is_Loop()) { + block->set_loop_alignment(block); } } } -//-----------------------------remove_empty------------------------------------ // Make empty basic blocks to be "connector" blocks, Move uncommon blocks // to the end. -void PhaseCFG::remove_empty() { +void PhaseCFG::remove_empty_blocks() { // Move uncommon blocks to the end - uint last = _num_blocks; - assert( _blocks[0] == _broot, "" ); + uint last = number_of_blocks(); + assert(get_block(0) == get_root_block(), ""); for (uint i = 1; i < last; i++) { - Block *b = _blocks[i]; - if (b->is_connector()) break; + Block* block = get_block(i); + if (block->is_connector()) { + break; + } // Check for NeverBranch at block end. This needs to become a GOTO to the // true target. NeverBranch are treated as a conditional branch that @@ -699,30 +680,33 @@ // to give a fake exit path to infinite loops. At this late stage they // need to turn into Goto's so that when you enter the infinite loop you // indeed hang. - if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch ) - convert_NeverBranch_to_Goto(b); + if (block->_nodes[block->end_idx()]->Opcode() == Op_NeverBranch) { + convert_NeverBranch_to_Goto(block); + } // Look for uncommon blocks and move to end. if (!C->do_freq_based_layout()) { - if (b->is_uncommon(this)) { - move_to_end(b, i); + if (block->is_uncommon(this)) { + move_to_end(block, i); last--; // No longer check for being uncommon! - if( no_flip_branch(b) ) { // Fall-thru case must follow? - b = _blocks[i]; // Find the fall-thru block - move_to_end(b, i); + if (no_flip_branch(block)) { // Fall-thru case must follow? + // Find the fall-thru block + block = get_block(i); + move_to_end(block, i); last--; } - i--; // backup block counter post-increment + // backup block counter post-increment + i--; } } } // Move empty blocks to the end - last = _num_blocks; + last = number_of_blocks(); for (uint i = 1; i < last; i++) { - Block *b = _blocks[i]; - if (b->is_Empty() != Block::not_empty) { - move_to_end(b, i); + Block* block = get_block(i); + if (block->is_Empty() != Block::not_empty) { + move_to_end(block, i); last--; i--; } @@ -785,108 +769,108 @@ return bnext; } -//-----------------------------fixup_flow-------------------------------------- // Fix up the final control flow for basic blocks. void PhaseCFG::fixup_flow() { // Fixup final control flow for the blocks. Remove jump-to-next // block. If neither arm of an IF follows the conditional branch, we // have to add a second jump after the conditional. We place the // TRUE branch target in succs[0] for both GOTOs and IFs. - for (uint i=0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - b->_pre_order = i; // turn pre-order into block-index + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->_pre_order = i; // turn pre-order into block-index // Connector blocks need no further processing. - if (b->is_connector()) { - assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(), - "All connector blocks should sink to the end"); + if (block->is_connector()) { + assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end"); continue; } - assert(b->is_Empty() != Block::completely_empty, - "Empty blocks should be connectors"); + assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors"); - Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL; - Block *bs0 = b->non_connector_successor(0); + Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL; + Block* bs0 = block->non_connector_successor(0); // Check for multi-way branches where I cannot negate the test to // exchange the true and false targets. - if (no_flip_branch(b)) { + if (no_flip_branch(block)) { // Find fall through case - if must fall into its target // Get the index of the branch's first successor. - int branch_idx = b->_nodes.size() - b->_num_succs; + int branch_idx = block->_nodes.size() - block->_num_succs; // The branch is 1 before the branch's first successor. - Node *bra = b->_nodes[branch_idx-1]; + Node *branch = block->_nodes[branch_idx-1]; // Handle no-flip branches which have implicit checks and which require // special block ordering and individual semantics of the 'fall through // case'. if ((TrapBasedNullChecks || TrapBasedRangeChecks) && - bra->is_Mach() && bra->as_Mach()->is_TrapBasedCheckNode()) { - bnext = fixup_trap_based_check(bra, b, i, bnext); + branch->is_Mach() && branch->as_Mach()->is_TrapBasedCheckNode()) { + bnext = fixup_trap_based_check(branch, block, i, bnext); } else { // Else, default handling for no-flip branches - for (uint j2 = 0; j2 < b->_num_succs; j2++) { - const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj(); + for (uint j2 = 0; j2 < block->_num_succs; j2++) { + const ProjNode* p = block->_nodes[branch_idx + j2]->as_Proj(); if (p->_con == 0) { // successor j2 is fall through case - if (b->non_connector_successor(j2) != bnext) { + if (block->non_connector_successor(j2) != bnext) { // but it is not the next block => insert a goto insert_goto_at(i, j2); } // Put taken branch in slot 0 - if (j2 == 0 && b->_num_succs == 2) { + if (j2 == 0 && block->_num_succs == 2) { // Flip targets in succs map - Block *tbs0 = b->_succs[0]; - Block *tbs1 = b->_succs[1]; - b->_succs.map(0, tbs1); - b->_succs.map(1, tbs0); + Block *tbs0 = block->_succs[0]; + Block *tbs1 = block->_succs[1]; + block->_succs.map(0, tbs1); + block->_succs.map(1, tbs0); } break; } } } + // Remove all CatchProjs - for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); + for (uint j = 0; j < block->_num_succs; j++) { + block->_nodes.pop(); + } - } else if (b->_num_succs == 1) { + } else if (block->_num_succs == 1) { // Block ends in a Goto? if (bnext == bs0) { // We fall into next block; remove the Goto - b->_nodes.pop(); + block->_nodes.pop(); } - } else if( b->_num_succs == 2 ) { // Block ends in a If? + } else if(block->_num_succs == 2) { // Block ends in a If? // Get opcode of 1st projection (matches _succs[0]) // Note: Since this basic block has 2 exits, the last 2 nodes must // be projections (in any order), the 3rd last node must be // the IfNode (we have excluded other 2-way exits such as // CatchNodes already). - MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach(); - ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj(); - ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj(); + MachNode* iff = block->_nodes[block->_nodes.size() - 3]->as_Mach(); + ProjNode* proj0 = block->_nodes[block->_nodes.size() - 2]->as_Proj(); + ProjNode* proj1 = block->_nodes[block->_nodes.size() - 1]->as_Proj(); // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1]. - assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0"); - assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1"); + assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0"); + assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1"); - Block *bs1 = b->non_connector_successor(1); + Block* bs1 = block->non_connector_successor(1); // Check for neither successor block following the current // block ending in a conditional. If so, move one of the // successors after the current one, provided that the // successor was previously unscheduled, but moveable // (i.e., all paths to it involve a branch). - if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) { + if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) { // Choose the more common successor based on the probability // of the conditional branch. - Block *bx = bs0; - Block *by = bs1; + Block* bx = bs0; + Block* by = bs1; // _prob is the probability of taking the true path. Make // p the probability of taking successor #1. float p = iff->as_MachIf()->_prob; - if( proj0->Opcode() == Op_IfTrue ) { + if (proj0->Opcode() == Op_IfTrue) { p = 1.0 - p; } @@ -913,14 +897,16 @@ // succs[1]. if (bnext == bs0) { // Fall-thru case in succs[0], so flip targets in succs map - Block *tbs0 = b->_succs[0]; - Block *tbs1 = b->_succs[1]; - b->_succs.map( 0, tbs1 ); - b->_succs.map( 1, tbs0 ); + Block* tbs0 = block->_succs[0]; + Block* tbs1 = block->_succs[1]; + block->_succs.map(0, tbs1); + block->_succs.map(1, tbs0); // Flip projection for each target - { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; } + ProjNode* tmp = proj0; + proj0 = proj1; + proj1 = tmp; - } else if( bnext != bs1 ) { + } else if(bnext != bs1) { // Need a double-branch // The existing conditional branch need not change. // Add a unconditional branch to the false target. @@ -930,12 +916,12 @@ } // Make sure we TRUE branch to the target - if( proj0->Opcode() == Op_IfFalse ) { + if (proj0->Opcode() == Op_IfFalse) { iff->as_MachIf()->negate(); } - b->_nodes.pop(); // Remove IfFalse & IfTrue projections - b->_nodes.pop(); + block->_nodes.pop(); // Remove IfFalse & IfTrue projections + block->_nodes.pop(); } else { // Multi-exit block, e.g. a switch statement @@ -1014,7 +1000,7 @@ NOT_PRODUCT(bool foundNode = false;) // for all blocks - for (uint i = 0; i < _num_blocks; i++) { + for (uint i = 0; i < number_of_blocks(); i++) { Block *b = _blocks[i]; // For all instructions in the current block. for (uint j = 0; j < b->_nodes.size(); j++) { @@ -1163,7 +1149,6 @@ } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { const Node *x = end->is_block_proj(); @@ -1189,10 +1174,11 @@ } void PhaseCFG::dump( ) const { - tty->print("\n--- CFG --- %d BBs\n",_num_blocks); + tty->print("\n--- CFG --- %d BBs\n", number_of_blocks()); if (_blocks.size()) { // Did we do basic-block layout? - for (uint i = 0; i < _num_blocks; i++) { - _blocks[i]->dump(this); + for (uint i = 0; i < number_of_blocks(); i++) { + const Block* block = get_block(i); + block->dump(this); } } else { // Else do it with a DFS VectorSet visited(_block_arena); @@ -1201,27 +1187,26 @@ } void PhaseCFG::dump_headers() { - for( uint i = 0; i < _num_blocks; i++ ) { - if (_blocks[i]) { - _blocks[i]->dump_head(this); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + if (block != NULL) { + block->dump_head(this); } } } -void PhaseCFG::verify( ) const { +void PhaseCFG::verify() const { #ifdef ASSERT // Verify sane CFG - for (uint i = 0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - uint cnt = b->_nodes.size(); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + uint cnt = block->_nodes.size(); uint j; for (j = 0; j < cnt; j++) { - Node *n = b->_nodes[j]; - assert(get_block_for_node(n) == b, ""); - if (j >= 1 && n->is_Mach() && - n->as_Mach()->ideal_Opcode() == Op_CreateEx) { - assert(j == 1 || b->_nodes[j-1]->is_Phi(), - "CreateEx must be first instruction in block"); + Node *n = block->_nodes[j]; + assert(get_block_for_node(n) == block, ""); + if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) { + assert(j == 1 || block->_nodes[j-1]->is_Phi(), "CreateEx must be first instruction in block"); } for (uint k = 0; k < n->req(); k++) { Node *def = n->in(k); @@ -1231,8 +1216,7 @@ // Uses must follow their definition if they are at the same block. // Mostly done to check that MachSpillCopy nodes are placed correctly // when CreateEx node is moved in build_ifg_physical(). - if (get_block_for_node(def) == b && - !(b->head()->is_Loop() && n->is_Phi()) && + if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) && // See (+++) comment in reg_split.cpp !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) { bool is_loop = false; @@ -1244,29 +1228,29 @@ } } } - assert(is_loop || b->find_node(def) < j, "uses must follow definitions"); + assert(is_loop || block->find_node(def) < j, "uses must follow definitions"); } } } } - j = b->end_idx(); - Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj(); - assert( bp, "last instruction must be a block proj" ); - assert( bp == b->_nodes[j], "wrong number of successors for this block" ); + j = block->end_idx(); + Node* bp = (Node*)block->_nodes[block->_nodes.size() - 1]->is_block_proj(); + assert(bp, "last instruction must be a block proj"); + assert(bp == block->_nodes[j], "wrong number of successors for this block"); if (bp->is_Catch()) { - while (b->_nodes[--j]->is_MachProj()) ; - assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call"); + while (block->_nodes[--j]->is_MachProj()) { + ; + } + assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call"); } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) { - assert(b->_num_succs == 2, "Conditional branch must have two targets"); + assert(block->_num_succs == 2, "Conditional branch must have two targets"); } } #endif } #endif -//============================================================================= -//------------------------------UnionFind-------------------------------------- UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) { Copy::zero_to_bytes( _indices, sizeof(uint)*max ); } @@ -1291,7 +1275,6 @@ for( uint i=0; ifreq(); @@ -1412,7 +1390,6 @@ return dist1 - dist0; } -//------------------------------trace_frequency_order-------------------------- // Comparison function for edges extern "C" int trace_frequency_order(const void *p0, const void *p1) { Trace *tr0 = *(Trace **) p0; @@ -1438,17 +1415,15 @@ return diff; } -//------------------------------find_edges------------------------------------- // Find edges of interest, i.e, those which can fall through. Presumes that // edges which don't fall through are of low frequency and can be generally // ignored. Initialize the list of traces. -void PhaseBlockLayout::find_edges() -{ +void PhaseBlockLayout::find_edges() { // Walk the blocks, creating edges and Traces uint i; Trace *tr = NULL; - for (i = 0; i < _cfg._num_blocks; i++) { - Block *b = _cfg._blocks[i]; + for (i = 0; i < _cfg.number_of_blocks(); i++) { + Block* b = _cfg.get_block(i); tr = new Trace(b, next, prev); traces[tr->id()] = tr; @@ -1472,7 +1447,7 @@ if (n->num_preds() != 1) break; i++; - assert(n = _cfg._blocks[i], "expecting next block"); + assert(n = _cfg.get_block(i), "expecting next block"); tr->append(n); uf->map(n->_pre_order, tr->id()); traces[n->_pre_order] = NULL; @@ -1496,8 +1471,8 @@ } // Group connector blocks into one trace - for (i++; i < _cfg._num_blocks; i++) { - Block *b = _cfg._blocks[i]; + for (i++; i < _cfg.number_of_blocks(); i++) { + Block *b = _cfg.get_block(i); assert(b->is_connector(), "connector blocks at the end"); tr->append(b); uf->map(b->_pre_order, tr->id()); @@ -1505,10 +1480,8 @@ } } -//------------------------------union_traces---------------------------------- // Union two traces together in uf, and null out the trace in the list -void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) -{ +void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) { uint old_id = old_trace->id(); uint updated_id = updated_trace->id(); @@ -1532,10 +1505,8 @@ traces[hi_id] = NULL; } -//------------------------------grow_traces------------------------------------- // Append traces together via the most frequently executed edges -void PhaseBlockLayout::grow_traces() -{ +void PhaseBlockLayout::grow_traces() { // Order the edges, and drive the growth of Traces via the most // frequently executed edges. edges->sort(edge_order); @@ -1577,11 +1548,9 @@ } } -//------------------------------merge_traces----------------------------------- // Embed one trace into another, if the fork or join points are sufficiently // balanced. -void PhaseBlockLayout::merge_traces(bool fall_thru_only) -{ +void PhaseBlockLayout::merge_traces(bool fall_thru_only) { // Walk the edge list a another time, looking at unprocessed edges. // Fold in diamonds for (int i = 0; i < edges->length(); i++) { @@ -1635,7 +1604,7 @@ src_trace->insert_after(src_block, targ_trace); union_traces(src_trace, targ_trace); } else if (src_at_tail) { - if (src_trace != trace(_cfg._broot)) { + if (src_trace != trace(_cfg.get_root_block())) { e->set_state(CFGEdge::connected); targ_trace->insert_before(targ_block, src_trace); union_traces(targ_trace, src_trace); @@ -1644,7 +1613,7 @@ } else if (e->state() == CFGEdge::open) { // Append traces, even without a fall-thru connection. // But leave root entry at the beginning of the block list. - if (targ_trace != trace(_cfg._broot)) { + if (targ_trace != trace(_cfg.get_root_block())) { e->set_state(CFGEdge::connected); src_trace->append(targ_trace); union_traces(src_trace, targ_trace); @@ -1653,11 +1622,9 @@ } } -//----------------------------reorder_traces----------------------------------- // Order the sequence of the traces in some desirable way, and fixup the // jumps at the end of each block. -void PhaseBlockLayout::reorder_traces(int count) -{ +void PhaseBlockLayout::reorder_traces(int count) { ResourceArea *area = Thread::current()->resource_area(); Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count); Block_List worklist; @@ -1672,15 +1639,14 @@ } // The entry block should be first on the new trace list. - Trace *tr = trace(_cfg._broot); + Trace *tr = trace(_cfg.get_root_block()); assert(tr == new_traces[0], "entry trace misplaced"); // Sort the new trace list by frequency qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order); // Patch up the successor blocks - _cfg._blocks.reset(); - _cfg._num_blocks = 0; + _cfg.clear_blocks(); for (int i = 0; i < new_count; i++) { Trace *tr = new_traces[i]; if (tr != NULL) { @@ -1689,17 +1655,15 @@ } } -//------------------------------PhaseBlockLayout------------------------------- // Order basic blocks based on frequency -PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) : - Phase(BlockLayout), - _cfg(cfg) -{ +PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) +: Phase(BlockLayout) +, _cfg(cfg) { ResourceMark rm; ResourceArea *area = Thread::current()->resource_area(); // List of traces - int size = _cfg._num_blocks + 1; + int size = _cfg.number_of_blocks() + 1; traces = NEW_ARENA_ARRAY(area, Trace *, size); memset(traces, 0, size*sizeof(Trace*)); next = NEW_ARENA_ARRAY(area, Block *, size); @@ -1732,11 +1696,10 @@ // Re-order all the remaining traces by frequency reorder_traces(size); - assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink"); + assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink"); } -//------------------------------backedge--------------------------------------- // Edge e completes a loop in a trace. If the target block is head of the // loop, rotate the loop block so that the loop ends in a conditional branch. bool Trace::backedge(CFGEdge *e) { @@ -1788,14 +1751,12 @@ return loop_rotated; } -//------------------------------fixup_blocks----------------------------------- // push blocks onto the CFG list // ensure that blocks have the correct two-way branch sense void Trace::fixup_blocks(PhaseCFG &cfg) { Block *last = last_block(); for (Block *b = first_block(); b != NULL; b = next(b)) { - cfg._blocks.push(b); - cfg._num_blocks++; + cfg.add_block(b); if (!b->is_connector()) { int nfallthru = b->num_fall_throughs(); if (b != last) { diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/block.hpp --- a/src/share/vm/opto/block.hpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/block.hpp Mon Apr 24 16:49:33 2017 +0100 @@ -350,20 +350,74 @@ class PhaseCFG : public Phase { friend class VMStructs; private: + + // Root of whole program + RootNode* _root; + + // The block containing the root node + Block* _root_block; + + // List of basic blocks that are created during CFG creation + Block_List _blocks; + + // Count of basic blocks + uint _number_of_blocks; + // Arena for the blocks to be stored in Arena* _block_arena; + // The matcher for this compilation + Matcher& _matcher; + // Map nodes to owning basic block Block_Array _node_to_block_mapping; + // Loop from the root + CFGLoop* _root_loop; + + // Outmost loop frequency + float _outer_loop_frequency; + // Build a proper looking cfg. Return count of basic blocks uint build_cfg(); - // Perform DFS search. + // Build the dominator tree so that we know where we can move instructions + void build_dominator_tree(); + + // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions + void estimate_block_frequency(); + + // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific + // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block. + // Move nodes to ensure correctness from GVN and also try to move nodes out of loops. + void global_code_motion(); + + // Schedule Nodes early in their basic blocks. + bool schedule_early(VectorSet &visited, Node_List &roots); + + // For each node, find the latest block it can be scheduled into + // and then select the cheapest block between the latest and earliest + // block to place the node. + void schedule_late(VectorSet &visited, Node_List &stack); + + // Compute the (backwards) latency of a node from a single use + int latency_from_use(Node *n, const Node *def, Node *use); + + // Compute the (backwards) latency of a node from the uses of this instruction + void partial_latency_of_defs(Node *n); + + // Compute the instruction global latency with a backwards walk + void compute_latencies_backwards(VectorSet &visited, Node_List &stack); + + // Pick a block between early and late that is a cheaper alternative + // to late. Helper for schedule_late. + Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); + + // Perform a Depth First Search (DFS). // Setup 'vertex' as DFS to vertex mapping. // Setup 'semi' as vertex to DFS mapping. // Set 'parent' to DFS parent. - uint DFS( Tarjan *tarjan ); + uint do_DFS(Tarjan* tarjan, uint rpo_counter); // Helper function to insert a node into a block void schedule_node_into_block( Node *n, Block *b ); @@ -374,7 +428,8 @@ void schedule_pinned_nodes( VectorSet &visited ); // I'll need a few machine-specific GotoNodes. Clone from this one. - MachNode *_goto; + // Used when building the CFG and creating end nodes for blocks. + MachNode* _goto; Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false); void verify_anti_dependences(Block* LCA, Node* load) { @@ -382,17 +437,73 @@ insert_anti_dependences(LCA, load, true); } + bool move_to_next(Block* bx, uint b_index); + void move_to_end(Block* bx, uint b_index); + + void insert_goto_at(uint block_no, uint succ_no); + + // Check for NeverBranch at block end. This needs to become a GOTO to the + // true target. NeverBranch are treated as a conditional branch that always + // goes the same direction for most of the optimizer and are used to give a + // fake exit path to infinite loops. At this late stage they need to turn + // into Goto's so that when you enter the infinite loop you indeed hang. + void convert_NeverBranch_to_Goto(Block *b); + + CFGLoop* create_loop_tree(); + public: PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher); - uint _num_blocks; // Count of basic blocks - Block_List _blocks; // List of basic blocks - RootNode *_root; // Root of whole program - Block *_broot; // Basic block of root - uint _rpo_ctr; - CFGLoop* _root_loop; - float _outer_loop_freq; // Outmost loop frequency + void set_latency_for_node(Node* node, int latency) { + _node_latency->at_put_grow(node->_idx, latency); + } + + uint get_latency_for_node(Node* node) { + return _node_latency->at_grow(node->_idx); + } + + // Get the outer most frequency + float get_outer_loop_frequency() const { + return _outer_loop_frequency; + } + + // Get the root node of the CFG + RootNode* get_root_node() const { + return _root; + } + + // Get the block of the root node + Block* get_root_block() const { + return _root_block; + } + // Add a block at a position and moves the later ones one step + void add_block_at(uint pos, Block* block) { + _blocks.insert(pos, block); + _number_of_blocks++; + } + + // Adds a block to the top of the block list + void add_block(Block* block) { + _blocks.push(block); + _number_of_blocks++; + } + + // Clear the list of blocks + void clear_blocks() { + _blocks.reset(); + _number_of_blocks = 0; + } + + // Get the block at position pos in _blocks + Block* get_block(uint pos) const { + return _blocks[pos]; + } + + // Number of blocks + uint number_of_blocks() const { + return _number_of_blocks; + } // set which block this node should reside in void map_node_to_block(const Node* node, Block* block) { @@ -425,62 +536,23 @@ Unique_Node_List _raw_oops; #endif - // Build dominators - void Dominators(); - - // Estimate block frequencies based on IfNode probabilities - void Estimate_Block_Frequency(); - - // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific - // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block. - void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list ); + // Do global code motion by first building dominator tree and estimate block frequency + // Returns true on success + bool do_global_code_motion(); // Compute the (backwards) latency of a node from the uses void latency_from_uses(Node *n); - // Compute the (backwards) latency of a node from a single use - int latency_from_use(Node *n, const Node *def, Node *use); - - // Compute the (backwards) latency of a node from the uses of this instruction - void partial_latency_of_defs(Node *n); - - // Schedule Nodes early in their basic blocks. - bool schedule_early(VectorSet &visited, Node_List &roots); - - // For each node, find the latest block it can be scheduled into - // and then select the cheapest block between the latest and earliest - // block to place the node. - void schedule_late(VectorSet &visited, Node_List &stack); - - // Pick a block between early and late that is a cheaper alternative - // to late. Helper for schedule_late. - Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); - - // Compute the instruction global latency with a backwards walk - void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack); - // Set loop alignment void set_loop_alignment(); // Remove empty basic blocks - void remove_empty(); + void remove_empty_blocks(); Block *fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext); void fixup_flow(); - bool move_to_next(Block* bx, uint b_index); - void move_to_end(Block* bx, uint b_index); - void insert_goto_at(uint block_no, uint succ_no); - // Check for NeverBranch at block end. This needs to become a GOTO to the - // true target. NeverBranch are treated as a conditional branch that always - // goes the same direction for most of the optimizer and are used to give a - // fake exit path to infinite loops. At this late stage they need to turn - // into Goto's so that when you enter the infinite loop you indeed hang. - void convert_NeverBranch_to_Goto(Block *b); - - CFGLoop* create_loop_tree(); - - // Insert a node into a block, and update the _bbs - void insert( Block *b, uint idx, Node *n ) { + // Insert a node into a block at index and map the node to the block + void insert(Block *b, uint idx, Node *n) { b->_nodes.insert( idx, n ); map_node_to_block(n, b); } diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/buildOopMap.cpp --- a/src/share/vm/opto/buildOopMap.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/buildOopMap.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -90,7 +90,6 @@ // OptoReg::Bad for not-callee-saved. -//------------------------------OopFlow---------------------------------------- // Structure to pass around struct OopFlow : public ResourceObj { short *_callees; // Array mapping register to callee-saved @@ -122,7 +121,6 @@ OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ); }; -//------------------------------compute_reach---------------------------------- // Given reaching-defs for this block start, compute it for this block end void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) { @@ -180,7 +178,6 @@ } } -//------------------------------merge------------------------------------------ // Merge the given flow into the 'this' flow void OopFlow::merge( OopFlow *flow, int max_reg ) { assert( _b == NULL, "merging into a happy flow" ); @@ -200,14 +197,12 @@ } -//------------------------------clone------------------------------------------ void OopFlow::clone( OopFlow *flow, int max_size ) { _b = flow->_b; memcpy( _callees, flow->_callees, sizeof(short)*max_size); memcpy( _defs , flow->_defs , sizeof(Node*)*max_size); } -//------------------------------make------------------------------------------- OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) { short *callees = NEW_ARENA_ARRAY(A,short,max_size+1); Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1); @@ -218,7 +213,6 @@ return flow; } -//------------------------------bit twiddlers---------------------------------- static int get_live_bit( int *live, int reg ) { return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); } static void set_live_bit( int *live, int reg ) { @@ -226,7 +220,6 @@ static void clr_live_bit( int *live, int reg ) { live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); } -//------------------------------build_oop_map---------------------------------- // Build an oopmap from the current flow info OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) { int framesize = regalloc->_framesize; @@ -415,19 +408,18 @@ return omap; } -//------------------------------do_liveness------------------------------------ // Compute backwards liveness on registers -static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) { - int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints); - int *tmp_live = &live[cfg->_num_blocks * max_reg_ints]; - Node *root = cfg->C->root(); +static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) { + int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints); + int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints]; + Node* root = cfg->get_root_node(); // On CISC platforms, get the node representing the stack pointer that regalloc // used for spills Node *fp = NodeSentinel; if (UseCISCSpill && root->req() > 1) { fp = root->in(1)->in(TypeFunc::FramePtr); } - memset( live, 0, cfg->_num_blocks * (max_reg_ints<number_of_blocks() * (max_reg_ints << LogBytesPerInt)); // Push preds onto worklist for (uint i = 1; i < root->req(); i++) { Block* block = cfg->get_block_for_node(root->in(i)); @@ -552,29 +544,32 @@ // Scan for any missing safepoints. Happens to infinite loops // ala ZKM.jar uint i; - for( i=1; i_num_blocks; i++ ) { - Block *b = cfg->_blocks[i]; + for (i = 1; i < cfg->number_of_blocks(); i++) { + Block* block = cfg->get_block(i); uint j; - for( j=1; j_nodes.size(); j++ ) - if( b->_nodes[j]->jvms() && - (*safehash)[b->_nodes[j]] == NULL ) + for (j = 1; j < block->_nodes.size(); j++) { + if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) { break; - if( j_nodes.size() ) break; + } + } + if (j < block->_nodes.size()) { + break; + } } - if( i == cfg->_num_blocks ) + if (i == cfg->number_of_blocks()) { break; // Got 'em all + } #ifndef PRODUCT if( PrintOpto && Verbose ) tty->print_cr("retripping live calc"); #endif // Force the issue (expensively): recheck everybody - for( i=1; i_num_blocks; i++ ) - worklist->push(cfg->_blocks[i]); + for (i = 1; i < cfg->number_of_blocks(); i++) { + worklist->push(cfg->get_block(i)); + } } - } -//------------------------------BuildOopMaps----------------------------------- // Collect GC mask info - where are all the OOPs? void Compile::BuildOopMaps() { NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); ) @@ -595,12 +590,12 @@ OopFlow *free_list = NULL; // Free, unused // Array mapping blocks to completed oopflows - OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks); - memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) ); + OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks()); + memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) ); // Do the first block 'by hand' to prime the worklist - Block *entry = _cfg->_blocks[1]; + Block *entry = _cfg->get_block(1); OopFlow *rootflow = OopFlow::make(A,max_reg,this); // Initialize to 'bottom' (not 'top') memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) ); @@ -626,7 +621,9 @@ Block *b = worklist.pop(); // Ignore root block - if( b == _cfg->_broot ) continue; + if (b == _cfg->get_root_block()) { + continue; + } // Block is already done? Happens if block has several predecessors, // he can get on the worklist more than once. if( flows[b->_pre_order] ) continue; diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/chaitin.cpp --- a/src/share/vm/opto/chaitin.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/chaitin.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -40,10 +40,8 @@ #include "opto/opcodes.hpp" #include "opto/rootnode.hpp" -//============================================================================= - #ifndef PRODUCT -void LRG::dump( ) const { +void LRG::dump() const { ttyLocker ttyl; tty->print("%d ",num_regs()); _mask.dump(); @@ -94,7 +92,6 @@ } #endif -//------------------------------score------------------------------------------ // Compute score from cost and area. Low score is best to spill. static double raw_score( double cost, double area ) { return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; @@ -125,7 +122,6 @@ return score; } -//------------------------------LRG_List--------------------------------------- LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { memset( _lidxs, 0, sizeof(uint)*max ); } @@ -145,7 +141,6 @@ #define NUMBUCKS 3 -//------------------------------Chaitin---------------------------------------- PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) : PhaseRegAlloc(unique, cfg, matcher, #ifndef PRODUCT @@ -166,32 +161,32 @@ { NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) - _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); + _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency()); uint i,j; // Build a list of basic blocks, sorted by frequency - _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); + _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); // Experiment with sorting strategies to speed compilation double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket Block **buckets[NUMBUCKS]; // Array of buckets uint buckcnt[NUMBUCKS]; // Array of bucket counters double buckval[NUMBUCKS]; // Array of bucket value cutoffs for( i = 0; i < NUMBUCKS; i++ ) { - buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); + buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); buckcnt[i] = 0; // Bump by three orders of magnitude each time cutoff *= 0.001; buckval[i] = cutoff; - for( j = 0; j < _cfg._num_blocks; j++ ) { + for ( j = 0; j < _cfg.number_of_blocks(); j++) { buckets[i][j] = NULL; } } // Sort blocks into buckets - for( i = 0; i < _cfg._num_blocks; i++ ) { + for ( i = 0; i < _cfg.number_of_blocks(); i++) { for( j = 0; j < NUMBUCKS; j++ ) { - if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) { + if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) { // Assign block to end of list for appropriate bucket - buckets[j][buckcnt[j]++] = _cfg._blocks[i]; + buckets[j][buckcnt[j]++] = _cfg.get_block(i); break; // kick out of inner loop } } @@ -204,7 +199,7 @@ } } - assert(blkcnt == _cfg._num_blocks, "Block array not totally filled"); + assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled"); } void PhaseChaitin::Register_Allocate() { @@ -524,20 +519,19 @@ C->set_indexSet_arena(NULL); // ResourceArea is at end of scope } -//------------------------------de_ssa----------------------------------------- void PhaseChaitin::de_ssa() { // Set initial Names for all Nodes. Most Nodes get the virtual register // number. A few get the ZERO live range number. These do not // get allocated, but instead rely on correct scheduling to ensure that // only one instance is simultaneously live at a time. uint lr_counter = 1; - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - uint cnt = b->_nodes.size(); + for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) { + Block* block = _cfg.get_block(i); + uint cnt = block->_nodes.size(); // Handle all the normal Nodes in the block for( uint j = 0; j < cnt; j++ ) { - Node *n = b->_nodes[j]; + Node *n = block->_nodes[j]; // Pre-color to the zero live range, or pick virtual register const RegMask &rm = n->out_RegMask(); _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 ); @@ -548,52 +542,55 @@ } -//------------------------------gather_lrg_masks------------------------------- // Gather LiveRanGe information, including register masks. Modification of // cisc spillable in_RegMasks should not be done before AggressiveCoalesce. void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // Nail down the frame pointer live range - uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr)); + uint fp_lrg = n2lidx(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr)); lrgs(fp_lrg)._cost += 1e12; // Cost is infinite // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions - for( uint j = 1; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for (uint j = 1; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; uint input_edge_start =1; // Skip control most nodes - if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base(); + if (n->is_Mach()) { + input_edge_start = n->as_Mach()->oper_input_base(); + } uint idx = n->is_Copy(); // Get virtual register number, same as LiveRanGe index uint vreg = n2lidx(n); - LRG &lrg = lrgs(vreg); - if( vreg ) { // No vreg means un-allocable (e.g. memory) + LRG& lrg = lrgs(vreg); + if (vreg) { // No vreg means un-allocable (e.g. memory) // Collect has-copy bit - if( idx ) { + if (idx) { lrg._has_copy = 1; uint clidx = n2lidx(n->in(idx)); - LRG ©_src = lrgs(clidx); + LRG& copy_src = lrgs(clidx); copy_src._has_copy = 1; } // Check for float-vs-int live range (used in register-pressure // calculations) const Type *n_type = n->bottom_type(); - if (n_type->is_floatingpoint()) + if (n_type->is_floatingpoint()) { lrg._is_float = 1; + } // Check for twice prior spilling. Once prior spilling might have // spilled 'soft', 2nd prior spill should have spilled 'hard' and // further spilling is unlikely to make progress. - if( _spilled_once.test(n->_idx) ) { + if (_spilled_once.test(n->_idx)) { lrg._was_spilled1 = 1; - if( _spilled_twice.test(n->_idx) ) + if (_spilled_twice.test(n->_idx)) { lrg._was_spilled2 = 1; + } } #ifndef PRODUCT @@ -630,16 +627,18 @@ // Check for bound register masks const RegMask &lrgmask = lrg.mask(); - if (lrgmask.is_bound(ireg)) + if (lrgmask.is_bound(ireg)) { lrg._is_bound = 1; + } // Check for maximum frequency value - if (lrg._maxfreq < b->_freq) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } // Check for oop-iness, or long/double // Check for multi-kill projection - switch( ireg ) { + switch (ireg) { case MachProjNode::fat_proj: // Fat projections have size equal to number of registers killed lrg.set_num_regs(rm.Size()); @@ -807,7 +806,7 @@ // AggressiveCoalesce. This effectively pre-virtual-splits // around uncommon uses of common defs. const RegMask &rm = n->in_RegMask(k); - if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * b->_freq) { + if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) { // Since we are BEFORE aggressive coalesce, leave the register // mask untrimmed by the call. This encourages more coalescing. // Later, AFTER aggressive, this live range will have to spill @@ -851,8 +850,9 @@ } // Check for maximum frequency value - if( lrg._maxfreq < b->_freq ) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } } // End for all allocated inputs } // end for all instructions @@ -874,7 +874,6 @@ } } -//------------------------------set_was_low------------------------------------ // Set the was-lo-degree bit. Conservative coalescing should not change the // colorability of the graph. If any live range was of low-degree before // coalescing, it should Simplify. This call sets the was-lo-degree bit. @@ -911,7 +910,6 @@ #define REGISTER_CONSTRAINED 16 -//------------------------------cache_lrg_info--------------------------------- // Compute cost/area ratio, in case we spill. Build the lo-degree list. void PhaseChaitin::cache_lrg_info( ) { @@ -945,7 +943,6 @@ } } -//------------------------------Pre-Simplify----------------------------------- // Simplify the IFG by removing LRGs of low degree that have NO copies void PhaseChaitin::Pre_Simplify( ) { @@ -996,7 +993,6 @@ // No more lo-degree no-copy live ranges to simplify } -//------------------------------Simplify--------------------------------------- // Simplify the IFG by removing LRGs of low degree. void PhaseChaitin::Simplify( ) { @@ -1133,7 +1129,6 @@ } -//------------------------------is_legal_reg----------------------------------- // Is 'reg' register legal for 'lrg'? static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) && @@ -1160,7 +1155,6 @@ return false; } -//------------------------------bias_color------------------------------------- // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1222,7 +1216,6 @@ return OptoReg::add( reg, chunk ); } -//------------------------------choose_color----------------------------------- // Choose a color in the current chunk OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); @@ -1244,7 +1237,6 @@ return lrg.mask().find_last_elem(); } -//------------------------------Select----------------------------------------- // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted // in reverse order of removal. As long as nothing of hi-degree was yanked, // everything going back is guaranteed a color. Select that color. If some @@ -1419,8 +1411,6 @@ return spill_reg-LRG::SPILL_REG; // Return number of spills } - -//------------------------------copy_was_spilled------------------------------- // Copy 'was_spilled'-edness from the source Node to the dst Node. void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { if( _spilled_once.test(src->_idx) ) { @@ -1433,14 +1423,12 @@ } } -//------------------------------set_was_spilled-------------------------------- // Set the 'spilled_once' or 'spilled_twice' flag on a node. void PhaseChaitin::set_was_spilled( Node *n ) { if( _spilled_once.test_set(n->_idx) ) _spilled_twice.set(n->_idx); } -//------------------------------fixup_spills----------------------------------- // Convert Ideal spill instructions into proper FramePtr + offset Loads and // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. void PhaseChaitin::fixup_spills() { @@ -1450,16 +1438,16 @@ NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) // Grab the Frame Pointer - Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); + Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions in block - uint last_inst = b->end_idx(); - for( uint j = 1; j <= last_inst; j++ ) { - Node *n = b->_nodes[j]; + uint last_inst = block->end_idx(); + for (uint j = 1; j <= last_inst; j++) { + Node* n = block->_nodes[j]; // Dead instruction??? assert( n->outcnt() != 0 ||// Nothing dead after post alloc @@ -1496,7 +1484,7 @@ assert( cisc->oper_input_base() == 2, "Only adding one edge"); cisc->ins_req(1,src); // Requires a memory edge } - b->_nodes.map(j,cisc); // Insert into basic block + block->_nodes.map(j,cisc); // Insert into basic block n->subsume_by(cisc, C); // Correct graph // ++_used_cisc_instructions; @@ -1522,7 +1510,6 @@ } // End of for all blocks } -//------------------------------find_base_for_derived-------------------------- // Helper to stretch above; recursively discover the base Node for a // given derived Node. Easy for AddP-related machine nodes, but needs // to be recursive for derived Phis. @@ -1552,7 +1539,7 @@ // Initialize it once and make it shared: // set control to _root and place it into Start block // (where top() node is placed). - base->init_req(0, _cfg._root); + base->init_req(0, _cfg.get_root_node()); Block *startb = _cfg.get_block_for_node(C->top()); startb->_nodes.insert(startb->find_node(C->top()), base ); _cfg.map_node_to_block(base, startb); @@ -1561,7 +1548,7 @@ if (n2lidx(base) == 0) { new_lrg(base, maxlrg++); } - assert(base->in(0) == _cfg._root && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared"); + assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared"); derived_base_map[derived->_idx] = base; return base; } @@ -1624,8 +1611,6 @@ return base; } - -//------------------------------stretch_base_pointer_live_ranges--------------- // At each Safepoint, insert extra debug edges for each pair of derived value/ // base pointer that is live across the Safepoint for oopmap building. The // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the @@ -1637,14 +1622,14 @@ memset( derived_base_map, 0, sizeof(Node*)*C->unique() ); // For all blocks in RPO do... - for( uint i=0; i<_cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // Note use of deep-copy constructor. I cannot hammer the original // liveout bits, because they are needed by the following coalesce pass. - IndexSet liveout(_live->live(b)); + IndexSet liveout(_live->live(block)); - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Pre-split compares of loop-phis. Loop-phis form a cycle we would // like to see in the same register. Compare uses the loop-phi and so @@ -1659,7 +1644,7 @@ Node *phi = n->in(1); if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) { Block *phi_block = _cfg.get_block_for_node(phi); - if (_cfg.get_block_for_node(phi_block->pred(2)) == b) { + if (_cfg.get_block_for_node(phi_block->pred(2)) == block) { const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI]; Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask ); insert_proj( phi_block, 1, spill, maxlrg++ ); @@ -1710,7 +1695,7 @@ if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or !liveout.member( n2lidx(base) ) ) && // not live) AND (n2lidx(base) > 0) && // not a constant - _cfg.get_block_for_node(base) != b) { // base not def'd in blk) + _cfg.get_block_for_node(base) != block) { // base not def'd in blk) // Base pointer is not currently live. Since I stretched // the base pointer to here and it crosses basic-block // boundaries, the global live info is now incorrect. @@ -1743,14 +1728,11 @@ return must_recompute_live != 0; } - -//------------------------------add_reference---------------------------------- // Extend the node to LRG mapping void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) { _names.extend( node->_idx, n2lidx(old_node) ); } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void PhaseChaitin::dump( const Node *n ) const { uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0; @@ -1852,8 +1834,9 @@ _matcher._new_SP, _framesize ); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) - dump(_cfg._blocks[i]); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + dump(_cfg.get_block(i)); + } // End of per-block dump tty->print("\n"); @@ -1890,7 +1873,6 @@ tty->print_cr(""); } -//------------------------------dump_degree_lists------------------------------ void PhaseChaitin::dump_degree_lists() const { // Dump lo-degree list tty->print("Lo degree: "); @@ -1911,7 +1893,6 @@ tty->print_cr(""); } -//------------------------------dump_simplified-------------------------------- void PhaseChaitin::dump_simplified() const { tty->print("Simplified: "); for( uint i = _simplified; i; i = lrgs(i)._next ) @@ -1930,7 +1911,6 @@ return buf+strlen(buf); } -//------------------------------dump_register---------------------------------- // Dump a register name into a buffer. Be intelligent if we get called // before allocation is complete. char *PhaseChaitin::dump_register( const Node *n, char *buf ) const { @@ -1964,7 +1944,6 @@ return buf+strlen(buf); } -//----------------------dump_for_spill_split_recycle-------------------------- void PhaseChaitin::dump_for_spill_split_recycle() const { if( WizardMode && (PrintCompilation || PrintOpto) ) { // Display which live ranges need to be split and the allocator's state @@ -1980,7 +1959,6 @@ } } -//------------------------------dump_frame------------------------------------ void PhaseChaitin::dump_frame() const { const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); const TypeTuple *domain = C->tf()->domain(); @@ -2086,17 +2064,16 @@ tty->print_cr("#"); } -//------------------------------dump_bb---------------------------------------- void PhaseChaitin::dump_bb( uint pre_order ) const { tty->print_cr("---dump of B%d---",pre_order); - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - if( b->_pre_order == pre_order ) - dump(b); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + if (block->_pre_order == pre_order) { + dump(block); + } } } -//------------------------------dump_lrg--------------------------------------- void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const { tty->print_cr("---dump of L%d---",lidx); @@ -2115,17 +2092,17 @@ tty->cr(); } // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); int dump_once = 0; // For all instructions - for( uint j = 0; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for( uint j = 0; j < block->_nodes.size(); j++ ) { + Node *n = block->_nodes[j]; if( Find_const(n) == lidx ) { if( !dump_once++ ) { tty->cr(); - b->dump_head(&_cfg); + block->dump_head(&_cfg); } dump(n); continue; @@ -2138,7 +2115,7 @@ if( Find_const(m) == lidx ) { if( !dump_once++ ) { tty->cr(); - b->dump_head(&_cfg); + block->dump_head(&_cfg); } dump(n); } @@ -2150,7 +2127,6 @@ } #endif // not PRODUCT -//------------------------------print_chaitin_statistics------------------------------- int PhaseChaitin::_final_loads = 0; int PhaseChaitin::_final_stores = 0; int PhaseChaitin::_final_memoves= 0; diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/coalesce.cpp --- a/src/share/vm/opto/coalesce.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/coalesce.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -188,8 +188,6 @@ } -//============================================================================= -//------------------------------Dump------------------------------------------- #ifndef PRODUCT void PhaseCoalesce::dump( Node *n ) const { // Being a const function means I cannot use 'Find' @@ -197,12 +195,11 @@ tty->print("L%d/N%d ",r,n->_idx); } -//------------------------------dump------------------------------------------- void PhaseCoalesce::dump() const { // I know I have a block layout now, so I can print blocks in a loop - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) { + for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) { uint j; - Block *b = _phc._cfg._blocks[i]; + Block* b = _phc._cfg.get_block(i); // Print a nice block header tty->print("B%d: ",b->_pre_order); for( j=1; jnum_preds(); j++ ) @@ -239,7 +236,6 @@ } #endif -//------------------------------combine_these_two------------------------------ // Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1. void PhaseCoalesce::combine_these_two( Node *n1, Node *n2 ) { uint lr1 = _phc.Find(n1); @@ -281,18 +277,15 @@ } } -//------------------------------coalesce_driver-------------------------------- // Copy coalescing -void PhaseCoalesce::coalesce_driver( ) { - +void PhaseCoalesce::coalesce_driver() { verify(); // Coalesce from high frequency to low - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) - coalesce( _phc._blks[i] ); - + for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) { + coalesce(_phc._blks[i]); + } } -//------------------------------insert_copy_with_overlap----------------------- // I am inserting copies to come out of SSA form. In the general case, I am // doing a parallel renaming. I'm in the Named world now, so I can't do a // general parallel renaming. All the copies now use "names" (live-ranges) @@ -361,7 +354,6 @@ b->_nodes.insert(last_use_idx+1,copy); } -//------------------------------insert_copies---------------------------------- void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) { // We do LRGs compressing and fix a liveout data only here since the other // place in Split() is guarded by the assert which we never hit. @@ -370,8 +362,8 @@ for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) { uint compressed_lrg = _phc.Find(lrg); if( lrg != compressed_lrg ) { - for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) { - IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]); + for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) { + IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx)); if( liveout->member(lrg) ) { liveout->remove(lrg); liveout->insert(compressed_lrg); @@ -384,10 +376,10 @@ // Nodes with index less than '_unique' are original, non-virtual Nodes. _unique = C->unique(); - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) { + for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) { C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce"); if (C->failing()) return; - Block *b = _phc._cfg._blocks[i]; + Block *b = _phc._cfg.get_block(i); uint cnt = b->num_preds(); // Number of inputs to the Phi for( uint l = 1; l_nodes.size(); l++ ) { @@ -539,8 +531,7 @@ } // End of for all blocks } -//============================================================================= -//------------------------------coalesce--------------------------------------- + // Aggressive (but pessimistic) copy coalescing of a single block // The following coalesce pass represents a single round of aggressive @@ -600,20 +591,16 @@ } // End of for all instructions in block } -//============================================================================= -//------------------------------PhaseConservativeCoalesce---------------------- PhaseConservativeCoalesce::PhaseConservativeCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) { _ulr.initialize(_phc._maxlrg); } -//------------------------------verify----------------------------------------- void PhaseConservativeCoalesce::verify() { #ifdef ASSERT _phc.set_was_low(); #endif } -//------------------------------union_helper----------------------------------- void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) { // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the // union-find tree @@ -656,7 +643,6 @@ } } -//------------------------------compute_separating_interferences--------------- // Factored code from copy_copy that computes extra interferences from // lengthening a live range by double-coalescing. uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) { @@ -718,7 +704,6 @@ return reg_degree; } -//------------------------------update_ifg------------------------------------- void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) { // Some original neighbors of lr1 might have gone away // because the constrained register mask prevented them. @@ -748,7 +733,6 @@ lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) ); } -//------------------------------record_bias------------------------------------ static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) { // Tag copy bias here if( !ifg->lrgs(lr1)._copy_bias ) @@ -757,7 +741,6 @@ ifg->lrgs(lr2)._copy_bias = lr1; } -//------------------------------copy_copy-------------------------------------- // See if I can coalesce a series of multiple copies together. I need the // final dest copy and the original src copy. They can be the same Node. // Compute the compatible register masks. @@ -907,7 +890,6 @@ return true; } -//------------------------------coalesce--------------------------------------- // Conservative (but pessimistic) copy coalescing of a single block void PhaseConservativeCoalesce::coalesce( Block *b ) { // Bail out on infrequent blocks diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/compile.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -2220,7 +2220,9 @@ //------------------------------Code_Gen--------------------------------------- // Given a graph, generate code for it void Compile::Code_Gen() { - if (failing()) return; + if (failing()) { + return; + } // Perform instruction selection. You might think we could reclaim Matcher // memory PDQ, but actually the Matcher is used in generating spill code. @@ -2232,12 +2234,11 @@ // nodes. Mapping is only valid at the root of each matched subtree. NOT_PRODUCT( verify_graph_edges(); ) - Node_List proj_list; - Matcher m(proj_list); - _matcher = &m; + Matcher matcher; + _matcher = &matcher; { TracePhase t2("matcher", &_t_matcher, true); - m.match(); + matcher.match(); } // In debug mode can dump m._nodes.dump() for mapping of ideal to machine // nodes. Mapping is only valid at the root of each matched subtree. @@ -2245,34 +2246,29 @@ // If you have too many nodes, or if matching has failed, bail out check_node_count(0, "out of nodes matching instructions"); - if (failing()) return; + if (failing()) { + return; + } // Platform dependent post matching hook (used on ppc). PdCompile::pd_post_matching_hook(this); // Build a proper-looking CFG - PhaseCFG cfg(node_arena(), root(), m); + PhaseCFG cfg(node_arena(), root(), matcher); _cfg = &cfg; { NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); ) - cfg.Dominators(); - if (failing()) return; - - NOT_PRODUCT( verify_graph_edges(); ) - - cfg.Estimate_Block_Frequency(); - cfg.GlobalCodeMotion(m,unique(),proj_list); - if (failing()) return; + bool success = cfg.do_global_code_motion(); + if (!success) { + return; + } print_method(PHASE_GLOBAL_CODE_MOTION, 2); - NOT_PRODUCT( verify_graph_edges(); ) - debug_only( cfg.verify(); ) } - NOT_PRODUCT( verify_graph_edges(); ) - - PhaseChaitin regalloc(unique(),cfg,m); + + PhaseChaitin regalloc(unique(), cfg, matcher); _regalloc = ®alloc; { TracePhase t2("regalloc", &_t_registerAllocation, true); @@ -2296,7 +2292,7 @@ // can now safely remove it. { NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); ) - cfg.remove_empty(); + cfg.remove_empty_blocks(); if (do_freq_based_layout()) { PhaseBlockLayout layout(cfg); } else { @@ -2351,38 +2347,50 @@ _regalloc->dump_frame(); Node *n = NULL; - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - if (VMThread::should_terminate()) { cut_short = true; break; } - Block *b = _cfg->_blocks[i]; - if (b->is_connector() && !Verbose) continue; - n = b->_nodes[0]; - if (pcs && n->_idx < pc_limit) + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + if (VMThread::should_terminate()) { + cut_short = true; + break; + } + Block* block = _cfg->get_block(i); + if (block->is_connector() && !Verbose) { + continue; + } + n = block->_nodes[0]; + if (pcs && n->_idx < pc_limit) { tty->print("%3.3x ", pcs[n->_idx]); - else + } else { tty->print(" "); - b->dump_head(_cfg); - if (b->is_connector()) { + } + block->dump_head(_cfg); + if (block->is_connector()) { tty->print_cr(" # Empty connector block"); - } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) { + } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) { tty->print_cr(" # Block is sole successor of call"); } // For all instructions Node *delay = NULL; - for( uint j = 0; j_nodes.size(); j++ ) { - if (VMThread::should_terminate()) { cut_short = true; break; } - n = b->_nodes[j]; + for (uint j = 0; j < block->_nodes.size(); j++) { + if (VMThread::should_terminate()) { + cut_short = true; + break; + } + n = block->_nodes[j]; if (valid_bundle_info(n)) { - Bundle *bundle = node_bundling(n); + Bundle* bundle = node_bundling(n); if (bundle->used_in_unconditional_delay()) { delay = n; continue; } - if (bundle->starts_bundle()) + if (bundle->starts_bundle()) { starts_bundle = '+'; + } } - if (WizardMode) n->dump(); + if (WizardMode) { + n->dump(); + } if( !n->is_Region() && // Dont print in the Assembly !n->is_Phi() && // a few noisely useless nodes diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/domgraph.cpp --- a/src/share/vm/opto/domgraph.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/domgraph.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -32,9 +32,6 @@ // Portions of code courtesy of Clifford Click -// Optimization - Graph Style - -//------------------------------Tarjan----------------------------------------- // A data structure that holds all the information needed to find dominators. struct Tarjan { Block *_block; // Basic block for this info @@ -60,23 +57,21 @@ }; -//------------------------------Dominator-------------------------------------- // Compute the dominator tree of the CFG. The CFG must already have been // constructed. This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm. -void PhaseCFG::Dominators( ) { +void PhaseCFG::build_dominator_tree() { // Pre-grow the blocks array, prior to the ResourceMark kicking in - _blocks.map(_num_blocks,0); + _blocks.map(number_of_blocks(), 0); ResourceMark rm; // Setup mappings from my Graph to Tarjan's stuff and back // Note: Tarjan uses 1-based arrays - Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1); + Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1); // Tarjan's algorithm, almost verbatim: // Step 1: - _rpo_ctr = _num_blocks; - uint dfsnum = DFS( tarjan ); - if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops! + uint dfsnum = do_DFS(tarjan, number_of_blocks()); + if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops! // If the returned dfsnum does not match the number of blocks, then we // must have some unreachable loops. These can be made at any time by // IterGVN. They are cleaned up by CCP or the loop opts, but the last @@ -93,14 +88,13 @@ C->record_method_not_compilable("unreachable loop"); return; } - _blocks._cnt = _num_blocks; + _blocks._cnt = number_of_blocks(); // Tarjan is using 1-based arrays, so these are some initialize flags tarjan[0]._size = tarjan[0]._semi = 0; tarjan[0]._label = &tarjan[0]; - uint i; - for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order + for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order Tarjan *w = &tarjan[i]; // Get vertex from DFS // Step 2: @@ -130,19 +124,19 @@ } // Step 4: - for( i=2; i <= _num_blocks; i++ ) { + for (uint i = 2; i <= number_of_blocks(); i++) { Tarjan *w = &tarjan[i]; if( w->_dom != &tarjan[w->_semi] ) w->_dom = w->_dom->_dom; w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later } // No immediate dominator for the root - Tarjan *w = &tarjan[_broot->_pre_order]; + Tarjan *w = &tarjan[get_root_block()->_pre_order]; w->_dom = NULL; w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later // Convert the dominator tree array into my kind of graph - for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices + for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices Tarjan *t = &tarjan[i]; // Handy access Tarjan *tdom = t->_dom; // Handy access to immediate dominator if( tdom ) { // Root has no immediate dominator @@ -152,11 +146,10 @@ } else t->_block->_idom = NULL; // Root } - w->setdepth( _num_blocks+1 ); // Set depth in dominator tree + w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree } -//----------------------------Block_Stack-------------------------------------- class Block_Stack { private: struct Block_Descr { @@ -214,7 +207,6 @@ } }; -//-------------------------most_frequent_successor----------------------------- // Find the index into the b->succs[] array of the most frequent successor. uint Block_Stack::most_frequent_successor( Block *b ) { uint freq_idx = 0; @@ -258,40 +250,38 @@ return freq_idx; } -//------------------------------DFS-------------------------------------------- // Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup // 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent. -uint PhaseCFG::DFS( Tarjan *tarjan ) { - Block *b = _broot; +uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) { + Block* root_block = get_root_block(); uint pre_order = 1; - // Allocate stack of size _num_blocks+1 to avoid frequent realloc - Block_Stack bstack(tarjan, _num_blocks+1); + // Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc + Block_Stack bstack(tarjan, number_of_blocks() + 1); // Push on stack the state for the first block - bstack.push(pre_order, b); + bstack.push(pre_order, root_block); ++pre_order; while (bstack.is_nonempty()) { if (!bstack.last_successor()) { // Walk over all successors in pre-order (DFS). - Block *s = bstack.next_successor(); - if (s->_pre_order == 0) { // Check for no-pre-order, not-visited + Block* next_block = bstack.next_successor(); + if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited // Push on stack the state of successor - bstack.push(pre_order, s); + bstack.push(pre_order, next_block); ++pre_order; } } else { // Build a reverse post-order in the CFG _blocks array Block *stack_top = bstack.pop(); - stack_top->_rpo = --_rpo_ctr; + stack_top->_rpo = --rpo_counter; _blocks.map(stack_top->_rpo, stack_top); } } return pre_order; } -//------------------------------COMPRESS--------------------------------------- void Tarjan::COMPRESS() { assert( _ancestor != 0, "" ); @@ -303,14 +293,12 @@ } } -//------------------------------EVAL------------------------------------------- Tarjan *Tarjan::EVAL() { if( !_ancestor ) return _label; COMPRESS(); return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label; } -//------------------------------LINK------------------------------------------- void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) { Tarjan *s = w; while( w->_label->_semi < s->_child->_label->_semi ) { @@ -333,7 +321,6 @@ } } -//------------------------------setdepth--------------------------------------- void Tarjan::setdepth( uint stack_size ) { Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size); Tarjan **next = top; @@ -362,8 +349,7 @@ } while (last < top); } -//*********************** DOMINATORS ON THE SEA OF NODES*********************** -//------------------------------NTarjan---------------------------------------- +// Compute dominators on the Sea of Nodes form // A data structure that holds all the information needed to find dominators. struct NTarjan { Node *_control; // Control node associated with this info @@ -396,7 +382,6 @@ #endif }; -//------------------------------Dominator-------------------------------------- // Compute the dominator tree of the sea of nodes. This version walks all CFG // nodes (using the is_CFG() call) and places them in a dominator tree. Thus, // it needs a count of the CFG nodes for the mapping table. This is the @@ -517,7 +502,6 @@ } } -//------------------------------DFS-------------------------------------------- // Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup // 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent. int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) { @@ -560,7 +544,6 @@ return dfsnum; } -//------------------------------COMPRESS--------------------------------------- void NTarjan::COMPRESS() { assert( _ancestor != 0, "" ); @@ -572,14 +555,12 @@ } } -//------------------------------EVAL------------------------------------------- NTarjan *NTarjan::EVAL() { if( !_ancestor ) return _label; COMPRESS(); return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label; } -//------------------------------LINK------------------------------------------- void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) { NTarjan *s = w; while( w->_label->_semi < s->_child->_label->_semi ) { @@ -602,7 +583,6 @@ } } -//------------------------------setdepth--------------------------------------- void NTarjan::setdepth( uint stack_size, uint *dom_depth ) { NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size); NTarjan **next = top; @@ -631,7 +611,6 @@ } while (last < top); } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void NTarjan::dump(int offset) const { // Dump the data from this node diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/gcm.cpp --- a/src/share/vm/opto/gcm.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/gcm.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -128,27 +128,30 @@ //------------------------------schedule_pinned_nodes-------------------------- // Set the basic block for Nodes pinned into blocks -void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) { +void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { // Allocate node stack of size C->unique()+8 to avoid frequent realloc - GrowableArray spstack(C->unique()+8); + GrowableArray spstack(C->unique() + 8); spstack.push(_root); - while ( spstack.is_nonempty() ) { - Node *n = spstack.pop(); - if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited - if( n->pinned() && !has_block(n)) { // Pinned? Nail it down! - assert( n->in(0), "pinned Node must have Control" ); + while (spstack.is_nonempty()) { + Node* node = spstack.pop(); + if (!visited.test_set(node->_idx)) { // Test node and flag it as visited + if (node->pinned() && !has_block(node)) { // Pinned? Nail it down! + assert(node->in(0), "pinned Node must have Control"); // Before setting block replace block_proj control edge - replace_block_proj_ctrl(n); - Node *input = n->in(0); + replace_block_proj_ctrl(node); + Node* input = node->in(0); while (!input->is_block_start()) { input = input->in(0); } - Block *b = get_block_for_node(input); // Basic block of controlling input - schedule_node_into_block(n, b); + Block* block = get_block_for_node(input); // Basic block of controlling input + schedule_node_into_block(node, block); } - for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs - if( n->in(i) != NULL ) - spstack.push(n->in(i)); + + // process all inputs that are non NULL + for (int i = node->req() - 1; i >= 0; --i) { + if (node->in(i) != NULL) { + spstack.push(node->in(i)); + } } } } @@ -212,32 +215,29 @@ // which all their inputs occur. bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { // Allocate stack with enough space to avoid frequent realloc - Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats - // roots.push(_root); _root will be processed among C->top() inputs + Node_Stack nstack(roots.Size() + 8); + // _root will be processed among C->top() inputs roots.push(C->top()); visited.set(C->top()->_idx); while (roots.size() != 0) { // Use local variables nstack_top_n & nstack_top_i to cache values // on stack's top. - Node *nstack_top_n = roots.pop(); - uint nstack_top_i = 0; -//while_nstack_nonempty: + Node* parent_node = roots.pop(); + uint input_index = 0; + while (true) { - // Get parent node and next input's index from stack's top. - Node *n = nstack_top_n; - uint i = nstack_top_i; - - if (i == 0) { + if (input_index == 0) { // Fixup some control. Constants without control get attached // to root and nodes that use is_block_proj() nodes should be attached // to the region that starts their block. - const Node *in0 = n->in(0); - if (in0 != NULL) { // Control-dependent? - replace_block_proj_ctrl(n); - } else { // n->in(0) == NULL - if (n->req() == 1) { // This guy is a constant with NO inputs? - n->set_req(0, _root); + const Node* control_input = parent_node->in(0); + if (control_input != NULL) { + replace_block_proj_ctrl(parent_node); + } else { + // Is a constant with NO inputs? + if (parent_node->req() == 1) { + parent_node->set_req(0, _root); } } } @@ -246,37 +246,47 @@ // input is already in a block we quit following inputs (to avoid // cycles). Instead we put that Node on a worklist to be handled // later (since IT'S inputs may not have a block yet). - bool done = true; // Assume all n's inputs will be processed - while (i < n->len()) { // For all inputs - Node *in = n->in(i); // Get input - ++i; - if (in == NULL) continue; // Ignore NULL, missing inputs + + // Assume all n's inputs will be processed + bool done = true; + + while (input_index < parent_node->len()) { + Node* in = parent_node->in(input_index++); + if (in == NULL) { + continue; + } + int is_visited = visited.test_set(in->_idx); - if (!has_block(in)) { // Missing block selection? + if (!has_block(in)) { if (is_visited) { - // assert( !visited.test(in->_idx), "did not schedule early" ); return false; } - nstack.push(n, i); // Save parent node and next input's index. - nstack_top_n = in; // Process current input now. - nstack_top_i = 0; - done = false; // Not all n's inputs processed. - break; // continue while_nstack_nonempty; - } else if (!is_visited) { // Input not yet visited? - roots.push(in); // Visit this guy later, using worklist + // Save parent node and next input's index. + nstack.push(parent_node, input_index); + // Process current input now. + parent_node = in; + input_index = 0; + // Not all n's inputs processed. + done = false; + break; + } else if (!is_visited) { + // Visit this guy later, using worklist + roots.push(in); } } + if (done) { // All of n's inputs have been processed, complete post-processing. // Some instructions are pinned into a block. These include Region, // Phi, Start, Return, and other control-dependent instructions and // any projections which depend on them. - if (!n->pinned()) { + if (!parent_node->pinned()) { // Set earliest legal block. - map_node_to_block(n, find_deepest_input(n, this)); + Block* earliest_block = find_deepest_input(parent_node, this); + map_node_to_block(parent_node, earliest_block); } else { - assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge"); + assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge"); } if (nstack.is_empty()) { @@ -285,12 +295,12 @@ break; } // Get saved parent node and next input's index. - nstack_top_n = nstack.node(); - nstack_top_i = nstack.index(); + parent_node = nstack.node(); + input_index = nstack.index(); nstack.pop(); - } // if (done) - } // while (true) - } // while (roots.size() != 0) + } + } + } return true; } @@ -854,7 +864,7 @@ //------------------------------ComputeLatenciesBackwards---------------------- // Compute the latency of all the instructions. -void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) { +void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) { #ifndef PRODUCT if (trace_opto_pipelining()) tty->print("\n#---- ComputeLatenciesBackwards ----\n"); @@ -877,31 +887,34 @@ // Set the latency for this instruction #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# latency_to_inputs: node_latency[%d] = %d for node", - n->_idx, _node_latency->at_grow(n->_idx)); + tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); dump(); } #endif - if (n->is_Proj()) + if (n->is_Proj()) { n = n->in(0); + } - if (n->is_Root()) + if (n->is_Root()) { return; + } uint nlen = n->len(); - uint use_latency = _node_latency->at_grow(n->_idx); + uint use_latency = get_latency_for_node(n); uint use_pre_order = get_block_for_node(n)->_pre_order; - for ( uint j=0; jin(j); - if (!def || def == n) + if (!def || def == n) { continue; + } // Walk backwards thru projections - if (def->is_Proj()) + if (def->is_Proj()) { def = def->in(0); + } #ifndef PRODUCT if (trace_opto_pipelining()) { @@ -914,22 +927,20 @@ Block *def_block = get_block_for_node(def); uint def_pre_order = def_block ? def_block->_pre_order : 0; - if ( (use_pre_order < def_pre_order) || - (use_pre_order == def_pre_order && n->is_Phi()) ) + if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { continue; + } uint delta_latency = n->latency(j); uint current_latency = delta_latency + use_latency; - if (_node_latency->at_grow(def->_idx) < current_latency) { - _node_latency->at_put_grow(def->_idx, current_latency); + if (get_latency_for_node(def) < current_latency) { + set_latency_for_node(def, current_latency); } #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", - use_latency, j, delta_latency, current_latency, def->_idx, - _node_latency->at_grow(def->_idx)); + tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); } #endif } @@ -964,7 +975,7 @@ return 0; uint nlen = use->len(); - uint nl = _node_latency->at_grow(use->_idx); + uint nl = get_latency_for_node(use); for ( uint j=0; jin(j) == n) { @@ -999,8 +1010,7 @@ // Set the latency for this instruction #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# latency_from_outputs: node_latency[%d] = %d for node", - n->_idx, _node_latency->at_grow(n->_idx)); + tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); dump(); } #endif @@ -1013,7 +1023,7 @@ if (latency < l) latency = l; } - _node_latency->at_put_grow(n->_idx, latency); + set_latency_for_node(n, latency); } //------------------------------hoist_to_cheaper_block------------------------- @@ -1023,9 +1033,9 @@ const double delta = 1+PROB_UNLIKELY_MAG(4); Block* least = LCA; double least_freq = least->_freq; - uint target = _node_latency->at_grow(self->_idx); - uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); - uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); + uint target = get_latency_for_node(self); + uint start_latency = get_latency_for_node(LCA->_nodes[0]); + uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]); bool in_latency = (target <= start_latency); const Block* root_block = get_block_for_node(_root); @@ -1042,8 +1052,7 @@ #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# Find cheaper block for latency %d: ", - _node_latency->at_grow(self->_idx)); + tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self)); self->dump(); tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", LCA->_pre_order, @@ -1070,9 +1079,9 @@ if (mach && LCA == root_block) break; - uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); + uint start_lat = get_latency_for_node(LCA->_nodes[0]); uint end_idx = LCA->end_idx(); - uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); + uint end_lat = get_latency_for_node(LCA->_nodes[end_idx]); double LCA_freq = LCA->_freq; #ifndef PRODUCT if (trace_opto_pipelining()) { @@ -1111,7 +1120,7 @@ tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); } #endif - _node_latency->at_put_grow(self->_idx, end_latency); + set_latency_for_node(self, end_latency); partial_latency_of_defs(self); } @@ -1256,7 +1265,7 @@ } // end ScheduleLate //------------------------------GlobalCodeMotion------------------------------- -void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { +void PhaseCFG::global_code_motion() { ResourceMark rm; #ifndef PRODUCT @@ -1266,21 +1275,22 @@ #endif // Initialize the node to block mapping for things on the proj_list - for (uint i = 0; i < proj_list.size(); i++) { - unmap_node_from_block(proj_list[i]); + for (uint i = 0; i < _matcher.number_of_projections(); i++) { + unmap_node_from_block(_matcher.get_projection(i)); } // Set the basic block for Nodes pinned into blocks - Arena *a = Thread::current()->resource_area(); - VectorSet visited(a); - schedule_pinned_nodes( visited ); + Arena* arena = Thread::current()->resource_area(); + VectorSet visited(arena); + schedule_pinned_nodes(visited); // Find the earliest Block any instruction can be placed in. Some // instructions are pinned into Blocks. Unpinned instructions can // appear in last block in which all their inputs occur. visited.Clear(); - Node_List stack(a); - stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list + Node_List stack(arena); + // Pre-grow the list + stack.map((C->unique() >> 1) + 16, NULL); if (!schedule_early(visited, stack)) { // Bailout without retry C->record_method_not_compilable("early schedule failed"); @@ -1288,29 +1298,25 @@ } // Build Def-Use edges. - proj_list.push(_root); // Add real root as another root - proj_list.pop(); - // Compute the latency information (via backwards walk) for all the // instructions in the graph _node_latency = new GrowableArray(); // resource_area allocation - if( C->do_scheduling() ) - ComputeLatenciesBackwards(visited, stack); + if (C->do_scheduling()) { + compute_latencies_backwards(visited, stack); + } // Now schedule all codes as LATE as possible. This is the LCA in the // dominator tree of all USES of a value. Pick the block with the least // loop nesting depth that is lowest in the dominator tree. // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) schedule_late(visited, stack); - if( C->failing() ) { + if (C->failing()) { // schedule_late fails only when graph is incorrect. assert(!VerifyGraphEdges, "verification should have failed"); return; } - unique = C->unique(); - #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("\n---- Detect implicit null checks ----\n"); @@ -1333,10 +1339,11 @@ // By reversing the loop direction we get a very minor gain on mpegaudio. // Feel free to revert to a forward loop for clarity. // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { - for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { - Node *proj = matcher._null_check_tests[i ]; - Node *val = matcher._null_check_tests[i+1]; - get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons); + for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { + Node* proj = _matcher._null_check_tests[i]; + Node* val = _matcher._null_check_tests[i + 1]; + Block* block = get_block_for_node(proj); + block->implicit_null_check(this, proj, val, allowed_reasons); // The implicit_null_check will only perform the transformation // if the null branch is truly uncommon, *and* it leads to an // uncommon trap. Combined with the too_many_traps guards @@ -1353,11 +1360,11 @@ // Schedule locally. Right now a simple topological sort. // Later, do a real latency aware scheduler. - uint max_idx = C->unique(); - GrowableArray ready_cnt(max_idx, max_idx, -1); + GrowableArray ready_cnt(C->unique(), C->unique(), -1); visited.Clear(); - for (uint i = 0; i < _num_blocks; i++) { - if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + if (!block->schedule_local(this, _matcher, ready_cnt, visited)) { if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { C->record_method_not_compilable("local schedule failed"); } @@ -1367,15 +1374,17 @@ // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. - for (uint i = 0; i < _num_blocks; i++) { - _blocks[i]->call_catch_cleanup(this, C); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->call_catch_cleanup(this, C); } #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("\n---- After GlobalCodeMotion ----\n"); - for (uint i = 0; i < _num_blocks; i++) { - _blocks[i]->dump(); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->dump(); } } #endif @@ -1383,10 +1392,29 @@ _node_latency = (GrowableArray *)0xdeadbeef; } +bool PhaseCFG::do_global_code_motion() { + + build_dominator_tree(); + if (C->failing()) { + return false; + } + + NOT_PRODUCT( C->verify_graph_edges(); ) + + estimate_block_frequency(); + + global_code_motion(); + + if (C->failing()) { + return false; + } + + return true; +} //------------------------------Estimate_Block_Frequency----------------------- // Estimate block frequencies based on IfNode probabilities. -void PhaseCFG::Estimate_Block_Frequency() { +void PhaseCFG::estimate_block_frequency() { // Force conditional branches leading to uncommon traps to be unlikely, // not because we get to the uncommon_trap with less relative frequency, @@ -1394,7 +1422,7 @@ // there once. if (C->do_freq_based_layout()) { Block_List worklist; - Block* root_blk = _blocks[0]; + Block* root_blk = get_block(0); for (uint i = 1; i < root_blk->num_preds(); i++) { Block *pb = get_block_for_node(root_blk->pred(i)); if (pb->has_uncommon_code()) { @@ -1403,7 +1431,9 @@ } while (worklist.size() > 0) { Block* uct = worklist.pop(); - if (uct == _broot) continue; + if (uct == get_root_block()) { + continue; + } for (uint i = 1; i < uct->num_preds(); i++) { Block *pb = get_block_for_node(uct->pred(i)); if (pb->_num_succs == 1) { @@ -1427,12 +1457,12 @@ _root_loop->scale_freq(); // Save outmost loop frequency for LRG frequency threshold - _outer_loop_freq = _root_loop->outer_loop_freq(); + _outer_loop_frequency = _root_loop->outer_loop_freq(); // force paths ending at uncommon traps to be infrequent if (!C->do_freq_based_layout()) { Block_List worklist; - Block* root_blk = _blocks[0]; + Block* root_blk = get_block(0); for (uint i = 1; i < root_blk->num_preds(); i++) { Block *pb = get_block_for_node(root_blk->pred(i)); if (pb->has_uncommon_code()) { @@ -1452,8 +1482,8 @@ } #ifdef ASSERT - for (uint i = 0; i < _num_blocks; i++ ) { - Block *b = _blocks[i]; + for (uint i = 0; i < number_of_blocks(); i++) { + Block* b = get_block(i); assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); } #endif @@ -1477,16 +1507,16 @@ CFGLoop* PhaseCFG::create_loop_tree() { #ifdef ASSERT - assert( _blocks[0] == _broot, "" ); - for (uint i = 0; i < _num_blocks; i++ ) { - Block *b = _blocks[i]; + assert(get_block(0) == get_root_block(), "first block should be root block"); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); // Check that _loop field are clear...we could clear them if not. - assert(b->_loop == NULL, "clear _loop expected"); + assert(block->_loop == NULL, "clear _loop expected"); // Sanity check that the RPO numbering is reflected in the _blocks array. // It doesn't have to be for the loop tree to be built, but if it is not, // then the blocks have been reordered since dom graph building...which // may question the RPO numbering - assert(b->_rpo == i, "unexpected reverse post order number"); + assert(block->_rpo == i, "unexpected reverse post order number"); } #endif @@ -1496,11 +1526,11 @@ Block_List worklist; // Assign blocks to loops - for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block - Block *b = _blocks[i]; + for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block + Block* block = get_block(i); - if (b->head()->is_Loop()) { - Block* loop_head = b; + if (block->head()->is_Loop()) { + Block* loop_head = block; assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); Block* tail = get_block_for_node(tail_n); @@ -1534,23 +1564,23 @@ // Create a member list for each loop consisting // of both blocks and (immediate child) loops. - for (uint i = 0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - CFGLoop* lp = b->_loop; + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + CFGLoop* lp = block->_loop; if (lp == NULL) { // Not assigned to a loop. Add it to the method's pseudo loop. - b->_loop = root_loop; + block->_loop = root_loop; lp = root_loop; } - if (lp == root_loop || b != lp->head()) { // loop heads are already members - lp->add_member(b); + if (lp == root_loop || block != lp->head()) { // loop heads are already members + lp->add_member(block); } if (lp != root_loop) { if (lp->parent() == NULL) { // Not a nested loop. Make it a child of the method's pseudo loop. root_loop->add_nested_loop(lp); } - if (b == lp->head()) { + if (block == lp->head()) { // Add nested loop to member list of parent loop. lp->parent()->add_member(lp); } diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/idealGraphPrinter.cpp --- a/src/share/vm/opto/idealGraphPrinter.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/idealGraphPrinter.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -416,7 +416,7 @@ if (C->cfg() != NULL) { Block* block = C->cfg()->get_block_for_node(node); if (block == NULL) { - print_prop("block", C->cfg()->_blocks[0]->_pre_order); + print_prop("block", C->cfg()->get_block(0)->_pre_order); } else { print_prop("block", block->_pre_order); } @@ -637,10 +637,10 @@ if (C->cfg() != NULL) { // once we have a CFG there are some nodes that aren't really // reachable but are in the CFG so add them here. - for (uint i = 0; i < C->cfg()->_blocks.size(); i++) { - Block *b = C->cfg()->_blocks[i]; - for (uint s = 0; s < b->_nodes.size(); s++) { - nodeStack.push(b->_nodes[s]); + for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) { + Block* block = C->cfg()->get_block(i); + for (uint s = 0; s < block->_nodes.size(); s++) { + nodeStack.push(block->_nodes[s]); } } } @@ -698,24 +698,24 @@ tail(EDGES_ELEMENT); if (C->cfg() != NULL) { head(CONTROL_FLOW_ELEMENT); - for (uint i = 0; i < C->cfg()->_blocks.size(); i++) { - Block *b = C->cfg()->_blocks[i]; + for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) { + Block* block = C->cfg()->get_block(i); begin_head(BLOCK_ELEMENT); - print_attr(BLOCK_NAME_PROPERTY, b->_pre_order); + print_attr(BLOCK_NAME_PROPERTY, block->_pre_order); end_head(); head(SUCCESSORS_ELEMENT); - for (uint s = 0; s < b->_num_succs; s++) { + for (uint s = 0; s < block->_num_succs; s++) { begin_elem(SUCCESSOR_ELEMENT); - print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order); + print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order); end_elem(); } tail(SUCCESSORS_ELEMENT); head(NODES_ELEMENT); - for (uint s = 0; s < b->_nodes.size(); s++) { + for (uint s = 0; s < block->_nodes.size(); s++) { begin_elem(NODE_ELEMENT); - print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s])); + print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s])); end_elem(); } tail(NODES_ELEMENT); diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/ifg.cpp --- a/src/share/vm/opto/ifg.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/ifg.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -39,12 +39,9 @@ #define EXACT_PRESSURE 1 -//============================================================================= -//------------------------------IFG-------------------------------------------- PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) { } -//------------------------------init------------------------------------------- void PhaseIFG::init( uint maxlrg ) { _maxlrg = maxlrg; _yanked = new (_arena) VectorSet(_arena); @@ -61,7 +58,6 @@ } } -//------------------------------add-------------------------------------------- // Add edge between vertices a & b. These are sorted (triangular matrix), // then the smaller number is inserted in the larger numbered array. int PhaseIFG::add_edge( uint a, uint b ) { @@ -73,7 +69,6 @@ return _adjs[a].insert( b ); } -//------------------------------add_vector------------------------------------- // Add an edge between 'a' and everything in the vector. void PhaseIFG::add_vector( uint a, IndexSet *vec ) { // IFG is triangular, so do the inserts where 'a' < 'b'. @@ -88,7 +83,6 @@ } } -//------------------------------test------------------------------------------- // Is there an edge between a and b? int PhaseIFG::test_edge( uint a, uint b ) const { // Sort a and b, so that a is larger @@ -97,7 +91,6 @@ return _adjs[a].member(b); } -//------------------------------SquareUp--------------------------------------- // Convert triangular matrix to square matrix void PhaseIFG::SquareUp() { assert( !_is_square, "only on triangular" ); @@ -113,7 +106,6 @@ _is_square = true; } -//------------------------------Compute_Effective_Degree----------------------- // Compute effective degree in bulk void PhaseIFG::Compute_Effective_Degree() { assert( _is_square, "only on square" ); @@ -122,7 +114,6 @@ lrgs(i).set_degree(effective_degree(i)); } -//------------------------------test_edge_sq----------------------------------- int PhaseIFG::test_edge_sq( uint a, uint b ) const { assert( _is_square, "only on square" ); // Swap, so that 'a' has the lesser count. Then binary search is on @@ -132,7 +123,6 @@ return _adjs[a].member(b); } -//------------------------------Union------------------------------------------ // Union edges of B into A void PhaseIFG::Union( uint a, uint b ) { assert( _is_square, "only on square" ); @@ -148,7 +138,6 @@ } } -//------------------------------remove_node------------------------------------ // Yank a Node and all connected edges from the IFG. Return a // list of neighbors (edges) yanked. IndexSet *PhaseIFG::remove_node( uint a ) { @@ -167,7 +156,6 @@ return neighbors(a); } -//------------------------------re_insert-------------------------------------- // Re-insert a yanked Node. void PhaseIFG::re_insert( uint a ) { assert( _is_square, "only on square" ); @@ -182,7 +170,6 @@ } } -//------------------------------compute_degree--------------------------------- // Compute the degree between 2 live ranges. If both live ranges are // aligned-adjacent powers-of-2 then we use the MAX size. If either is // mis-aligned (or for Fat-Projections, not-adjacent) then we have to @@ -198,7 +185,6 @@ return tmp; } -//------------------------------effective_degree------------------------------- // Compute effective degree for this live range. If both live ranges are // aligned-adjacent powers-of-2 then we use the MAX size. If either is // mis-aligned (or for Fat-Projections, not-adjacent) then we have to @@ -223,7 +209,6 @@ #ifndef PRODUCT -//------------------------------dump------------------------------------------- void PhaseIFG::dump() const { tty->print_cr("-- Interference Graph --%s--", _is_square ? "square" : "triangular" ); @@ -262,7 +247,6 @@ tty->print("\n"); } -//------------------------------stats------------------------------------------ void PhaseIFG::stats() const { ResourceMark rm; int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); @@ -278,7 +262,6 @@ tty->print_cr(""); } -//------------------------------verify----------------------------------------- void PhaseIFG::verify( const PhaseChaitin *pc ) const { // IFG is square, sorted and no need for Find for( uint i = 0; i < _maxlrg; i++ ) { @@ -301,7 +284,6 @@ } #endif -//------------------------------interfere_with_live---------------------------- // Interfere this register with everything currently live. Use the RegMasks // to trim the set of possible interferences. Return a count of register-only // interferences as an estimate of register pressure. @@ -318,7 +300,6 @@ _ifg->add_edge( r, l ); } -//------------------------------build_ifg_virtual------------------------------ // Actually build the interference graph. Uses virtual registers only, no // physical register masks. This allows me to be very aggressive when // coalescing copies. Some of this aggressiveness will have to be undone @@ -328,9 +309,9 @@ void PhaseChaitin::build_ifg_virtual( ) { // For all blocks (in any order) do... - for( uint i=0; i<_cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - IndexSet *liveout = _live->live(b); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + IndexSet* liveout = _live->live(block); // The IFG is built by a single reverse pass over each basic block. // Starting with the known live-out set, we remove things that get @@ -340,8 +321,8 @@ // The defined value interferes with everything currently live. The // value is then removed from the live-ness set and it's inputs are // added to the live-ness set. - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Get value being defined uint r = n2lidx(n); @@ -407,7 +388,6 @@ } // End of forall blocks } -//------------------------------count_int_pressure----------------------------- uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { IndexSetIterator elements(liveout); uint lidx; @@ -423,7 +403,6 @@ return cnt; } -//------------------------------count_float_pressure--------------------------- uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) { IndexSetIterator elements(liveout); uint lidx; @@ -437,7 +416,6 @@ return cnt; } -//------------------------------lower_pressure--------------------------------- // Adjust register pressure down by 1. Capture last hi-to-low transition, static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) { if (lrg->mask().is_UP() && lrg->mask_size()) { @@ -467,40 +445,41 @@ } } -//------------------------------build_ifg_physical----------------------------- // Build the interference graph using physical registers when available. // That is, if 2 live ranges are simultaneously alive but in their acceptable // register sets do not overlap, then they do not interfere. uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); ) - uint spill_reg = LRG::SPILL_REG; uint must_spill = 0; // For all blocks (in any order) do... - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // Clone (rather than smash in place) the liveout info, so it is alive // for the "collect_gc_info" phase later. - IndexSet liveout(_live->live(b)); - uint last_inst = b->end_idx(); + IndexSet liveout(_live->live(block)); + uint last_inst = block->end_idx(); // Compute first nonphi node index uint first_inst; - for( first_inst = 1; first_inst < last_inst; first_inst++ ) - if( !b->_nodes[first_inst]->is_Phi() ) + for (first_inst = 1; first_inst < last_inst; first_inst++) { + if (!block->_nodes[first_inst]->is_Phi()) { break; + } + } // Spills could be inserted before CreateEx node which should be // first instruction in block after Phis. Move CreateEx up. - for( uint insidx = first_inst; insidx < last_inst; insidx++ ) { - Node *ex = b->_nodes[insidx]; - if( ex->is_SpillCopy() ) continue; - if( insidx > first_inst && ex->is_Mach() && - ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { + for (uint insidx = first_inst; insidx < last_inst; insidx++) { + Node *ex = block->_nodes[insidx]; + if (ex->is_SpillCopy()) { + continue; + } + if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) { // If the CreateEx isn't above all the MachSpillCopies // then move it to the top. - b->_nodes.remove(insidx); - b->_nodes.insert(first_inst, ex); + block->_nodes.remove(insidx); + block->_nodes.insert(first_inst, ex); } // Stop once a CreateEx or any other node is found break; @@ -510,12 +489,12 @@ uint pressure[2], hrp_index[2]; pressure[0] = pressure[1] = 0; hrp_index[0] = hrp_index[1] = last_inst+1; - b->_reg_pressure = b->_freg_pressure = 0; + block->_reg_pressure = block->_freg_pressure = 0; // Liveout things are presumed live for the whole block. We accumulate // 'area' accordingly. If they get killed in the block, we'll subtract // the unused part of the block from the area. int inst_count = last_inst - first_inst; - double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); assert(!(cost < 0.0), "negative spill cost" ); IndexSetIterator elements(&liveout); uint lidx; @@ -527,15 +506,17 @@ if (lrg._is_float || lrg._is_vector) { // Count float pressure pressure[1] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } #endif // Count int pressure, but do not count the SP, flags - } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { + } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { pressure[0] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } #endif } } @@ -552,8 +533,8 @@ // value is then removed from the live-ness set and it's inputs are added // to the live-ness set. uint j; - for( j = last_inst + 1; j > 1; j-- ) { - Node *n = b->_nodes[j - 1]; + for (j = last_inst + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Get value being defined uint r = n2lidx(n); @@ -562,7 +543,7 @@ if( r ) { // A DEF normally costs block frequency; rematerialized values are // removed from the DEF sight, so LOWER costs here. - lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq; + lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq; // If it is not live, then this instruction is dead. Probably caused // by spilling and rematerialization. Who cares why, yank this baby. @@ -590,7 +571,7 @@ } } if (remove) { - b->_nodes.remove(j - 1); + block->_nodes.remove(j - 1); if( lrgs(r)._def == n ) lrgs(r)._def = 0; n->disconnect_inputs(NULL, C); _cfg.unmap_node_from_block(n); @@ -610,30 +591,30 @@ itmp.AND(*Matcher::idealreg2regmask[Op_RegI]); int iregs = itmp.Size(); #ifdef EXACT_PRESSURE - if( pressure[0]+iregs > b->_reg_pressure ) - b->_reg_pressure = pressure[0]+iregs; + if (pressure[0]+iregs > block->_reg_pressure) { + block->_reg_pressure = pressure[0] + iregs; + } #endif - if( pressure[0] <= (uint)INTPRESSURE && - pressure[0]+iregs > (uint)INTPRESSURE ) { + if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) { #ifndef EXACT_PRESSURE - b->_reg_pressure = (uint)INTPRESSURE+1; + block->_reg_pressure = (uint)INTPRESSURE+1; #endif - hrp_index[0] = j-1; + hrp_index[0] = j - 1; } // Count the float-only registers RegMask ftmp = lrgs(r).mask(); ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]); int fregs = ftmp.Size(); #ifdef EXACT_PRESSURE - if( pressure[1]+fregs > b->_freg_pressure ) - b->_freg_pressure = pressure[1]+fregs; + if (pressure[1] + fregs > block->_freg_pressure) { + block->_freg_pressure = pressure[1] + fregs; + } #endif - if( pressure[1] <= (uint)FLOATPRESSURE && - pressure[1]+fregs > (uint)FLOATPRESSURE ) { + if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) { #ifndef EXACT_PRESSURE - b->_freg_pressure = (uint)FLOATPRESSURE+1; + block->_freg_pressure = (uint)FLOATPRESSURE+1; #endif - hrp_index[1] = j-1; + hrp_index[1] = j - 1; } } @@ -646,7 +627,7 @@ if( n->is_SpillCopy() && lrgs(r).is_singledef() // MultiDef live range can still split && n->outcnt() == 1 // and use must be in this block - && _cfg.get_block_for_node(n->unique_out()) == b ) { + && _cfg.get_block_for_node(n->unique_out()) == block) { // All single-use MachSpillCopy(s) that immediately precede their // use must color early. If a longer live range steals their // color, the spill copy will split and may push another spill copy @@ -656,14 +637,16 @@ // Node *single_use = n->unique_out(); - assert( b->find_node(single_use) >= j, "Use must be later in block"); + assert(block->find_node(single_use) >= j, "Use must be later in block"); // Use can be earlier in block if it is a Phi, but then I should be a MultiDef // Find first non SpillCopy 'm' that follows the current instruction // (j - 1) is index for current instruction 'n' Node *m = n; - for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; } - if( m == single_use ) { + for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) { + m = block->_nodes[i]; + } + if (m == single_use) { lrgs(r)._area = 0.0; } } @@ -672,7 +655,7 @@ if( liveout.remove(r) ) { // Adjust register pressure. // Capture last hi-to-lo pressure transition - lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index ); + lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index); assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } @@ -685,7 +668,7 @@ if( liveout.remove( x ) ) { lrgs(x)._area -= cost; // Adjust register pressure. - lower_pressure( &lrgs(x), j-1, b, pressure, hrp_index ); + lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index); assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } @@ -757,7 +740,7 @@ // Area remaining in the block inst_count--; - cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); // Make all inputs live if( !n->is_Phi() ) { // Phi function uses come from prior block @@ -780,7 +763,7 @@ if( k < debug_start ) // A USE costs twice block frequency (once for the Load, once // for a Load-delay). Rematerialized uses only cost once. - lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq)); + lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq)); // It is live now if( liveout.insert( x ) ) { // Newly live things assumed live from here to top of block @@ -790,14 +773,16 @@ if (lrg._is_float || lrg._is_vector) { pressure[1] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } #endif } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { pressure[0] += lrg.reg_pressure(); #ifdef EXACT_PRESSURE - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } #endif } } @@ -812,52 +797,55 @@ // If we run off the top of the block with high pressure and // never see a hi-to-low pressure transition, just record that // the whole block is high pressure. - if( pressure[0] > (uint)INTPRESSURE ) { + if (pressure[0] > (uint)INTPRESSURE) { hrp_index[0] = 0; #ifdef EXACT_PRESSURE - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } #else - b->_reg_pressure = (uint)INTPRESSURE+1; + block->_reg_pressure = (uint)INTPRESSURE+1; #endif } - if( pressure[1] > (uint)FLOATPRESSURE ) { + if (pressure[1] > (uint)FLOATPRESSURE) { hrp_index[1] = 0; #ifdef EXACT_PRESSURE - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } #else - b->_freg_pressure = (uint)FLOATPRESSURE+1; + block->_freg_pressure = (uint)FLOATPRESSURE+1; #endif } // Compute high pressure indice; avoid landing in the middle of projnodes j = hrp_index[0]; - if( j < b->_nodes.size() && j < b->end_idx()+1 ) { - Node *cur = b->_nodes[j]; - while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) { + if (j < block->_nodes.size() && j < block->end_idx() + 1) { + Node* cur = block->_nodes[j]; + while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { j--; - cur = b->_nodes[j]; + cur = block->_nodes[j]; } } - b->_ihrp_index = j; + block->_ihrp_index = j; j = hrp_index[1]; - if( j < b->_nodes.size() && j < b->end_idx()+1 ) { - Node *cur = b->_nodes[j]; - while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) { + if (j < block->_nodes.size() && j < block->end_idx() + 1) { + Node* cur = block->_nodes[j]; + while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { j--; - cur = b->_nodes[j]; + cur = block->_nodes[j]; } } - b->_fhrp_index = j; + block->_fhrp_index = j; #ifndef PRODUCT // Gather Register Pressure Statistics if( PrintOptoStatistics ) { - if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE ) + if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) { _high_pressure++; - else + } else { _low_pressure++; + } } #endif } // End of for all blocks diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/lcm.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -539,7 +539,7 @@ n_choice = 1; } - uint n_latency = cfg->_node_latency->at_grow(n->_idx); + uint n_latency = cfg->get_latency_for_node(n); uint n_score = n->req(); // Many inputs get high score to break ties // Keep best latency found @@ -832,7 +832,7 @@ Node *n = _nodes[j]; int idx = n->_idx; tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); - tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx)); + tty->print("latency:%3d ", cfg->get_latency_for_node(n)); tty->print("%4d: %s\n", idx, n->Name()); } } @@ -860,7 +860,7 @@ #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print("# select %d: %s", n->_idx, n->Name()); - tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx)); + tty->print(", latency:%d", cfg->get_latency_for_node(n)); n->dump(); if (Verbose) { tty->print("# ready list:"); diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/live.cpp --- a/src/share/vm/opto/live.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/live.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -30,9 +30,6 @@ #include "opto/machnode.hpp" - -//============================================================================= -//------------------------------PhaseLive-------------------------------------- // Compute live-in/live-out. We use a totally incremental algorithm. The LIVE // problem is monotonic. The steady-state solution looks like this: pull a // block from the worklist. It has a set of delta's - values which are newly @@ -53,9 +50,9 @@ // Init the sparse live arrays. This data is live on exit from here! // The _live info is the live-out info. - _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks); + _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks()); uint i; - for( i=0; i<_cfg._num_blocks; i++ ) { + for (i = 0; i < _cfg.number_of_blocks(); i++) { _live[i].initialize(_maxlrg); } @@ -65,14 +62,14 @@ // Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT // Array of values defined locally in blocks - _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks); - for( i=0; i<_cfg._num_blocks; i++ ) { + _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks()); + for (i = 0; i < _cfg.number_of_blocks(); i++) { _defs[i].initialize(_maxlrg); } // Array of delta-set pointers, indexed by block pre_order-1. - _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks); - memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks); + _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks()); + memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); _free_IndexSet = NULL; @@ -80,31 +77,32 @@ VectorSet first_pass(Thread::current()->resource_area()); // Outer loop: must compute local live-in sets and push into predecessors. - uint iters = _cfg._num_blocks; // stat counters - for( uint j=_cfg._num_blocks; j>0; j-- ) { - Block *b = _cfg._blocks[j-1]; + for (uint j = _cfg.number_of_blocks(); j > 0; j--) { + Block* block = _cfg.get_block(j - 1); // Compute the local live-in set. Start with any new live-out bits. - IndexSet *use = getset( b ); - IndexSet *def = &_defs[b->_pre_order-1]; + IndexSet* use = getset(block); + IndexSet* def = &_defs[block->_pre_order-1]; DEBUG_ONLY(IndexSet *def_outside = getfreeset();) uint i; - for( i=b->_nodes.size(); i>1; i-- ) { - Node *n = b->_nodes[i-1]; - if( n->is_Phi() ) break; + for (i = block->_nodes.size(); i > 1; i--) { + Node* n = block->_nodes[i-1]; + if (n->is_Phi()) { + break; + } uint r = _names[n->_idx]; assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block"); def->insert( r ); use->remove( r ); uint cnt = n->req(); - for( uint k=1; kin(k); uint nkidx = nk->_idx; - if (_cfg.get_block_for_node(nk) != b) { + if (_cfg.get_block_for_node(nk) != block) { uint u = _names[nkidx]; - use->insert( u ); - DEBUG_ONLY(def_outside->insert( u );) + use->insert(u); + DEBUG_ONLY(def_outside->insert(u);) } } } @@ -113,41 +111,38 @@ _free_IndexSet = def_outside; // Drop onto free list #endif // Remove anything defined by Phis and the block start instruction - for( uint k=i; k>0; k-- ) { - uint r = _names[b->_nodes[k-1]->_idx]; - def->insert( r ); - use->remove( r ); + for (uint k = i; k > 0; k--) { + uint r = _names[block->_nodes[k - 1]->_idx]; + def->insert(r); + use->remove(r); } // Push these live-in things to predecessors - for( uint l=1; lnum_preds(); l++ ) { - Block *p = _cfg.get_block_for_node(b->pred(l)); - add_liveout( p, use, first_pass ); + for (uint l = 1; l < block->num_preds(); l++) { + Block* p = _cfg.get_block_for_node(block->pred(l)); + add_liveout(p, use, first_pass); // PhiNode uses go in the live-out set of prior blocks. - for( uint k=i; k>0; k-- ) - add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass ); + for (uint k = i; k > 0; k--) { + add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass); + } } - freeset( b ); - first_pass.set(b->_pre_order); + freeset(block); + first_pass.set(block->_pre_order); // Inner loop: blocks that picked up new live-out values to be propagated - while( _worklist->size() ) { - // !!!!! -// #ifdef ASSERT - iters++; -// #endif - Block *b = _worklist->pop(); - IndexSet *delta = getset(b); + while (_worklist->size()) { + Block* block = _worklist->pop(); + IndexSet *delta = getset(block); assert( delta->count(), "missing delta set" ); // Add new-live-in to predecessors live-out sets - for (uint l = 1; l < b->num_preds(); l++) { - Block* block = _cfg.get_block_for_node(b->pred(l)); - add_liveout(block, delta, first_pass); + for (uint l = 1; l < block->num_preds(); l++) { + Block* predecessor = _cfg.get_block_for_node(block->pred(l)); + add_liveout(predecessor, delta, first_pass); } - freeset(b); + freeset(block); } // End of while-worklist-not-empty } // End of for-all-blocks-outer-loop @@ -155,7 +150,7 @@ // We explicitly clear all of the IndexSets which we are about to release. // This allows us to recycle their internal memory into IndexSet's free list. - for( i=0; i<_cfg._num_blocks; i++ ) { + for (i = 0; i < _cfg.number_of_blocks(); i++) { _defs[i].clear(); if (_deltas[i]) { // Is this always true? @@ -171,13 +166,11 @@ } -//------------------------------stats------------------------------------------ #ifndef PRODUCT void PhaseLive::stats(uint iters) const { } #endif -//------------------------------getset----------------------------------------- // Get an IndexSet for a block. Return existing one, if any. Make a new // empty one if a prior one does not exist. IndexSet *PhaseLive::getset( Block *p ) { @@ -188,7 +181,6 @@ return delta; // Return set of new live-out items } -//------------------------------getfreeset------------------------------------- // Pull from free list, or allocate. Internal allocation on the returned set // is always from thread local storage. IndexSet *PhaseLive::getfreeset( ) { @@ -207,7 +199,6 @@ return f; } -//------------------------------freeset---------------------------------------- // Free an IndexSet from a block. void PhaseLive::freeset( const Block *p ) { IndexSet *f = _deltas[p->_pre_order-1]; @@ -216,7 +207,6 @@ _deltas[p->_pre_order-1] = NULL; } -//------------------------------add_liveout------------------------------------ // Add a live-out value to a given blocks live-out set. If it is new, then // also add it to the delta set and stick the block on the worklist. void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) { @@ -233,8 +223,6 @@ } } - -//------------------------------add_liveout------------------------------------ // Add a vector of live-out values to a given blocks live-out set. void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) { IndexSet *live = &_live[p->_pre_order-1]; @@ -262,7 +250,6 @@ } #ifndef PRODUCT -//------------------------------dump------------------------------------------- // Dump the live-out set for a block void PhaseLive::dump( const Block *b ) const { tty->print("Block %d: ",b->_pre_order); @@ -275,18 +262,19 @@ tty->print("\n"); } -//------------------------------verify_base_ptrs------------------------------- // Verify that base pointers and derived pointers are still sane. void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { #ifdef ASSERT Unique_Node_List worklist(a); - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; - if( n->is_Phi() ) break; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j-1]; + if (n->is_Phi()) { + break; + } // Found a safepoint? - if( n->is_MachSafePoint() ) { + if (n->is_MachSafePoint()) { MachSafePointNode *sfpt = n->as_MachSafePoint(); JVMState* jvms = sfpt->jvms(); if (jvms != NULL) { @@ -357,7 +345,6 @@ #endif } -//------------------------------verify------------------------------------- // Verify that graphs and base pointers are still sane. void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const { #ifdef ASSERT diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/matcher.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -73,8 +73,8 @@ const uint Matcher::_end_rematerialize = _END_REMATERIALIZE; //---------------------------Matcher------------------------------------------- -Matcher::Matcher( Node_List &proj_list ) : - PhaseTransform( Phase::Ins_Select ), +Matcher::Matcher() +: PhaseTransform( Phase::Ins_Select ), #ifdef ASSERT _old2new_map(C->comp_arena()), _new2old_map(C->comp_arena()), @@ -84,7 +84,7 @@ _swallowed(swallowed), _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE), _end_inst_chain_rule(_END_INST_CHAIN_RULE), - _must_clone(must_clone), _proj_list(proj_list), + _must_clone(must_clone), _register_save_policy(register_save_policy), _c_reg_save_policy(c_reg_save_policy), _register_save_type(register_save_type), @@ -1339,8 +1339,9 @@ for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) proj->_rout.Insert(OptoReg::Name(i)); } - if( proj->_rout.is_NotEmpty() ) - _proj_list.push(proj); + if (proj->_rout.is_NotEmpty()) { + push_projection(proj); + } } // Transfer the safepoint information from the call to the mcall // Move the JVMState list @@ -1719,14 +1720,15 @@ } // If the _leaf is an AddP, insert the base edge - if( leaf->is_AddP() ) + if (leaf->is_AddP()) { mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base)); + } - uint num_proj = _proj_list.size(); + uint number_of_projections_prior = number_of_projections(); // Perform any 1-to-many expansions required - MachNode *ex = mach->Expand(s,_proj_list, mem); - if( ex != mach ) { + MachNode *ex = mach->Expand(s, _projection_list, mem); + if (ex != mach) { assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match"); if( ex->in(1)->is_Con() ) ex->in(1)->set_req(0, C->root()); @@ -1747,7 +1749,7 @@ // generated belatedly during spill code generation. if (_allocation_started) { guarantee(ex == mach, "no expand rules during spill generation"); - guarantee(_proj_list.size() == num_proj, "no allocation during spill generation"); + guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation"); } if (leaf->is_Con() || leaf->is_DecodeN()) { diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/matcher.hpp Mon Apr 24 16:49:33 2017 +0100 @@ -88,7 +88,7 @@ Node *transform( Node *dummy ); - Node_List &_proj_list; // For Machine nodes killing many values + Node_List _projection_list; // For Machine nodes killing many values Node_Array _shared_nodes; @@ -184,10 +184,30 @@ void collect_null_checks( Node *proj, Node *orig_proj ); void validate_null_checks( ); - Matcher( Node_List &proj_list ); + Matcher(); + + // Get a projection node at position pos + Node* get_projection(uint pos) { + return _projection_list[pos]; + } + + // Push a projection node onto the projection list + void push_projection(Node* node) { + _projection_list.push(node); + } + + Node* pop_projection() { + return _projection_list.pop(); + } + + // Number of nodes in the projection list + uint number_of_projections() const { + return _projection_list.size(); + } // Select instructions for entire method - void match( ); + void match(); + // Helper for match OptoReg::Name warp_incoming_stk_arg( VMReg reg ); diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/output.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -55,11 +55,10 @@ extern int emit_exception_handler(CodeBuffer &cbuf); extern int emit_deopt_handler(CodeBuffer &cbuf); -//------------------------------Output----------------------------------------- // Convert Nodes to instruction bits and pass off to the VM void Compile::Output() { // RootNode goes - assert( _cfg->_broot->_nodes.size() == 0, "" ); + assert( _cfg->get_root_block()->_nodes.size() == 0, "" ); // The number of new nodes (mostly MachNop) is proportional to // the number of java calls and inner loops which are aligned. @@ -69,8 +68,8 @@ return; } // Make sure I can find the Start Node - Block *entry = _cfg->_blocks[1]; - Block *broot = _cfg->_broot; + Block *entry = _cfg->get_block(1); + Block *broot = _cfg->get_root_block(); const StartNode *start = entry->_nodes[0]->as_Start(); @@ -110,40 +109,44 @@ } // Insert epilogs before every return - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; - if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point? - Node *m = b->end(); - if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) { - MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); - b->add_inst( epilog ); - _cfg->map_node_to_block(epilog, b); + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* block = _cfg->get_block(i); + if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point? + Node* m = block->end(); + if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) { + MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); + block->add_inst(epilog); + _cfg->map_node_to_block(epilog, block); } } } # ifdef ENABLE_ZAP_DEAD_LOCALS - if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); + if (ZapDeadCompiledLocals) { + Insert_zap_nodes(); + } # endif - uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); - blk_starts[0] = 0; + uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1); + blk_starts[0] = 0; // Initialize code buffer and process short branches. CodeBuffer* cb = init_buffer(blk_starts); - if (cb == NULL || failing()) return; + if (cb == NULL || failing()) { + return; + } ScheduleAndBundle(); #ifndef PRODUCT if (trace_opto_output()) { tty->print("\n---- After ScheduleAndBundle ----\n"); - for (uint i = 0; i < _cfg->_num_blocks; i++) { + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { tty->print("\nBB#%03d:\n", i); - Block *bb = _cfg->_blocks[i]; - for (uint j = 0; j < bb->_nodes.size(); j++) { - Node *n = bb->_nodes[j]; + Block* block = _cfg->get_block(i); + for (uint j = 0; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; OptoReg::Name reg = _regalloc->get_reg_first(n); tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); n->dump(); @@ -152,11 +155,15 @@ } #endif - if (failing()) return; + if (failing()) { + return; + } BuildOopMaps(); - if (failing()) return; + if (failing()) { + return; + } fill_buffer(cb, blk_starts); } @@ -218,8 +225,8 @@ return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care // Insert call to zap runtime stub before every node with an oop map - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; + for( uint i=0; i<_cfg->number_of_blocks(); i++ ) { + Block *b = _cfg->get_block(i); for ( uint j = 0; j < b->_nodes.size(); ++j ) { Node *n = b->_nodes[j]; @@ -276,7 +283,6 @@ return _matcher->match_sfpt(ideal_node); } -//------------------------------is_node_getting_a_safepoint-------------------- bool Compile::is_node_getting_a_safepoint( Node* n) { // This code duplicates the logic prior to the call of add_safepoint // below in this file. @@ -286,7 +292,6 @@ # endif // ENABLE_ZAP_DEAD_LOCALS -//------------------------------compute_loop_first_inst_sizes------------------ // Compute the size of first NumberOfLoopInstrToAlign instructions at the top // of a loop. When aligning a loop we need to provide enough instructions // in cpu's fetch buffer to feed decoders. The loop alignment could be @@ -303,42 +308,39 @@ // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is // equal to 11 bytes which is the largest address NOP instruction. - if( MaxLoopPad < OptoLoopAlignment-1 ) { - uint last_block = _cfg->_num_blocks-1; - for( uint i=1; i <= last_block; i++ ) { - Block *b = _cfg->_blocks[i]; + if (MaxLoopPad < OptoLoopAlignment - 1) { + uint last_block = _cfg->number_of_blocks() - 1; + for (uint i = 1; i <= last_block; i++) { + Block* block = _cfg->get_block(i); // Check the first loop's block which requires an alignment. - if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) { + if (block->loop_alignment() > (uint)relocInfo::addr_unit()) { uint sum_size = 0; uint inst_cnt = NumberOfLoopInstrToAlign; - inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc); + inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc); // Check subsequent fallthrough blocks if the loop's first // block(s) does not have enough instructions. - Block *nb = b; - while( inst_cnt > 0 && - i < last_block && - !_cfg->_blocks[i+1]->has_loop_alignment() && - !nb->has_successor(b) ) { + Block *nb = block; + while(inst_cnt > 0 && + i < last_block && + !_cfg->get_block(i + 1)->has_loop_alignment() && + !nb->has_successor(block)) { i++; - nb = _cfg->_blocks[i]; + nb = _cfg->get_block(i); inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); } // while( inst_cnt > 0 && i < last_block ) - b->set_first_inst_size(sum_size); + block->set_first_inst_size(sum_size); } // f( b->head()->is_Loop() ) } // for( i <= last_block ) } // if( MaxLoopPad < OptoLoopAlignment-1 ) } -//----------------------shorten_branches--------------------------------------- // The architecture description provides short branch variants for some long // branch instructions. Replace eligible long branches with short branches. void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { - - // ------------------ // Compute size of each block, method size, and relocation information size - uint nblocks = _cfg->_num_blocks; + uint nblocks = _cfg->number_of_blocks(); uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); @@ -370,7 +372,7 @@ uint last_avoid_back_to_back_adr = max_uint; uint nop_size = (new (this) MachNopNode())->size(_regalloc); for (uint i = 0; i < nblocks; i++) { // For all blocks - Block *b = _cfg->_blocks[i]; + Block* block = _cfg->get_block(i); // During short branch replacement, we store the relative (to blk_starts) // offset of jump in jmp_offset, rather than the absolute offset of jump. @@ -383,10 +385,10 @@ DEBUG_ONLY( jmp_rule[i] = 0; ) // Sum all instruction sizes to compute block size - uint last_inst = b->_nodes.size(); + uint last_inst = block->_nodes.size(); uint blk_size = 0; for (uint j = 0; j < last_inst; j++) { - Node* nj = b->_nodes[j]; + Node* nj = block->_nodes[j]; // Handle machine instruction nodes if (nj->is_Mach()) { MachNode *mach = nj->as_Mach(); @@ -447,8 +449,8 @@ // When the next block starts a loop, we may insert pad NOP // instructions. Since we cannot know our future alignment, // assume the worst. - if (i< nblocks-1) { - Block *nb = _cfg->_blocks[i+1]; + if (i < nblocks - 1) { + Block* nb = _cfg->get_block(i + 1); int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); if (max_loop_pad > 0) { assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); @@ -480,26 +482,26 @@ has_short_branch_candidate = false; int adjust_block_start = 0; for (uint i = 0; i < nblocks; i++) { - Block *b = _cfg->_blocks[i]; + Block* block = _cfg->get_block(i); int idx = jmp_nidx[i]; - MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); + MachNode* mach = (idx == -1) ? NULL: block->_nodes[idx]->as_Mach(); if (mach != NULL && mach->may_be_short_branch()) { #ifdef ASSERT assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); int j; // Find the branch; ignore trailing NOPs. - for (j = b->_nodes.size()-1; j>=0; j--) { - Node* n = b->_nodes[j]; + for (j = block->_nodes.size()-1; j>=0; j--) { + Node* n = block->_nodes[j]; if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) break; } - assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); + assert(j >= 0 && j == idx && block->_nodes[j] == (Node*)mach, "sanity"); #endif int br_size = jmp_size[i]; int br_offs = blk_starts[i] + jmp_offset[i]; // This requires the TRUE branch target be in succs[0] - uint bnum = b->non_connector_successor(0)->_pre_order; + uint bnum = block->non_connector_successor(0)->_pre_order; int offset = blk_starts[bnum] - br_offs; if (bnum > i) { // adjust following block's offset offset -= adjust_block_start; @@ -534,7 +536,7 @@ diff -= nop_size; } adjust_block_start += diff; - b->_nodes.map(idx, replacement); + block->_nodes.map(idx, replacement); mach->subsume_by(replacement, C); mach = replacement; progress = true; @@ -1114,8 +1116,8 @@ uint add_size = 0; // Fill the constant table. // Note: This must happen before shorten_branches. - for (uint i = 0; i < _cfg->_num_blocks; i++) { - Block* b = _cfg->_blocks[i]; + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* b = _cfg->get_block(i); for (uint j = 0; j < b->_nodes.size(); j++) { Node* n = b->_nodes[j]; @@ -1209,7 +1211,7 @@ // !!!!! This preserves old handling of oopmaps for now debug_info()->set_oopmaps(_oop_map_set); - uint nblocks = _cfg->_num_blocks; + uint nblocks = _cfg->number_of_blocks(); // Count and start of implicit null check instructions uint inct_cnt = 0; uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); @@ -1257,21 +1259,21 @@ // Now fill in the code buffer Node *delay_slot = NULL; - for (uint i=0; i < nblocks; i++) { - Block *b = _cfg->_blocks[i]; - - Node *head = b->head(); + for (uint i = 0; i < nblocks; i++) { + Block* block = _cfg->get_block(i); + Node* head = block->head(); // If this block needs to start aligned (i.e, can be reached other // than by falling-thru from the previous block), then force the // start of a new bundle. - if (Pipeline::requires_bundling() && starts_bundle(head)) + if (Pipeline::requires_bundling() && starts_bundle(head)) { cb->flush_bundle(true); + } #ifdef ASSERT - if (!b->is_connector()) { + if (!block->is_connector()) { stringStream st; - b->dump_head(_cfg, &st); + block->dump_head(_cfg, &st); MacroAssembler(cb).block_comment(st.as_string()); } jmp_target[i] = 0; @@ -1282,16 +1284,16 @@ int blk_offset = current_offset; // Define the label at the beginning of the basic block - MacroAssembler(cb).bind(blk_labels[b->_pre_order]); - - uint last_inst = b->_nodes.size(); + MacroAssembler(cb).bind(blk_labels[block->_pre_order]); + + uint last_inst = block->_nodes.size(); // Emit block normally, except for last instruction. // Emit means "dump code bits into code buffer". for (uint j = 0; j_nodes[j]; + Node* n = block->_nodes[j]; // See if delay slots are supported if (valid_bundle_info(n) && @@ -1345,9 +1347,9 @@ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); int nops_cnt = padding / nop_size; MachNode *nop = new (this) MachNopNode(nops_cnt); - b->_nodes.insert(j++, nop); + block->_nodes.insert(j++, nop); last_inst++; - _cfg->map_node_to_block(nop, b); + _cfg->map_node_to_block(nop, block); nop->emit(*cb, _regalloc); cb->flush_bundle(true); current_offset = cb->insts_size(); @@ -1361,7 +1363,7 @@ mcall->method_set((intptr_t)mcall->entry_point()); // Save the return address - call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); + call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset(); if (mcall->is_MachCallLeaf()) { is_mcall = false; @@ -1398,7 +1400,7 @@ // If this is a branch, then fill in the label with the target BB's label else if (mach->is_MachBranch()) { // This requires the TRUE branch target be in succs[0] - uint block_num = b->non_connector_successor(0)->_pre_order; + uint block_num = block->non_connector_successor(0)->_pre_order; // Try to replace long branch if delay slot is not used, // it is mostly for back branches since forward branch's @@ -1431,8 +1433,8 @@ // Insert padding between avoid_back_to_back branches. if (needs_padding && replacement->avoid_back_to_back()) { MachNode *nop = new (this) MachNopNode(); - b->_nodes.insert(j++, nop); - _cfg->map_node_to_block(nop, b); + block->_nodes.insert(j++, nop); + _cfg->map_node_to_block(nop, block); last_inst++; nop->emit(*cb, _regalloc); cb->flush_bundle(true); @@ -1444,7 +1446,7 @@ jmp_size[i] = new_size; jmp_rule[i] = mach->rule(); #endif - b->_nodes.map(j, replacement); + block->_nodes.map(j, replacement); mach->subsume_by(replacement, C); n = replacement; mach = replacement; @@ -1452,8 +1454,8 @@ } mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); } else if (mach->ideal_Opcode() == Op_Jump) { - for (uint h = 0; h < b->_num_succs; h++) { - Block* succs_block = b->_succs[h]; + for (uint h = 0; h < block->_num_succs; h++) { + Block* succs_block = block->_succs[h]; for (uint j = 1; j < succs_block->num_preds(); j++) { Node* jpn = succs_block->pred(j); if (jpn->is_JumpProj() && jpn->in(0) == mach) { @@ -1464,7 +1466,6 @@ } } } - #ifdef ASSERT // Check that oop-store precedes the card-mark else if (mach->ideal_Opcode() == Op_StoreCM) { @@ -1475,17 +1476,18 @@ if (oop_store == NULL) continue; count++; uint i4; - for( i4 = 0; i4 < last_inst; ++i4 ) { - if( b->_nodes[i4] == oop_store ) break; + for (i4 = 0; i4 < last_inst; ++i4) { + if (block->_nodes[i4] == oop_store) { + break; + } } // Note: This test can provide a false failure if other precedence // edges have been added to the storeCMNode. - assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); + assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); } assert(count > 0, "storeCM expects at least one precedence edge"); } #endif - else if (!n->is_Proj()) { // Remember the beginning of the previous instruction, in case // it's followed by a flag-kill and a null-check. Happens on @@ -1587,12 +1589,12 @@ // If the next block is the top of a loop, pad this block out to align // the loop top a little. Helps prevent pipe stalls at loop back branches. if (i < nblocks-1) { - Block *nb = _cfg->_blocks[i+1]; + Block *nb = _cfg->get_block(i + 1); int padding = nb->alignment_padding(current_offset); if( padding > 0 ) { MachNode *nop = new (this) MachNopNode(padding / nop_size); - b->_nodes.insert( b->_nodes.size(), nop ); - _cfg->map_node_to_block(nop, b); + block->_nodes.insert(block->_nodes.size(), nop); + _cfg->map_node_to_block(nop, block); nop->emit(*cb, _regalloc); current_offset = cb->insts_size(); } @@ -1632,8 +1634,6 @@ } #endif - // ------------------ - #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); @@ -1695,52 +1695,55 @@ _inc_table.set_size(cnt); uint inct_cnt = 0; - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* block = _cfg->get_block(i); Node *n = NULL; int j; // Find the branch; ignore trailing NOPs. - for( j = b->_nodes.size()-1; j>=0; j-- ) { - n = b->_nodes[j]; - if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con ) + for (j = block->_nodes.size() - 1; j >= 0; j--) { + n = block->_nodes[j]; + if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) { break; + } } // If we didn't find anything, continue - if( j < 0 ) continue; + if (j < 0) { + continue; + } // Compute ExceptionHandlerTable subtable entry and add it // (skip empty blocks) - if( n->is_Catch() ) { + if (n->is_Catch()) { // Get the offset of the return from the call - uint call_return = call_returns[b->_pre_order]; + uint call_return = call_returns[block->_pre_order]; #ifdef ASSERT assert( call_return > 0, "no call seen for this basic block" ); - while( b->_nodes[--j]->is_MachProj() ) ; - assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" ); + while (block->_nodes[--j]->is_MachProj()) ; + assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call"); #endif // last instruction is a CatchNode, find it's CatchProjNodes - int nof_succs = b->_num_succs; + int nof_succs = block->_num_succs; // allocate space GrowableArray handler_bcis(nof_succs); GrowableArray handler_pcos(nof_succs); // iterate through all successors for (int j = 0; j < nof_succs; j++) { - Block* s = b->_succs[j]; + Block* s = block->_succs[j]; bool found_p = false; - for( uint k = 1; k < s->num_preds(); k++ ) { - Node *pk = s->pred(k); - if( pk->is_CatchProj() && pk->in(0) == n ) { + for (uint k = 1; k < s->num_preds(); k++) { + Node* pk = s->pred(k); + if (pk->is_CatchProj() && pk->in(0) == n) { const CatchProjNode* p = pk->as_CatchProj(); found_p = true; // add the corresponding handler bci & pco information - if( p->_con != CatchProjNode::fall_through_index ) { + if (p->_con != CatchProjNode::fall_through_index) { // p leads to an exception handler (and is not fall through) - assert(s == _cfg->_blocks[s->_pre_order],"bad numbering"); + assert(s == _cfg->get_block(s->_pre_order), "bad numbering"); // no duplicates, please - if( !handler_bcis.contains(p->handler_bci()) ) { + if (!handler_bcis.contains(p->handler_bci())) { uint block_num = s->non_connector()->_pre_order; handler_bcis.append(p->handler_bci()); handler_pcos.append(blk_labels[block_num].loc_pos()); @@ -1759,14 +1762,14 @@ } // Handle implicit null exception table updates - if( n->is_MachNullCheck() ) { - uint block_num = b->non_connector_successor(0)->_pre_order; - _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() ); + if (n->is_MachNullCheck()) { + uint block_num = block->non_connector_successor(0)->_pre_order; + _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); continue; } // Handle implicit exception table updates: trap instructions. if (n->is_TrapBasedCheckNode()) { - uint block_num = b->non_connector_successor(0)->_pre_order; + uint block_num = block->non_connector_successor(0)->_pre_order; _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); continue; } @@ -1826,14 +1829,12 @@ memset(_current_latency, 0, node_max * sizeof(unsigned short)); // Clear the bundling information - memcpy(_bundle_use_elements, - Pipeline_Use::elaborated_elements, - sizeof(Pipeline_Use::elaborated_elements)); + memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements)); // Get the last node - Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1]; - - _next_node = bb->_nodes[bb->_nodes.size()-1]; + Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1); + + _next_node = block->_nodes[block->_nodes.size() - 1]; } #ifndef PRODUCT @@ -1883,7 +1884,6 @@ sizeof(Pipeline_Use::elaborated_elements)); } -//------------------------------ScheduleAndBundle------------------------------ // Perform instruction scheduling and bundling over the sequence of // instructions in backwards order. void Compile::ScheduleAndBundle() { @@ -1910,7 +1910,6 @@ scheduling.DoScheduling(); } -//------------------------------ComputeLocalLatenciesForward------------------- // Compute the latency of all the instructions. This is fairly simple, // because we already have a legal ordering. Walk over the instructions // from first to last, and compute the latency of the instruction based @@ -2080,7 +2079,6 @@ return _available[0]; } -//------------------------------AddNodeToAvailableList------------------------- void Scheduling::AddNodeToAvailableList(Node *n) { assert( !n->is_Proj(), "projections never directly made available" ); #ifndef PRODUCT @@ -2126,7 +2124,6 @@ #endif } -//------------------------------DecrementUseCounts----------------------------- void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { for ( uint i=0; i < n->len(); i++ ) { Node *def = n->in(i); @@ -2149,7 +2146,6 @@ } } -//------------------------------AddNodeToBundle-------------------------------- void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) { @@ -2364,7 +2360,6 @@ DecrementUseCounts(n,bb); } -//------------------------------ComputeUseCount-------------------------------- // This method sets the use count within a basic block. We will ignore all // uses outside the current basic block. As we are doing a backwards walk, // any node we reach that has a use count of 0 may be scheduled. This also @@ -2449,20 +2444,22 @@ Block *bb; // Walk over all the basic blocks in reverse order - for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) { - bb = _cfg->_blocks[i]; + for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) { + bb = _cfg->get_block(i); #ifndef PRODUCT if (_cfg->C->trace_opto_output()) { tty->print("# Schedule BB#%03d (initial)\n", i); - for (uint j = 0; j < bb->_nodes.size(); j++) + for (uint j = 0; j < bb->_nodes.size(); j++) { bb->_nodes[j]->dump(); + } } #endif // On the head node, skip processing - if( bb == _cfg->_broot ) + if (bb == _cfg->get_root_block()) { continue; + } // Skip empty, connector blocks if (bb->is_connector()) @@ -2598,7 +2595,6 @@ } // end DoScheduling -//------------------------------verify_good_schedule--------------------------- // Verify that no live-range used in the block is killed in the block by a // wrong DEF. This doesn't verify live-ranges that span blocks. @@ -2611,7 +2607,6 @@ } #ifdef ASSERT -//------------------------------verify_do_def---------------------------------- void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { // Check for bad kills if( OptoReg::is_valid(def) ) { // Ignore stores & control flow @@ -2627,7 +2622,6 @@ } } -//------------------------------verify_good_schedule--------------------------- void Scheduling::verify_good_schedule( Block *b, const char *msg ) { // Zap to something reasonable for the verify code @@ -2687,7 +2681,6 @@ from->add_prec(to); } -//------------------------------anti_do_def------------------------------------ void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow return; @@ -2757,7 +2750,6 @@ add_prec_edge_from_to(kill,pinch); } -//------------------------------anti_do_use------------------------------------ void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow return; @@ -2778,7 +2770,6 @@ } } -//------------------------------ComputeRegisterAntidependences----------------- // We insert antidependences between the reads and following write of // allocated registers to prevent illegal code motion. Hopefully, the // number of added references should be fairly small, especially as we @@ -2912,8 +2903,6 @@ } } -//------------------------------garbage_collect_pinch_nodes------------------------------- - // Garbage collect pinch nodes for reuse by other blocks. // // The block scheduler's insertion of anti-dependence @@ -2988,7 +2977,6 @@ pinch->set_req(0, NULL); } -//------------------------------print_statistics------------------------------- #ifndef PRODUCT void Scheduling::dump_available() const { diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/phaseX.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -1672,8 +1672,8 @@ bool method_name_not_printed = true; // Examine each basic block - for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) { - Block *block = _cfg._blocks[block_number]; + for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) { + Block* block = _cfg.get_block(block_number); bool block_not_printed = true; // and each instruction within a block diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/postaloc.cpp --- a/src/share/vm/opto/postaloc.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/postaloc.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -403,28 +403,29 @@ // Need a mapping from basic block Node_Lists. We need a Node_List to // map from register number to value-producing Node. - Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); - memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); + Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1); + memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1)); // Need a mapping from basic block Node_Lists. We need a Node_List to // map from register number to register-defining Node. - Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); - memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); + Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1); + memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1)); // We keep unused Node_Lists on a free_list to avoid wasting // memory. GrowableArray free_list = GrowableArray(16); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { uint j; - Block *b = _cfg._blocks[i]; + Block* block = _cfg.get_block(i); // Count of Phis in block uint phi_dex; - for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) { - Node *phi = b->_nodes[phi_dex]; - if( !phi->is_Phi() ) + for (phi_dex = 1; phi_dex < block->_nodes.size(); phi_dex++) { + Node* phi = block->_nodes[phi_dex]; + if (!phi->is_Phi()) { break; + } } // If any predecessor has not been visited, we do not know the state @@ -432,21 +433,23 @@ // along Phi input edges bool missing_some_inputs = false; Block *freed = NULL; - for( j = 1; j < b->num_preds(); j++ ) { - Block *pb = _cfg.get_block_for_node(b->pred(j)); + for (j = 1; j < block->num_preds(); j++) { + Block* pb = _cfg.get_block_for_node(block->pred(j)); // Remove copies along phi edges - for( uint k=1; k_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false ); - if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge? + for (uint k = 1; k < phi_dex; k++) { + elide_copy(block->_nodes[k], j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false); + } + if (blk2value[pb->_pre_order]) { // Have a mapping on this edge? // See if this predecessor's mappings have been used by everybody // who wants them. If so, free 'em. uint k; - for( k=0; k_num_succs; k++ ) { - Block *pbsucc = pb->_succs[k]; - if( !blk2value[pbsucc->_pre_order] && pbsucc != b ) + for (k = 0; k < pb->_num_succs; k++) { + Block* pbsucc = pb->_succs[k]; + if (!blk2value[pbsucc->_pre_order] && pbsucc != block) { break; // Found a future user + } } - if( k >= pb->_num_succs ) { // No more uses, free! + if (k >= pb->_num_succs) { // No more uses, free! freed = pb; // Record last block freed free_list.push(blk2value[pb->_pre_order]); free_list.push(blk2regnd[pb->_pre_order]); @@ -465,20 +468,20 @@ value.map(_max_reg,NULL); regnd.map(_max_reg,NULL); // Set mappings as OUR mappings - blk2value[b->_pre_order] = &value; - blk2regnd[b->_pre_order] = ®nd; + blk2value[block->_pre_order] = &value; + blk2regnd[block->_pre_order] = ®nd; // Initialize value & regnd for this block - if( missing_some_inputs ) { + if (missing_some_inputs) { // Some predecessor has not yet been visited; zap map to empty - for( uint k = 0; k < (uint)_max_reg; k++ ) { + for (uint k = 0; k < (uint)_max_reg; k++) { value.map(k,NULL); regnd.map(k,NULL); } } else { if( !freed ) { // Didn't get a freebie prior block // Must clone some data - freed = _cfg.get_block_for_node(b->pred(1)); + freed = _cfg.get_block_for_node(block->pred(1)); Node_List &f_value = *blk2value[freed->_pre_order]; Node_List &f_regnd = *blk2regnd[freed->_pre_order]; for( uint k = 0; k < (uint)_max_reg; k++ ) { @@ -487,9 +490,11 @@ } } // Merge all inputs together, setting to NULL any conflicts. - for( j = 1; j < b->num_preds(); j++ ) { - Block *pb = _cfg.get_block_for_node(b->pred(j)); - if( pb == freed ) continue; // Did self already via freelist + for (j = 1; j < block->num_preds(); j++) { + Block* pb = _cfg.get_block_for_node(block->pred(j)); + if (pb == freed) { + continue; // Did self already via freelist + } Node_List &p_regnd = *blk2regnd[pb->_pre_order]; for( uint k = 0; k < (uint)_max_reg; k++ ) { if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs? @@ -501,9 +506,9 @@ } // For all Phi's - for( j = 1; j < phi_dex; j++ ) { + for (j = 1; j < phi_dex; j++) { uint k; - Node *phi = b->_nodes[j]; + Node *phi = block->_nodes[j]; uint pidx = n2lidx(phi); OptoReg::Name preg = lrgs(n2lidx(phi)).reg(); @@ -514,8 +519,8 @@ if( phi != x && u != x ) // Found a different input u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input } - if( u != NodeSentinel ) { // Junk Phi. Remove - b->_nodes.remove(j--); + if (u != NodeSentinel) { // Junk Phi. Remove + block->_nodes.remove(j--); phi_dex--; _cfg.unmap_node_from_block(phi); phi->replace_by(u); @@ -545,13 +550,13 @@ } // For all remaining instructions - for( j = phi_dex; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for (j = phi_dex; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; - if( n->outcnt() == 0 && // Dead? - n != C->top() && // (ignore TOP, it has no du info) - !n->is_Proj() ) { // fat-proj kills - j -= yank_if_dead(n,b,&value,®nd); + if(n->outcnt() == 0 && // Dead? + n != C->top() && // (ignore TOP, it has no du info) + !n->is_Proj() ) { // fat-proj kills + j -= yank_if_dead(n, block, &value, ®nd); continue; } @@ -596,8 +601,9 @@ const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0; // Remove copies along input edges - for( k = 1; k < n->req(); k++ ) - j -= elide_copy( n, k, b, value, regnd, two_adr!=k ); + for (k = 1; k < n->req(); k++) { + j -= elide_copy(n, k, block, value, regnd, two_adr != k); + } // Unallocated Nodes define no registers uint lidx = n2lidx(n); @@ -626,8 +632,8 @@ // then 'n' is a useless copy. Do not update the register->node // mapping so 'n' will go dead. if( value[nreg] != val ) { - if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) { - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) { + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } else { // Update the mapping: record new Node defined by the register regnd.map(nreg,n); @@ -636,8 +642,8 @@ value.map(nreg,val); } } else if( !may_be_copy_of_callee(n) ) { - assert( n->is_Copy(), "" ); - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + assert(n->is_Copy(), ""); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } else if (RegMask::is_vector(n_ideal_reg)) { // If Node 'n' does not change the value mapped by the register, @@ -656,7 +662,7 @@ } } else if (n->is_Copy()) { // Note: vector can't be constant and can't be copy of calee. - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } else { // If the value occupies a register pair, record same info @@ -670,18 +676,18 @@ tmp.Remove(nreg); nreg_lo = tmp.find_first_elem(); } - if( value[nreg] != val || value[nreg_lo] != val ) { - if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) { - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + if (value[nreg] != val || value[nreg_lo] != val) { + if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) { + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } else { regnd.map(nreg , n ); regnd.map(nreg_lo, n ); value.map(nreg ,val); value.map(nreg_lo,val); } - } else if( !may_be_copy_of_callee(n) ) { - assert( n->is_Copy(), "" ); - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + } else if (!may_be_copy_of_callee(n)) { + assert(n->is_Copy(), ""); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/opto/reg_split.cpp --- a/src/share/vm/opto/reg_split.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/opto/reg_split.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -514,13 +514,13 @@ // a Def is UP or DOWN. UP means that it should get a register (ie - // it is always in LRP regions), and DOWN means that it is probably // on the stack (ie - it crosses HRP regions). - Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 ); - bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 ); + Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1); + bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1); Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt ); VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt ); // Initialize Reaches & UP - for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) { + for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) { Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt ); UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt ); Node **Reachblock = Reaches[bidx]; @@ -540,13 +540,13 @@ //----------PASS 1---------- //----------Propagation & Node Insertion Code---------- // Walk the Blocks in RPO for DEF & USE info - for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) { + for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) { if (C->check_node_count(spill_cnt, out_of_nodes)) { return 0; } - b = _cfg._blocks[bidx]; + b = _cfg.get_block(bidx); // Reaches & UP arrays for this block Reachblock = Reaches[b->_pre_order]; UPblock = UP[b->_pre_order]; @@ -1364,8 +1364,8 @@ // DEBUG #ifdef ASSERT // Validate all live range index assignments - for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) { - b = _cfg._blocks[bidx]; + for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) { + b = _cfg.get_block(bidx); for( insidx = 0; insidx <= b->end_idx(); insidx++ ) { Node *n = b->_nodes[insidx]; uint defidx = Find(n); diff -r 1c3719302f46 -r 34d9f1ce747b src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Wed Apr 19 05:28:25 2017 +0100 +++ b/src/share/vm/runtime/vmStructs.cpp Mon Apr 24 16:49:33 2017 +0100 @@ -1174,10 +1174,10 @@ \ c2_nonstatic_field(MachCallRuntimeNode, _name, const char*) \ \ - c2_nonstatic_field(PhaseCFG, _num_blocks, uint) \ + c2_nonstatic_field(PhaseCFG, _number_of_blocks, uint) \ c2_nonstatic_field(PhaseCFG, _blocks, Block_List) \ c2_nonstatic_field(PhaseCFG, _node_to_block_mapping, Block_Array) \ - c2_nonstatic_field(PhaseCFG, _broot, Block*) \ + c2_nonstatic_field(PhaseCFG, _root_block, Block*) \ \ c2_nonstatic_field(PhaseRegAlloc, _node_regs, OptoRegPair*) \ c2_nonstatic_field(PhaseRegAlloc, _node_regs_max_index, uint) \