Mercurial > hg > icedtea7-forest-aarch64 > hotspot
changeset 6027:81528bb814f8
merge
author | adinn |
---|---|
date | Thu, 04 Dec 2014 14:30:02 +0000 |
parents | 205e1ae8868b (diff) 0c2099cd04cd (current diff) |
children | 6712ee98b46e |
files | |
diffstat | 37 files changed, 2229 insertions(+), 2503 deletions(-) [+] |
line wrap: on
line diff
--- a/make/linux/makefiles/vm.make Fri Nov 28 03:10:21 2014 +0000 +++ b/make/linux/makefiles/vm.make Thu Dec 04 14:30:02 2014 +0000 @@ -92,6 +92,10 @@ BUILD_USER = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\"" VM_DISTRO = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\"" +ifeq ($(BUILTIN_SIM), true) + HS_LIB_ARCH=-DHOTSPOT_LIB_ARCH="\"aarch64\"" +endif + CXXFLAGS = \ ${SYSDEFS} \ ${INCLUDES} \
--- a/src/cpu/aarch64/vm/aarch64.ad Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/aarch64.ad Thu Dec 04 14:30:02 2014 +0000 @@ -804,11 +804,6 @@ //============================================================================= -// Emit an interrupt that is caught by the debugger (for debugging compiler). -void emit_break(CodeBuffer &cbuf) { - Unimplemented(); -} - #ifndef PRODUCT void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("BREAKPOINT"); @@ -1363,12 +1358,10 @@ return 4; } -// !!! FIXME AARCH64 -- this needs to be reworked for jdk7 - uint size_java_to_interp() { - // count a mov mem --> to 3 movz/k and a branch - return 4 * NativeInstruction::instruction_size; + // on jdk7 we only need a mov oop and a branch + return 2 * NativeInstruction::instruction_size; } // Offset from start of compiled java to interpreter stub to the load @@ -1395,11 +1388,11 @@ // static stub relocation stores the instruction address of the call const RelocationHolder &rspec = static_stub_Relocation::spec(mark); __ relocate(rspec); - // !!! FIXME AARCH64 // static stub relocation also tags the methodOop in the code-stream. - // for jdk7 we have to use movoop and locate the oop in the cpool - // if we use an immediate then patching fails to update the pool - // oop and GC overwrites the patch with movk/z 0x0000 again + // + // n.b. for jdk7 we have to use movoop and locate the oop in the + // cpool; if we use an immediate then patching fails to update the + // pool oop and GC overwrites the patch with movk/z 0x0000 again __ movoop(rmethod, (jobject) NULL); // This is recognized as unresolved by relocs/nativeinst/ic code __ b(__ pc()); @@ -1412,9 +1405,8 @@ // relocation entries for call stub, compiled java to interpretor uint reloc_java_to_interp() { - // TODO fixme - // return a large number - return 5; + // n.b. 
on jdk7 we use a movoop and a branch + return 2; } //============================================================================= @@ -2414,16 +2406,13 @@ int disp = $mem$$disp; if (index == -1) { __ prfm(Address(base, disp), PLDL1KEEP); - __ nop(); } else { Register index_reg = as_Register(index); if (disp == 0) { - // __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP); - __ nop(); + __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP); } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP); - __ nop(); } } %} @@ -2441,11 +2430,9 @@ Register index_reg = as_Register(index); if (disp == 0) { __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP); - __ nop(); } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP); - __ nop(); } } %} @@ -2458,16 +2445,13 @@ int disp = $mem$$disp; if (index == -1) { __ prfm(Address(base, disp), PSTL1STRM); - __ nop(); } else { Register index_reg = as_Register(index); if (disp == 0) { __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM); - __ nop(); } else { __ lea(rscratch1, Address(base, disp)); __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM); - __ nop(); } } %} @@ -2589,7 +2573,12 @@ Register dst_reg = as_Register($dst$$reg); unsigned long off; __ adrp(dst_reg, ExternalAddress(page), off); - assert(off == 0, "assumed offset == 0"); + assert((off & 0x3ffL) == 0, "assumed offset aligned to 0x400"); + // n.b. 
intra-page offset will never change even if this gets + // relocated so it is safe to omit the lea when off == 0 + if (off != 0) { + __ lea(dst_reg, Address(dst_reg, off)); + } %} enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{ @@ -3374,6 +3363,16 @@ interface(CONST_INTER); %} +operand immI_le_4() +%{ + predicate(n->get_int() <= 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_31() %{ predicate(n->get_int() == 31); @@ -4698,17 +4697,14 @@ attributes %{ // ARM instructions are of fixed length fixed_size_instructions; // Fixed size instructions TODO does - // TODO does this relate to how many instructions can be scheduled - // at once? just guess 8 for now - max_instructions_per_bundle = 8; // Up to 8 instructions per bundle + max_instructions_per_bundle = 2; // A53 = 2, A57 = 4 // ARM instructions come in 32-bit word units instruction_unit_size = 4; // An instruction is 4 bytes long - // TODO identify correct cache line size just guess 64 for now instruction_fetch_unit_size = 64; // The processor fetches one line instruction_fetch_units = 1; // of 64 bytes // List of nop instructions - //nops( MachNop ); + nops( MachNop ); %} // We don't use an actual pipeline model so don't care about resources @@ -4718,21 +4714,387 @@ //----------RESOURCES---------------------------------------------------------- // Resources are the functional units available to the machine -resources( D0, D1, D2, DECODE = D0 | D1 | D2, - MS0, MS1, MS2, MEM = MS0 | MS1 | MS2, - BR, FPU, - ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2); +resources( INS0, INS1, INS01 = INS0 | INS1, + ALU0, ALU1, ALU = ALU0 | ALU1, + MAC, + DIV, + BRANCH, + LDST, + NEON_FP); //----------PIPELINE DESCRIPTION----------------------------------------------- // Pipeline Description specifies the stages in the machine's pipeline // Generic P2/P3 pipeline -pipe_desc(S0, S1, S2, S3, S4, S5); +pipe_desc(ISS, EX1, EX2, WR); //----------PIPELINE 
CLASSES--------------------------------------------------- // Pipeline Classes describe the stages in which input and output are // referenced by the hardware pipeline. +//------- Integer ALU operations -------------------------- + +// Integer ALU reg-reg operation +// Operands needed in EX1, result generated in EX2 +// Eg. ADD x0, x1, x2 +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : EX1(read); + INS01 : ISS; // Dual issue as instruction 0 or 1 + ALU : EX2; +%} + +// Integer ALU reg-reg operation with constant shift +// Shifted register must be available in LATE_ISS instead of EX1 +// Eg. ADD x0, x1, x2, LSL #2 +pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg operation with constant shift +// Eg. LSL x0, x1, #shift +pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg-reg operation with variable shift +// Both operands must be available in LATE_ISS instead of EX1 +// Result is available in EX1 instead of EX2 +// Eg. LSLV x0, x1, x2 +pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX1(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX1; +%} + +// Integer ALU reg-reg operation with extract +// As for _vshift above, but result generated in EX2 +// Eg. EXTR x0, x1, x2, #N +pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + src2 : ISS(read); + INS1 : ISS; // Can only dual issue as Instruction 1 + ALU : EX1; +%} + +// Integer ALU reg operation +// Eg. 
NEG x0, x1 +pipe_class ialu_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : EX2(write); + src : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg immediate operation +// Eg. ADD x0, x1, #N +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU immediate operation (no source operands) +// Eg. MOV x0, #N +pipe_class ialu_imm(iRegI dst) +%{ + single_instruction; + dst : EX1(write); + INS01 : ISS; + ALU : EX1; +%} + +//------- Compare operation ------------------------------- + +// Compare reg-reg +// Eg. CMP x0, x1 +pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + op2 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Compare reg-imm +// Eg. CMP x0, #N +pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +//------- Conditional instructions ------------------------ + +// Conditional no operands +// Eg. CSINC x0, zr, zr, <cond> +pipe_class icond_none(iRegI dst, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 2 operand +// EG. CSEL X0, X1, X2, <cond> +pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src1 : EX1(read); + src2 : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 1 operand +// EG. CSEL X0, X1, zr, <cond> +pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +//------- Multiply pipeline operations -------------------- + +// Multiply reg-reg +// Eg. 
MUL w0, w1, w2 +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD w0, w1, w2, w3 +pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Eg. MUL x0, x1, x2 +pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD x0, x1, x2, x3 +pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +//------- Divide pipeline operations -------------------- + +// Eg. SDIV w0, w1, w2 +pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(8); // Maximum latency for 32 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +// Eg. SDIV x0, x1, x2 +pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(16); // Maximum latency for 64 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +//------- Load pipeline operations ------------------------ + +// Load - prefetch +// Eg. PRFM <mem> +pipe_class iload_prefetch(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, mem +// Eg. 
LDR x0, <mem> +pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ + single_instruction; + dst : WR(write); + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, reg +// Eg. LDR x0, [sp, x1] +pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : WR(write); + src : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Store pipeline operations ----------------------- + +// Store - zr, mem +// Eg. STR zr, <mem> +pipe_class istore_mem(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, mem +// Eg. STR x0, <mem> +pipe_class istore_reg_mem(iRegI src, memory mem) +%{ + single_instruction; + mem : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, reg +// Eg. STR x0, [sp, x1] +pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Branch pipeline operations ---------------------- + +// Branch +pipe_class pipe_branch() +%{ + single_instruction; + INS01 : ISS; + BRANCH : EX1; +%} + +// Conditional branch +pipe_class pipe_branch_cond(rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +// Compare & Branch +// EG. CBZ/CBNZ +pipe_class pipe_cmp_branch(iRegI op1) +%{ + single_instruction; + op1 : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +//------- Synchronisation operations ---------------------- + +// Any operation requiring serialization. +// EG. 
DMB/Atomic Ops/Load Acquire/Str Release +pipe_class pipe_serial() +%{ + single_instruction; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + +// Generic big/slow expanded idiom - also serialized +pipe_class pipe_slow() +%{ + instruction_count(10); + multiple_bundles; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + // Empty pipeline class pipe_class pipe_class_empty() %{ @@ -4754,13 +5116,6 @@ fixed_latency(16); %} -// Pipeline class for traps. -pipe_class pipe_class_trap() -%{ - single_instruction; - fixed_latency(100); -%} - // Pipeline class for memory operations. pipe_class pipe_class_memory() %{ @@ -4777,7 +5132,7 @@ // Define the class for the Nop node. define %{ - MachNop = pipe_class_default; + MachNop = pipe_class_empty; %} %} @@ -4817,7 +5172,7 @@ ins_encode(aarch64_enc_ldrsbw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit signed) into long @@ -4830,7 +5185,7 @@ ins_encode(aarch64_enc_ldrsb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit unsigned) @@ -4843,7 +5198,7 @@ ins_encode(aarch64_enc_ldrb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Byte (8 bit unsigned) into long @@ -4856,7 +5211,7 @@ ins_encode(aarch64_enc_ldrb(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Short (16 bit signed) @@ -4869,7 +5224,7 @@ ins_encode(aarch64_enc_ldrshw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Short (16 bit signed) into long @@ -4882,7 +5237,7 @@ ins_encode(aarch64_enc_ldrsh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Char (16 bit unsigned) @@ -4895,7 +5250,7 @@ ins_encode(aarch64_enc_ldrh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load 
Short/Char (16 bit unsigned) into long @@ -4908,7 +5263,7 @@ ins_encode(aarch64_enc_ldrh(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit signed) @@ -4921,7 +5276,7 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit signed) into long @@ -4934,7 +5289,7 @@ ins_encode(aarch64_enc_ldrsw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Integer (32 bit unsigned) into long @@ -4947,7 +5302,7 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Long (64 bit signed) @@ -4960,7 +5315,7 @@ ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Range @@ -4973,7 +5328,7 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Pointer @@ -4986,7 +5341,7 @@ ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Compressed Pointer @@ -4999,7 +5354,7 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Klass Pointer @@ -5012,7 +5367,7 @@ ins_encode(aarch64_enc_ldr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Narrow Klass Pointer @@ -5025,7 +5380,7 @@ ins_encode(aarch64_enc_ldrw(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_mem); %} // Load Float @@ -5065,7 +5420,7 @@ ins_encode( aarch64_enc_movw_imm(dst, src) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Long Constant @@ -5078,7 +5433,7 @@ ins_encode( aarch64_enc_mov_imm(dst, src) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Pointer Constant @@ -5094,7 +5449,7 @@ ins_encode(aarch64_enc_mov_p(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Null Pointer Constant @@ -5108,7 
+5463,7 @@ ins_encode(aarch64_enc_mov_p0(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Pointer Constant One @@ -5122,7 +5477,7 @@ ins_encode(aarch64_enc_mov_p1(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Poll Page Constant @@ -5136,7 +5491,7 @@ ins_encode(aarch64_enc_mov_poll_page(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Byte Map Base Constant @@ -5150,7 +5505,7 @@ ins_encode(aarch64_enc_mov_byte_map_base(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Narrow Pointer Constant @@ -5164,7 +5519,7 @@ ins_encode(aarch64_enc_mov_n(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Narrow Null Pointer Constant @@ -5178,7 +5533,7 @@ ins_encode(aarch64_enc_mov_n0(dst, con)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_imm); %} // Load Packed Float Constant @@ -5254,7 +5609,7 @@ ins_encode(aarch64_enc_strb0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Byte @@ -5267,7 +5622,7 @@ ins_encode(aarch64_enc_strb(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} @@ -5280,7 +5635,7 @@ ins_encode(aarch64_enc_strb0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Char/Short @@ -5293,7 +5648,7 @@ ins_encode(aarch64_enc_strh(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeimmC0(immI0 zero, memory mem) @@ -5305,7 +5660,7 @@ ins_encode(aarch64_enc_strh0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Integer @@ -5319,7 +5674,7 @@ ins_encode(aarch64_enc_strw(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} instruct storeimmI0(immI0 zero, memory mem) @@ -5331,7 +5686,7 @@ ins_encode(aarch64_enc_strw0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Long (64 bit signed) @@ -5344,7 +5699,7 @@ ins_encode(aarch64_enc_str(src, mem)); - 
ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // Store Long (64 bit signed) @@ -5357,7 +5712,7 @@ ins_encode(aarch64_enc_str0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Store Pointer @@ -5370,7 +5725,7 @@ ins_encode(aarch64_enc_str(src, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_mem); %} // Store Pointer @@ -5383,7 +5738,7 @@ ins_encode(aarch64_enc_str0(mem)); - ins_pipe(pipe_class_memory); + ins_pipe(istore_mem); %} // Save last Java PC to thread anchor @@ -5500,7 +5855,7 @@ ins_encode( aarch64_enc_prefetchr(mem) ); - ins_pipe(pipe_class_memory); + ins_pipe(iload_prefetch); %} instruct prefetchw( memory mem ) %{ @@ -5511,7 +5866,7 @@ ins_encode( aarch64_enc_prefetchw(mem) ); - ins_pipe(pipe_class_memory); + ins_pipe(iload_prefetch); %} instruct prefetchnta( memory mem ) %{ @@ -5522,64 +5877,64 @@ ins_encode( aarch64_enc_prefetchnta(mem) ); - ins_pipe(pipe_class_memory); + ins_pipe(iload_prefetch); %} // ============================================================================ // BSWAP Instructions -instruct bytes_reverse_int(iRegINoSp dst) %{ - match(Set dst (ReverseBytesI dst)); - - ins_cost(INSN_COST); - format %{ "revw $dst, $dst" %} - - ins_encode %{ - __ revw(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_long(iRegLNoSp dst) %{ - match(Set dst (ReverseBytesL dst)); - - ins_cost(INSN_COST); - format %{ "rev $dst, $dst" %} - - ins_encode %{ - __ rev(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_unsigned_short(iRegINoSp dst) %{ - match(Set dst (ReverseBytesUS dst)); - - ins_cost(INSN_COST); - format %{ "rev16w $dst, $dst" %} - - ins_encode %{ - __ rev16w(as_Register($dst$$reg), as_Register($dst$$reg)); - %} - - ins_pipe( pipe_class_default ); -%} - -instruct bytes_reverse_short(iRegINoSp dst) %{ - match(Set dst (ReverseBytesS dst)); - - ins_cost(INSN_COST); - 
format %{ "rev16w $dst, $dst\n\t" +instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesI src)); + + ins_cost(INSN_COST); + format %{ "revw $dst, $src" %} + + ins_encode %{ + __ revw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); + + ins_cost(INSN_COST); + format %{ "rev $dst, $src" %} + + ins_encode %{ + __ rev(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesUS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src" %} + + ins_encode %{ + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src\n\t" "sbfmw $dst, $dst, #0, #15" %} ins_encode %{ - __ rev16w(as_Register($dst$$reg), as_Register($dst$$reg)); + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U); %} - ins_pipe( pipe_class_default ); + ins_pipe(ialu_reg); %} // ============================================================================ @@ -5594,7 +5949,7 @@ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe( pipe_class_default ); + ins_pipe( ialu_reg ); %} instruct countLeadingZerosL(iRegI dst, iRegL src) %{ @@ -5606,7 +5961,7 @@ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe( pipe_class_default ); + ins_pipe( ialu_reg ); %} instruct countTrailingZerosI(iRegI dst, iRegI src) %{ @@ -5620,7 +5975,7 @@ __ clzw(as_Register($dst$$reg), as_Register($dst$$reg)); %} - ins_pipe( pipe_class_default ); + ins_pipe(ialu_reg ); %} instruct countTrailingZerosL(iRegI dst, iRegL src) %{ @@ -5634,7 +5989,7 
@@ __ clz(as_Register($dst$$reg), as_Register($dst$$reg)); %} - ins_pipe( pipe_class_default ); + ins_pipe( pipe_serial ); %} // ============================================================================ @@ -5651,7 +6006,7 @@ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad|Assembler::LoadStore)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_release() @@ -5663,7 +6018,7 @@ ins_encode %{ __ membar(Assembler::AnyAny); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_volatile() %{ @@ -5676,7 +6031,7 @@ __ membar(Assembler::AnyAny); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct unnecessary_membar_volatile() %{ @@ -5698,7 +6053,7 @@ __ membar(Assembler::StoreStore); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_acquire_lock() %{ @@ -5711,7 +6066,7 @@ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad|Assembler::LoadStore)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct membar_release_lock() %{ @@ -5724,7 +6079,7 @@ __ membar(Assembler::AnyAny); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // ============================================================================ @@ -5742,7 +6097,7 @@ } %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct castP2X(iRegLNoSp dst, iRegP src) %{ @@ -5757,7 +6112,7 @@ } %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Convert oop into int for vectors alignment masking @@ -5770,7 +6125,7 @@ __ movw($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Convert compressed oop into int for vectors alignment masking @@ -5786,7 +6141,7 @@ __ movw($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} @@ -5802,7 +6157,7 @@ Register d = $dst$$Register; __ encode_heap_oop(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct 
encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ @@ -5813,7 +6168,7 @@ ins_encode %{ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ @@ -5827,7 +6182,7 @@ Register d = $dst$$Register; __ decode_heap_oop(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ @@ -5841,7 +6196,7 @@ Register d = $dst$$Register; __ decode_heap_oop_not_null(d, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct checkCastPP(iRegPNoSp dst) @@ -5913,7 +6268,7 @@ ins_encode(aarch64_enc_ldaxr(dst, mem)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // Conditional-store of the updated heap-top. @@ -5938,7 +6293,7 @@ ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // this has to be implemented as a CAS @@ -5955,7 +6310,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} // this has to be implemented as a CAS @@ -5972,7 +6327,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher @@ -5992,7 +6347,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapL(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ @@ -6009,7 +6364,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapP(iRegINoSp res, memory mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -6026,7 +6381,7 @@ ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), 
aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} instruct compareAndSwapN(iRegINoSp res, memory mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ @@ -6043,7 +6398,7 @@ ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), aarch64_enc_cset_eq(res)); - ins_pipe(pipe_class_memory); + ins_pipe(pipe_slow); %} @@ -6053,7 +6408,7 @@ ins_encode %{ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{ @@ -6062,7 +6417,7 @@ ins_encode %{ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{ @@ -6071,7 +6426,7 @@ ins_encode %{ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{ @@ -6080,7 +6435,7 @@ ins_encode %{ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} @@ -6091,7 +6446,7 @@ ins_encode %{ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{ @@ -6102,7 +6457,7 @@ ins_encode %{ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{ @@ -6112,7 +6467,7 @@ ins_encode %{ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{ @@ -6123,7 
+6478,7 @@ ins_encode %{ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ @@ -6133,7 +6488,7 @@ ins_encode %{ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{ @@ -6144,7 +6499,7 @@ ins_encode %{ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{ @@ -6154,7 +6509,7 @@ ins_encode %{ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{ @@ -6165,7 +6520,7 @@ ins_encode %{ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); %} - ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} // ============================================================================ @@ -6194,7 +6549,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src1, iRegI src2) %{ @@ -6210,7 +6565,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero @@ -6235,7 +6590,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegI src2) %{ @@ -6251,7 +6606,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, 
iRegI src1, immI0 zero) %{ @@ -6267,7 +6622,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegI src1, immI0 zero) %{ @@ -6283,7 +6638,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} // special case for creating a boolean 0 or 1 @@ -6307,7 +6662,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_none); %} instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{ @@ -6326,7 +6681,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_none); %} instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6342,7 +6697,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6358,7 +6713,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero @@ -6376,7 +6731,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, immL0 zero) %{ @@ -6392,7 +6747,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{ @@ -6408,7 +6763,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src2) %{ @@ -6440,7 +6795,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct 
cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{ @@ -6456,7 +6811,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero @@ -6474,7 +6829,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, immP0 zero) %{ @@ -6490,7 +6845,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{ @@ -6506,7 +6861,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src2) %{ @@ -6522,7 +6877,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ @@ -6538,7 +6893,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ @@ -6554,7 +6909,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg_reg); %} // special cases where one arg is zero @@ -6572,7 +6927,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, immN0 zero) %{ @@ -6588,7 +6943,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{ @@ -6604,7 +6959,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - 
ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src2) %{ @@ -6620,7 +6975,7 @@ (Assembler::Condition)$cmp$$cmpcode); %} - ins_pipe(pipe_class_default); + ins_pipe(icond_reg); %} instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1, vRegF src2) @@ -6719,7 +7074,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct addI_reg_imm(iRegINoSp dst, iRegI src1, immIAddSub src2) %{ @@ -6733,7 +7088,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{ @@ -6747,7 +7102,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Pointer Addition @@ -6763,7 +7118,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ @@ -6778,7 +7133,7 @@ as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{ @@ -6793,7 +7148,7 @@ Address::lsl($scale$$constant))); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{ @@ -6808,7 +7163,7 @@ Address::sxtw($scale$$constant))); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{ @@ -6823,7 +7178,7 @@ $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Pointer Immediate Addition @@ -6840,7 +7195,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - 
ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Addition @@ -6857,7 +7212,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // No constant pool entries requiredLong Immediate Addition. @@ -6872,7 +7227,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Integer Subtraction @@ -6888,7 +7243,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // Immediate Subtraction @@ -6903,7 +7258,7 @@ ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Subtraction @@ -6920,7 +7275,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // No constant pool entries requiredLong Immediate Subtraction. @@ -6935,7 +7290,7 @@ ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) ); - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Integer Negation (special case for sub) @@ -6947,11 +7302,11 @@ format %{ "negw $dst, $src\t# int" %} ins_encode %{ - __ negsw(as_Register($dst$$reg), + __ negw(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Long Negation @@ -6967,7 +7322,7 @@ as_Register($src$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Integer Multiply @@ -6984,7 +7339,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imul_reg_reg); %} instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ @@ -6999,7 +7354,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imul_reg_reg); %} // Long Multiply @@ -7016,7 +7371,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmul_reg_reg); %} instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) @@ -7032,7 +7387,7 @@ as_Register($src2$$reg)); %} - 
ins_pipe(pipe_class_default); + ins_pipe(lmul_reg_reg); %} // Combined Integer Multiply & Add/Sub @@ -7050,7 +7405,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imac_reg_reg); %} instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{ @@ -7066,7 +7421,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(imac_reg_reg); %} // Combined Long Multiply & Add/Sub @@ -7084,7 +7439,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmac_reg_reg); %} instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{ @@ -7100,7 +7455,7 @@ as_Register($src3$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(lmac_reg_reg); %} // Integer Divide @@ -7112,7 +7467,7 @@ format %{ "sdivw $dst, $src1, $src2" %} ins_encode(aarch64_enc_divw(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(idiv_reg_reg); %} instruct signExtract(iRegINoSp dst, iRegI src, immI_31 div1, immI_31 div2) %{ @@ -7122,7 +7477,7 @@ ins_encode %{ __ lsrw(as_Register($dst$$reg), as_Register($src$$reg), 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct div2Round(iRegINoSp dst, iRegI src, immI_31 div1, immI_31 div2) %{ @@ -7136,7 +7491,7 @@ as_Register($src$$reg), Assembler::LSR, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // Long Divide @@ -7148,7 +7503,7 @@ format %{ "sdiv $dst, $src1, $src2" %} ins_encode(aarch64_enc_div(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ldiv_reg_reg); %} instruct signExtractL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{ @@ -7158,7 +7513,7 @@ ins_encode %{ __ lsr(as_Register($dst$$reg), as_Register($src$$reg), 63); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{ @@ -7172,7 +7527,7 @@ as_Register($src$$reg), Assembler::LSR, 63); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} // 
Integer Remainder @@ -7185,7 +7540,7 @@ "msubw($dst, rscratch1, $src2, $src1" %} ins_encode(aarch64_enc_modw(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(idiv_reg_reg); %} // Long Remainder @@ -7198,7 +7553,7 @@ "msub($dst, rscratch1, $src2, $src1" %} ins_encode(aarch64_enc_mod(dst, src1, src2)); - ins_pipe(pipe_class_default); + ins_pipe(ldiv_reg_reg); %} // Integer Shifts @@ -7216,7 +7571,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Left Immediate @@ -7232,7 +7587,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Logical Register @@ -7264,7 +7619,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Arithmetic Register @@ -7280,7 +7635,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Arithmetic Immediate @@ -7296,7 +7651,7 @@ $src2$$constant & 0x1f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Combined Int Mask and Right Shift (using UBFM) @@ -7317,7 +7672,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Left Immediate @@ -7333,7 +7688,7 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Right Logical Register @@ -7349,7 +7704,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Logical Immediate @@ -7365,7 +7720,23 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); +%} + +// A special-case pattern for card table stores. 
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ + match(Set dst (URShiftL (CastP2X src1) src2)); + + ins_cost(INSN_COST); + format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %} + + ins_encode %{ + __ lsr(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); %} // Shift Right Arithmetic Register @@ -7381,7 +7752,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // Shift Right Arithmetic Immediate @@ -7397,7 +7768,7 @@ $src2$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // BEGIN This section of the file is automatically generated. Do not edit -------------- @@ -7416,7 +7787,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1, @@ -7432,7 +7803,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct AndI_reg_not_reg(iRegINoSp dst, @@ -7449,7 +7820,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AndL_reg_not_reg(iRegLNoSp dst, @@ -7466,7 +7837,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct OrI_reg_not_reg(iRegINoSp dst, @@ -7483,7 +7854,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct OrL_reg_not_reg(iRegLNoSp dst, @@ -7500,7 +7871,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct XorI_reg_not_reg(iRegINoSp dst, @@ -7517,7 +7888,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct XorL_reg_not_reg(iRegLNoSp dst, @@ -7534,7 +7905,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AndI_reg_URShift_not_reg(iRegINoSp dst, @@ -7552,7 +7923,7 @@ $src3$$constant & 0x3f); %} - 
ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, @@ -7570,7 +7941,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_RShift_not_reg(iRegINoSp dst, @@ -7588,7 +7959,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, @@ -7606,7 +7977,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_LShift_not_reg(iRegINoSp dst, @@ -7624,7 +7995,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, @@ -7642,7 +8013,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_URShift_not_reg(iRegINoSp dst, @@ -7660,7 +8031,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, @@ -7678,7 +8049,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_RShift_not_reg(iRegINoSp dst, @@ -7696,7 +8067,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, @@ -7714,7 +8085,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_LShift_not_reg(iRegINoSp dst, @@ -7732,7 +8103,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, @@ -7750,7 +8121,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_URShift_not_reg(iRegINoSp dst, @@ -7768,7 +8139,7 @@ $src3$$constant & 0x3f); %} - 
ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, @@ -7786,7 +8157,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_RShift_not_reg(iRegINoSp dst, @@ -7804,7 +8175,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, @@ -7822,7 +8193,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_LShift_not_reg(iRegINoSp dst, @@ -7840,7 +8211,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, @@ -7858,7 +8229,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_URShift_reg(iRegINoSp dst, @@ -7877,7 +8248,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_URShift_reg(iRegLNoSp dst, @@ -7896,7 +8267,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_RShift_reg(iRegINoSp dst, @@ -7915,7 +8286,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_RShift_reg(iRegLNoSp dst, @@ -7934,7 +8305,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndI_reg_LShift_reg(iRegINoSp dst, @@ -7953,7 +8324,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AndL_reg_LShift_reg(iRegLNoSp dst, @@ -7972,7 +8343,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_URShift_reg(iRegINoSp dst, @@ -7991,7 +8362,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + 
ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_URShift_reg(iRegLNoSp dst, @@ -8010,7 +8381,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_RShift_reg(iRegINoSp dst, @@ -8029,7 +8400,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_RShift_reg(iRegLNoSp dst, @@ -8048,7 +8419,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorI_reg_LShift_reg(iRegINoSp dst, @@ -8067,7 +8438,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct XorL_reg_LShift_reg(iRegLNoSp dst, @@ -8086,7 +8457,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_URShift_reg(iRegINoSp dst, @@ -8105,7 +8476,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_URShift_reg(iRegLNoSp dst, @@ -8124,7 +8495,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_RShift_reg(iRegINoSp dst, @@ -8143,7 +8514,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_RShift_reg(iRegLNoSp dst, @@ -8162,7 +8533,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrI_reg_LShift_reg(iRegINoSp dst, @@ -8181,7 +8552,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct OrL_reg_LShift_reg(iRegLNoSp dst, @@ -8200,7 +8571,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_URShift_reg(iRegINoSp dst, @@ -8219,7 +8590,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct 
AddL_reg_URShift_reg(iRegLNoSp dst, @@ -8238,7 +8609,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_RShift_reg(iRegINoSp dst, @@ -8257,7 +8628,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddL_reg_RShift_reg(iRegLNoSp dst, @@ -8276,7 +8647,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddI_reg_LShift_reg(iRegINoSp dst, @@ -8295,7 +8666,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct AddL_reg_LShift_reg(iRegLNoSp dst, @@ -8314,7 +8685,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_URShift_reg(iRegINoSp dst, @@ -8333,7 +8704,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_URShift_reg(iRegLNoSp dst, @@ -8352,7 +8723,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_RShift_reg(iRegINoSp dst, @@ -8371,7 +8742,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_RShift_reg(iRegLNoSp dst, @@ -8390,7 +8761,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubI_reg_LShift_reg(iRegINoSp dst, @@ -8409,7 +8780,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} instruct SubL_reg_LShift_reg(iRegLNoSp dst, @@ -8428,7 +8799,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %} @@ -8453,7 +8824,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. 
@@ -8476,7 +8847,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. @@ -8499,7 +8870,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Shift Left followed by Shift Right. @@ -8522,7 +8893,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Bitfield extract with shift & mask @@ -8539,7 +8910,7 @@ __ ubfxw(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask) %{ @@ -8554,7 +8925,7 @@ __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // We can use ubfx when extending an And with a mask when we know mask @@ -8572,7 +8943,7 @@ __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Rotations @@ -8589,7 +8960,7 @@ __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 63); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} instruct extrOrI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr) @@ -8604,7 +8975,7 @@ __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr) @@ -8619,7 +8990,7 @@ __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 63); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} instruct extrAddI(iRegINoSp dst, iRegI src1, iRegI src2, immI lshift, immI rshift, rFlagsReg cr) @@ -8634,7 +9005,7 @@ __ extrw(as_Register($dst$$reg), 
as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} @@ -8651,7 +9022,7 @@ __ rorv(as_Register($dst$$reg), as_Register($src$$reg), rscratch1); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // rol expander @@ -8667,7 +9038,7 @@ __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), rscratch1); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} instruct rolL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) @@ -8718,7 +9089,7 @@ __ rorv(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} // ror expander @@ -8733,7 +9104,7 @@ __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %} instruct rorL_rReg_Var_C_64(iRegL dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) @@ -8784,7 +9155,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}; instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) @@ -8797,7 +9168,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}; @@ -8811,7 +9182,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtI_sxtb(iRegINoSp dst, iRegI src1, iRegI src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) @@ -8824,7 +9195,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtI_uxtb(iRegINoSp dst, iRegI src1, iRegI src2, 
immI_24 lshift, immI_24 rshift, rFlagsReg cr) @@ -8837,7 +9208,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr) @@ -8850,7 +9221,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr) @@ -8863,7 +9234,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) @@ -8876,7 +9247,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::sxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) @@ -8889,7 +9260,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} @@ -8903,7 +9274,7 @@ __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_65535 mask, rFlagsReg cr) @@ -8916,7 +9287,7 @@ __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) @@ -8929,7 +9300,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), 
as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) @@ -8942,7 +9313,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) @@ -8955,7 +9326,7 @@ __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtI_uxtb_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_255 mask, rFlagsReg cr) @@ -8968,7 +9339,7 @@ __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtI_uxth_and(iRegINoSp dst, iRegI src1, iRegI src2, immI_65535 mask, rFlagsReg cr) @@ -8981,7 +9352,7 @@ __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) @@ -8994,7 +9365,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtb); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) @@ -9007,7 +9378,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxth); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) @@ -9020,7 +9391,7 @@ __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::uxtw); %} - 
ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} // END This section of the file is automatically generated. Do not edit -------------- @@ -9382,7 +9753,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{ @@ -9397,7 +9768,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Or Instructions @@ -9414,7 +9785,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ @@ -9429,7 +9800,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Xor Instructions @@ -9446,7 +9817,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ @@ -9461,7 +9832,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Long Logical Instructions @@ -9479,7 +9850,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{ @@ -9494,7 +9865,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Or Instructions @@ -9511,7 +9882,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ @@ -9526,7 +9897,7 @@ (unsigned long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} // Xor Instructions @@ -9543,7 +9914,7 @@ as_Register($src2$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ @@ -9558,7 +9929,7 @@ (unsigned 
long)($src2$$constant)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_imm); %} instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) @@ -9570,7 +9941,7 @@ ins_encode %{ __ sbfm($dst$$Register, $src$$Register, 0, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // this pattern occurs in bigmath arithmetic @@ -9584,7 +9955,7 @@ __ ubfm($dst$$Register, $src$$Register, 0, 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ @@ -9597,7 +9968,7 @@ __ movw(as_Register($dst$$reg), as_Register($src$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct convI2B(iRegINoSp dst, iRegI src, rFlagsReg cr) @@ -9615,7 +9986,7 @@ __ cset(as_Register($dst$$reg), Assembler::NE); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr) @@ -9633,7 +10004,7 @@ __ cset(as_Register($dst$$reg), Assembler::NE); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %} instruct convD2F_reg(vRegF dst, vRegD src) %{ @@ -9782,7 +10153,7 @@ __ ldrw($dst$$Register, Address(sp, $src$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_reg); %} @@ -9818,7 +10189,7 @@ __ ldr($dst$$Register, Address(sp, $src$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(iload_reg_reg); %} @@ -9872,7 +10243,7 @@ __ strw($src$$Register, Address(sp, $dst$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_reg); %} @@ -9908,7 +10279,7 @@ __ str($src$$Register, Address(sp, $dst$$disp)); %} - ins_pipe(pipe_class_memory); + ins_pipe(istore_reg_reg); %} @@ -10014,7 +10385,7 @@ ins_encode(aarch64_enc_cmpw(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero) @@ -10028,7 +10399,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compI_reg_immIAddSub(rFlagsReg 
cr, iRegI op1, immIAddSub op2) @@ -10042,7 +10413,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2) @@ -10056,7 +10427,7 @@ ins_encode(aarch64_enc_cmpw_imm(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} // Unsigned compare Instructions; really, same as signed compare @@ -10074,7 +10445,7 @@ ins_encode(aarch64_enc_cmpw(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero) @@ -10088,7 +10459,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2) @@ -10102,7 +10473,7 @@ ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2) @@ -10130,7 +10501,7 @@ ins_encode(aarch64_enc_cmp(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero) @@ -10144,7 +10515,7 @@ ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2) @@ -10158,7 +10529,7 @@ ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2) @@ -10172,7 +10543,7 @@ ins_encode(aarch64_enc_cmp_imm(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2) @@ -10186,7 +10557,7 @@ ins_encode(aarch64_enc_cmpp(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2) @@ -10200,7 +10571,7 @@ 
ins_encode(aarch64_enc_cmpn(op1, op2)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_reg); %} instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero) @@ -10214,7 +10585,7 @@ ins_encode(aarch64_enc_testp(op1)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero) @@ -10228,7 +10599,7 @@ ins_encode(aarch64_enc_testn(op1)); - ins_pipe(pipe_class_compare); + ins_pipe(icmp_reg_imm); %} // FP comparisons @@ -10400,6 +10771,29 @@ %} +// Manifest a CmpL result in an integer register. +// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0) +instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags) +%{ + match(Set dst (CmpL3 src1 src2)); + effect(KILL flags); + + ins_cost(INSN_COST * 6); + format %{ + "cmp $src1, $src2" + "csetw $dst, ne" + "cnegw $dst, lt" + %} + // format %{ "CmpL3 $dst, $src1, $src2" %} + ins_encode %{ + __ cmp($src1$$Register, $src2$$Register); + __ csetw($dst$$Register, Assembler::NE); + __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); + %} + + ins_pipe(ialu_reg_reg); +%} + instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q, rFlagsReg cr) %{ match(Set dst (CmpLTMask p q)); @@ -10418,7 +10812,7 @@ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %} instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegI src, immI0 zero, rFlagsReg cr) @@ -10434,7 +10828,7 @@ __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // ============================================================================ @@ -10462,7 +10856,7 @@ Assembler::LT); %} - ins_pipe(pipe_class_compare); + ins_pipe(ialu_reg_reg); %} instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr) @@ -10487,7 +10881,7 @@ Assembler::GT); %} - ins_pipe(pipe_class_compare); + ins_pipe(ialu_reg_reg); %} // 
============================================================================ @@ -10505,7 +10899,7 @@ ins_encode(aarch64_enc_b(lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // Conditional Near Branch @@ -10526,7 +10920,7 @@ ins_encode(aarch64_enc_br_con(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch_cond); %} // Conditional Near Branch Unsigned @@ -10547,7 +10941,7 @@ ins_encode(aarch64_enc_br_conU(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch_cond); %} // Make use of CBZ and CBNZ. These instructions, as well as being @@ -10570,7 +10964,7 @@ else __ cbnzw($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{ @@ -10589,7 +10983,7 @@ else __ cbnz($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{ @@ -10608,7 +11002,7 @@ else __ cbnz($op1$$Register, *L); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_cmp_branch); %} // Conditional Far Branch @@ -10629,7 +11023,7 @@ ins_encode(aarch64_enc_br_con(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // counted loop end branch near Unsigned @@ -10646,7 +11040,7 @@ ins_encode(aarch64_enc_br_conU(cmp, lbl)); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // counted loop end branch far @@ -10668,7 +11062,7 @@ ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2)); - ins_pipe(pipe_class_compare); + ins_pipe(pipe_serial); %} instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) @@ -10681,7 +11075,7 @@ ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2)); - ins_pipe(pipe_class_compare); + ins_pipe(pipe_serial); %} @@ -10701,7 +11095,7 @@ ins_encode %{ __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type); %} - 
ins_pipe(pipe_class_memory); + ins_pipe(pipe_serial); %} @@ -10861,7 +11255,7 @@ ins_encode( /*empty*/ ); - ins_pipe(pipe_class_default); + ins_pipe(pipe_class_empty); %} // Rethrow exception: The exception oop will come in the first @@ -10888,7 +11282,7 @@ ins_encode( aarch64_enc_ret() ); - ins_pipe(pipe_class_default); + ins_pipe(pipe_branch); %} // Die now. @@ -10960,6 +11354,44 @@ ins_pipe(pipe_class_memory); %} +instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2, + iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + -1, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, + immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2, + iRegI tmp3, iRegI tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) %{ @@ -10975,6 +11407,20 @@ ins_pipe(pipe_class_memory); %} 
+instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, + iRegP_R10 tmp, rFlagsReg cr) +%{ + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr); + + format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} + ins_encode %{ + __ char_arrays_equals($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} + // ============================================================================ // This name is KNOWN by the ADLC and cannot be changed. // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
--- a/src/cpu/aarch64/vm/aarch64Test.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/aarch64Test.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -35,43 +35,4 @@ CodeBuffer code(b); MacroAssembler _masm(&code); entry(&code); - // dive now before we hit all the Unimplemented() calls - // exit(0); - -#if 0 - // old test code to compute sum of squares - enum { r0, r1, r2, r3, r4, LR = 30 }; - - address entry = __ pc(); - - __ _mov_imm(r0, 100); - address loop = __ pc(); - __ _sub_imm(r0, r0, 1); - __ _cbnz(r0, loop); - // __ _br(LR); - - char stack[4096]; - unsigned long memory[100]; - - __ _mov_imm(r0, 1); - __ _mov_imm(r4, 100); - loop = __ pc(); - __ _mov(r1, r0); - __ _mul(r2, r1, r1); - __ _str_post(r2, r3, 8); - __ _add_imm(r0, r0, 1); - __ _sub_imm(r4, r4, 1); - __ _cbnz(r4, loop); - __ _br(LR); - - Disassembler::decode(entry, __ pc()); - - sim.init((u_int64_t)entry, (u_int64_t)stack + sizeof stack, - (u_int64_t)stack); - sim.getCPUState().xreg((GReg)r3, 0) = (u_int64_t)memory; - sim.run(); - printf("Table of squares:\n"); - for (int i = 0; i < 100; i++) - printf(" %d\n", memory[i]); -#endif }
--- a/src/cpu/aarch64/vm/aarch64_ad.m4 Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/aarch64_ad.m4 Thu Dec 04 14:30:02 2014 +0000 @@ -18,7 +18,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %}')dnl define(`BASE_INVERTED_INSN', ` @@ -40,7 +40,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}')dnl define(`INVERTED_SHIFT_INSN', ` @@ -63,7 +63,7 @@ $src3$$constant & 0x3f); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_shift); %}')dnl define(`NOT_INSN', `instruct reg$1_not_reg(iReg$1NoSp dst, @@ -80,7 +80,7 @@ Assembler::LSL, 0); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg); %}')dnl dnl define(`BOTH_SHIFT_INSNS', @@ -142,7 +142,7 @@ r, s); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %}') BFM_INSN(L, 63, RShift, sbfm) BFM_INSN(I, 31, RShift, sbfmw) @@ -164,7 +164,7 @@ __ $3(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %}') BFX_INSN(I,URShift,ubfxw) BFX_INSN(L,URShift,ubfx) @@ -184,7 +184,7 @@ __ ubfx(as_Register($dst$$reg), as_Register($src$$reg), rshift, width); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_shift); %} // Rotations @@ -202,7 +202,7 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), $rshift$$constant & $2); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_extr); %} ')dnl EXTRACT_INSN(L, 63, Or, extr) @@ -223,7 +223,7 @@ __ $3(as_Register($dst$$reg), as_Register($src$$reg), rscratch1); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %}')dnl define(`ROR_EXPAND', ` // $2 expander @@ -238,7 +238,7 @@ __ $3(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg_vshift); %}')dnl define(ROL_INSN, ` instruct $3$1_rReg_Var_C$2(iRegL dst, iRegL src, iRegI shift, immI$2 c$2, rFlagsReg cr) @@ -284,7 
+284,7 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$5); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}')dnl ADD_SUB_CONV(I,L,Add,add,sxtw); ADD_SUB_CONV(I,L,Sub,sub,sxtw); @@ -300,7 +300,7 @@ __ $5(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$6); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}') ADD_SUB_EXTENDED(I,16,Add,RShift,add,sxth,32) ADD_SUB_EXTENDED(I,8,Add,RShift,add,sxtb,32) @@ -322,7 +322,7 @@ __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), ext::$5); %} - ins_pipe(pipe_class_default); + ins_pipe(ialu_reg_reg); %}') dnl ADD_SUB_ZERO_EXTEND(I,255,Add,addw,uxtb)
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -1194,18 +1194,11 @@ } #ifndef PRODUCT - { - address PC = __ pc(); - __ bl(__ pc()+(1<<27)-4); - NativeCall* call = nativeCall_at(PC); - ptrdiff_t offset = call->destination()-PC; - assert(offset == (1<<27)-4, "broken branch coding"); - PC = __ pc(); - __ bl(__ pc()-(1<<27)); - call = nativeCall_at(PC); - offset = call->destination()-PC; - assert(offset == -(1<<27), "broken branch coding"); - } + + address PC = __ pc(); + __ ld1(v0, __ T16B, Address(r16)); // No offset + __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index + __ ld1(v0, __ T16B, Address(r16, r17)); // #endif // PRODUCT @@ -1464,7 +1457,7 @@ bool Assembler::operand_valid_for_add_sub_immediate(long imm) { bool shift = false; - unsigned long uimm = labs(imm); + unsigned long uimm = uabs(imm); if (uimm < (1 << 12)) return true; if (uimm < (1 << 24) @@ -1573,7 +1566,8 @@ // Implementation of MacroAssembler -void MacroAssembler::pd_patch_instruction(address branch, address target) { +int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + int instructions = 1; assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant"); long offset = (target - branch) >> 2; unsigned insn = *(unsigned*)branch; @@ -1609,15 +1603,22 @@ // 2 - adrp Rx, target_page // add Ry, Rx, #offset_in_page // 3 - adrp Rx, target_page (page aligned reloc, offset == 0) - // In the first 2 cases we must check that Rx is the same in the adrp and the - // subsequent ldr/str or add instruction. Otherwise we could accidentally end - // up treating a type 3 relocation as a type 1 or 2 just because it happened - // to be followed by a random unrelated ldr/str or add instruction. + // + // In the first 2 cases we must check that Rx is the same in the + // adrp and the subsequent ldr/str or add instruction. 
Otherwise + // we could accidentally end up treating a type 3 relocation as + // a type 1 or 2 just because it happened to be followed by a + // random unrelated ldr/str or add instruction. // - // In the case of a type 3 relocation, we know that these are only generated - // for the safepoint polling page, or for the card type byte map base so we - // assert as much and of course that the offset is 0. - // + // In the case of a type 3 relocation, we know that these are + // only generated for the safepoint polling page, the crc table + // base or the card type byte map base so we assert as much + // and of course that the offset is 0. + // + // In jdk7 the card type byte map base is aligned on a 1K + // boundary which may fail to be 4K aligned. In that case the + // card table load will fall into category 2. + unsigned insn2 = ((unsigned*)branch)[1]; if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && Instruction_aarch64::extract(insn, 4, 0) == @@ -1627,19 +1628,24 @@ Instruction_aarch64::patch(branch + sizeof (unsigned), 21, 10, offset_lo >> size); guarantee(((dest >> size) << size) == dest, "misaligned target"); + instructions = 2; } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 && Instruction_aarch64::extract(insn, 4, 0) == Instruction_aarch64::extract(insn2, 4, 0)) { // add (immediate) + assert (((jbyte *)target != + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base) || + (offset_lo & 0x3FFl) == 0, "offset must be 0x400 aligned for crc_table"); Instruction_aarch64::patch(branch + sizeof (unsigned), 21, 10, offset_lo); + instructions = 2; } else { assert((jbyte *)target == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base || target == StubRoutines::crc_table_addr() || (address)target == os::get_polling_page(), - "adrp must be polling page or byte map base"); - assert(offset_lo == 0, "offset must be 0 for polling page or byte map base"); + "adrp must be polling page, crc_table or byte 
map base"); + assert(offset_lo == 0, "offset must be 0 for polling page, crc_table or byte map base"); } } int offset_lo = offset & 3; @@ -1655,6 +1661,7 @@ Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff); Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff); assert(pd_call_destination(branch) == target, "should be"); + instructions = 2; } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && Instruction_aarch64::extract(insn, 4, 0) == 0b11111) { // nothing to do @@ -1662,19 +1669,34 @@ } else { ShouldNotReachHere(); } + return instructions * NativeInstruction::instruction_size; } -void MacroAssembler::patch_oop(address insn_addr, address o) { +int MacroAssembler::patch_oop(address insn_addr, address o) { + int instructions; unsigned insn = *(unsigned*)insn_addr; + assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); + + // OOPs are either narrow (32 bits) or wide (48 bits). We encode + // narrow OOPs by setting the upper 16 bits in the first + // instruction. 
if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) { - // Move narrow constant - assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); - narrowOop n = oopDesc::encode_heap_oop((oop)o); - Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16); - Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); + // Move narrow OOP + assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); + narrowOop n = oopDesc::encode_heap_oop((oop)o); + Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16); + Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); + instructions = 2; } else { - pd_patch_instruction(insn_addr, o); + // Move wide OOP + assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch"); + uintptr_t dest = (uintptr_t)o; + Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff); + Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff); + Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff); + instructions = 3; } + return instructions * NativeInstruction::instruction_size; } address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { @@ -2236,15 +2258,27 @@ while (offset() % modulus != 0) nop(); } -// these are meant to be no-ops overridden by InterpreterMacroAssembler - -void MacroAssembler::check_and_handle_earlyret(Register java_thread) { Unimplemented(); } - -void MacroAssembler::check_and_handle_popframe(Register java_thread) { Unimplemented(); } +// these are no-ops overridden by InterpreterMacroAssembler + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { } RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, - int offset) { Unimplemented(); return RegisterOrConstant(r0); } + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + 
offset); + + // load indirectly to solve generation ordering problem + ldr(tmp, ExternalAddress((address) delayed_value_addr)); + + if (offset != 0) + add(tmp, tmp, offset); + + return RegisterOrConstant(tmp); +} void MacroAssembler:: notify(int type) { if (type == bytecode_start) { @@ -2687,6 +2721,9 @@ Label *retaddr) { Label E, L; + // !!! FIXME AARCH64 we normally need to save rmethod as it is + // volatile. however we don't need to when calling from the + // interpreter. stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize))); // We add 1 to number_of_arguments because the thread in arg0 is @@ -2697,6 +2734,7 @@ bind(*retaddr); ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize))); + maybe_isb(); } void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { @@ -3187,7 +3225,7 @@ } } -void MacroAssembler::increment(Address dst, int value) +void MacroAssembler::incrementw(Address dst, int value) { assert(!dst.uses(rscratch1), "invalid dst for address increment"); ldrw(rscratch1, dst); @@ -3195,7 +3233,7 @@ strw(rscratch1, dst); } -void MacroAssembler::incrementw(Address dst, int value) +void MacroAssembler::increment(Address dst, int value) { assert(!dst.uses(rscratch1), "invalid dst for address increment"); ldr(rscratch1, dst); @@ -3312,7 +3350,7 @@ if (operand_valid_for_add_sub_immediate((int)imm)) { (this->*insn1)(Rd, Rn, imm); } else { - if (labs(imm) < (1 << 24)) { + if (uabs(imm) < (1 << 24)) { (this->*insn1)(Rd, Rn, imm & -(1 << 12)); (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1)); } else { @@ -3760,131 +3798,131 @@ if (UseNeon) { cmp(len, 64); br(Assembler::LT, L_by16); - v_eor(v16, T16B, v16, v16); + eor(v16, T16B, v16, v16); Label L_fold; add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants - v_ld1(v0, v1, T2D, buf, 32); - v_ld1r(v4, T2D, tmp, 8); - v_ld1r(v5, T2D, tmp, 8); - v_ld1r(v6, T2D, tmp, 8); - v_ld1r(v7, T2D, tmp, 8); - v_mov(v16, T4S, 0, crc); - - v_eor(v0, T16B, v0, v16); + ld1(v0, v1, T2D, post(buf, 
32)); + ld1r(v4, T2D, post(tmp, 8)); + ld1r(v5, T2D, post(tmp, 8)); + ld1r(v6, T2D, post(tmp, 8)); + ld1r(v7, T2D, post(tmp, 8)); + mov(v16, T4S, 0, crc); + + eor(v0, T16B, v0, v16); sub(len, len, 64); BIND(L_fold); - v_pmull(v22, T8H, v0, v5, T8B); - v_pmull(v20, T8H, v0, v7, T8B); - v_pmull(v23, T8H, v0, v4, T8B); - v_pmull(v21, T8H, v0, v6, T8B); + pmull(v22, T8H, v0, v5, T8B); + pmull(v20, T8H, v0, v7, T8B); + pmull(v23, T8H, v0, v4, T8B); + pmull(v21, T8H, v0, v6, T8B); - v_pmull2(v18, T8H, v0, v5, T16B); - v_pmull2(v16, T8H, v0, v7, T16B); - v_pmull2(v19, T8H, v0, v4, T16B); - v_pmull2(v17, T8H, v0, v6, T16B); + pmull2(v18, T8H, v0, v5, T16B); + pmull2(v16, T8H, v0, v7, T16B); + pmull2(v19, T8H, v0, v4, T16B); + pmull2(v17, T8H, v0, v6, T16B); - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v17, T16B, v17, v21); + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v17, T16B, v17, v21); - v_ushll2(v20, T2D, v17, T4S, 16); - v_ushll(v16, T2D, v17, T2S, 16); + ushll2(v20, T2D, v17, T4S, 16); + ushll(v16, T2D, v17, T2S, 16); - v_eor(v20, T16B, v20, v22); - v_eor(v16, T16B, v16, v18); + eor(v20, T16B, 
v20, v22); + eor(v16, T16B, v16, v18); - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v28, T16B, v17, v21); + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v28, T16B, v17, v21); - v_pmull(v22, T8H, v1, v5, T8B); - v_pmull(v20, T8H, v1, v7, T8B); - v_pmull(v23, T8H, v1, v4, T8B); - v_pmull(v21, T8H, v1, v6, T8B); + pmull(v22, T8H, v1, v5, T8B); + pmull(v20, T8H, v1, v7, T8B); + pmull(v23, T8H, v1, v4, T8B); + pmull(v21, T8H, v1, v6, T8B); - v_pmull2(v18, T8H, v1, v5, T16B); - v_pmull2(v16, T8H, v1, v7, T16B); - v_pmull2(v19, T8H, v1, v4, T16B); - v_pmull2(v17, T8H, v1, v6, T16B); + pmull2(v18, T8H, v1, v5, T16B); + pmull2(v16, T8H, v1, v7, T16B); + pmull2(v19, T8H, v1, v4, T16B); + pmull2(v17, T8H, v1, v6, T16B); - v_ld1(v0, v1, T2D, buf, 32); + ld1(v0, v1, T2D, post(buf, 32)); - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v16, T16B, v17, v21); + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v16, T16B, v17, v21); - v_ushll2(v20, T2D, v16, T4S, 16); - v_ushll(v16, T2D, v16, T2S, 16); + ushll2(v20, T2D, v16, T4S, 16); + ushll(v16, T2D, v16, T2S, 16); - v_eor(v20, 
T16B, v22, v20); - v_eor(v16, T16B, v16, v18); + eor(v20, T16B, v22, v20); + eor(v16, T16B, v16, v18); - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v20, T16B, v17, v21); + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v20, T16B, v17, v21); - v_shl(v16, v28, T2D, 1); - v_shl(v17, v20, T2D, 1); + shl(v16, v28, T2D, 1); + shl(v17, v20, T2D, 1); - v_eor(v0, T16B, v0, v16); - v_eor(v1, T16B, v1, v17); + eor(v0, T16B, v0, v16); + eor(v1, T16B, v1, v17); subs(len, len, 32); br(Assembler::GE, L_fold); mov(crc, 0); - v_mov(tmp, v0, T1D, 0); + mov(tmp, v0, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v0, T1D, 1); + mov(tmp, v0, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 0); + mov(tmp, v1, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 1); + mov(tmp, v1, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); @@ -4707,7 +4745,7 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { relocInfo::relocType rtype = dest.rspec().reloc()->type(); - if (labs(pc() - dest.target()) >= (1LL << 32)) { + if (uabs(pc() - dest.target()) >= (1LL << 32)) { guarantee(rtype == relocInfo::none || rtype == relocInfo::external_word_type || rtype == relocInfo::poll_type @@ -4760,6 +4798,346 @@ } } +// Search for str1 in str2 and return index or -1 +void MacroAssembler::string_indexof(Register str2, Register str1, + Register cnt2, Register cnt1, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int icnt1, Register result) { + 
Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH; + + Register ch1 = rscratch1; + Register ch2 = rscratch2; + Register cnt1tmp = tmp1; + Register cnt2tmp = tmp2; + Register cnt1_neg = cnt1; + Register cnt2_neg = cnt2; + Register result_tmp = tmp4; + + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + +// We have two strings, a source string in str2, cnt2 and a pattern string +// in str1, cnt1. Find the 1st occurrence of pattern in source or return -1. + +// For larger pattern and source we use a simplified Boyer Moore algorithm. +// With a small pattern and source we use linear scan. + + if (icnt1 == -1) { + cmp(cnt1, 256); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 + ccmp(cnt1, 8, 0b0000, LO); // Can't handle skip >= 256 because we use + br(LO, LINEARSEARCH); // a byte array. + cmp(cnt1, cnt2, LSR, 2); // Source must be 4 * pattern for BM + br(HS, LINEARSEARCH); + } + +// The Boyer Moore algorithm is based on the description here:- +// +// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm +// +// This describes an algorithm with 2 shift rules. The 'Bad Character' rule +// and the 'Good Suffix' rule. +// +// These rules are essentially heuristics for how far we can shift the +// pattern along the search string. +// +// The implementation here uses the 'Bad Character' rule only because of the +// complexity of initialisation for the 'Good Suffix' rule. 
+// +// This is also known as the Boyer-Moore-Horspool algorithm:- +// +// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +// +// #define ASIZE 128 +// +// int bm(unsigned char *x, int m, unsigned char *y, int n) { +// int i, j; +// unsigned c; +// unsigned char bc[ASIZE]; +// +// /* Preprocessing */ +// for (i = 0; i < ASIZE; ++i) +// bc[i] = 0; +// for (i = 0; i < m - 1; ) { +// c = x[i]; +// ++i; +// if (c < ASIZE) bc[c] = i; +// } +// +// /* Searching */ +// j = 0; +// while (j <= n - m) { +// c = y[i+j]; +// if (x[m-1] == c) +// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); +// if (i < 0) return j; +// if (c < ASIZE) +// j = j - bc[y[j+m-1]] + m; +// else +// j += 1; // Advance by 1 only if char >= ASIZE +// } +// } + + if (icnt1 == -1) { + BIND(BM); + + Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP; + Label BMADV, BMMATCH, BMCHECKEND; + + Register cnt1end = tmp2; + Register str2end = cnt2; + Register skipch = tmp2; + + // Restrict ASIZE to 128 to reduce stack space/initialisation. + // The presence of chars >= ASIZE in the target string does not affect + // performance, but we must be careful not to initialise them in the stack + // array. + // The presence of chars >= ASIZE in the source string may adversely affect + // performance since we can only advance by one when we encounter one. 
+ + stp(zr, zr, pre(sp, -128)); + for (int i = 1; i < 8; i++) + stp(zr, zr, Address(sp, i*16)); + + mov(cnt1tmp, 0); + sub(cnt1end, cnt1, 1); + BIND(BCLOOP); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + cmp(ch1, 128); + add(cnt1tmp, cnt1tmp, 1); + br(HS, BCSKIP); + strb(cnt1tmp, Address(sp, ch1)); + BIND(BCSKIP); + cmp(cnt1tmp, cnt1end); + br(LT, BCLOOP); + + mov(result_tmp, str2); + + sub(cnt2, cnt2, cnt1); + add(str2end, str2, cnt2, LSL, 1); + BIND(BMLOOPSTR2); + sub(cnt1tmp, cnt1, 1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, skipch); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(LT, BMMATCH); + BIND(BMLOOPSTR1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, ch2); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(GE, BMLOOPSTR1); + BIND(BMMATCH); + sub(result_tmp, str2, result_tmp); + lsr(result, result_tmp, 1); + add(sp, sp, 128); + b(DONE); + BIND(BMADV); + add(str2, str2, 2); + b(BMCHECKEND); + BIND(BMSKIP); + cmp(skipch, 128); + br(HS, BMADV); + ldrb(ch2, Address(sp, skipch)); + add(str2, str2, cnt1, LSL, 1); + sub(str2, str2, ch2, LSL, 1); + BIND(BMCHECKEND); + cmp(str2, str2end); + br(LE, BMLOOPSTR2); + add(sp, sp, 128); + b(NOMATCH); + } + + BIND(LINEARSEARCH); + { + Label DO1, DO2, DO3; + + Register str2tmp = tmp2; + Register first = tmp3; + + if (icnt1 == -1) + { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD; + + cmp(cnt1, 4); + br(LT, DOSHORT); + + sub(cnt2, cnt2, cnt1); + sub(cnt1, cnt1, 4); + mov(result_tmp, cnt2); + + lea(str1, Address(str1, cnt1, Address::uxtw(1))); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt1_neg, zr, cnt1, LSL, 1); + sub(cnt2_neg, zr, cnt2, LSL, 1); + ldr(first, Address(str1, cnt1_neg)); + + BIND(FIRST_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + 
adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + adds(cnt1tmp, cnt1_neg, 8); + add(cnt2tmp, cnt2_neg, 8); + br(GE, LAST_WORD); + + BIND(STR1_NEXT); + ldr(ch1, Address(str1, cnt1tmp)); + ldr(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + adds(cnt1tmp, cnt1tmp, 8); + add(cnt2tmp, cnt2tmp, 8); + br(LT, STR1_NEXT); + + BIND(LAST_WORD); + ldr(ch1, Address(str1)); + sub(str2tmp, str2, cnt1_neg); // adjust to corresponding + ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2 + cmp(ch1, ch2); + br(NE, STR2_NEXT); + b(MATCH); + + BIND(DOSHORT); + cmp(cnt1, 2); + br(LT, DO1); + br(GT, DO3); + } + + if (icnt1 == 4) { + Label CH1_LOOP; + + ldr(ch1, str1); + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 2) { + Label CH1_LOOP; + + BIND(DO2); + ldrw(ch1, str1); + sub(cnt2, cnt2, 2); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + + BIND(DO3); + ldrw(first, str1); + ldrh(ch1, Address(str1, 4)); + + sub(cnt2, cnt2, 3); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(FIRST_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmpw(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + add(cnt2tmp, cnt2_neg, 4); + ldrh(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + 
b(MATCH); + } + + if (icnt1 == -1 || icnt1 == 1) { + Label CH1_LOOP, HAS_ZERO; + Label DO1_SHORT, DO1_LOOP; + + BIND(DO1); + ldrh(ch1, str1); + cmp(cnt2, 4); + br(LT, DO1_SHORT); + + orr(ch1, ch1, ch1, LSL, 16); + orr(ch1, ch1, ch1, LSL, 32); + + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + mov(tmp3, 0x0001000100010001); + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + eor(ch2, ch1, ch2); + sub(tmp1, ch2, tmp3); + orr(tmp2, ch2, 0x7fff7fff7fff7fff); + bics(tmp1, tmp1, tmp2); + br(NE, HAS_ZERO); + adds(cnt2_neg, cnt2_neg, 8); + br(LT, CH1_LOOP); + + cmp(cnt2_neg, 8); + mov(cnt2_neg, 0); + br(LT, CH1_LOOP); + b(NOMATCH); + + BIND(HAS_ZERO); + rev(tmp1, tmp1); + clz(tmp1, tmp1); + add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); + b(MATCH); + + BIND(DO1_SHORT); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + BIND(DO1_LOOP); + ldrh(ch2, Address(str2, cnt2_neg)); + cmpw(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LT, DO1_LOOP); + } + } + BIND(NOMATCH); + mov(result, -1); + b(DONE); + BIND(MATCH); + add(result, result_tmp, cnt2_neg, ASR, 1); + BIND(DONE); +} + // Compare strings. void MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, @@ -4919,3 +5297,72 @@ BLOCK_COMMENT("} string_equals"); } + + +// Compare char[] arrays aligned to 4 bytes +void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1) +{ + Register cnt1 = rscratch1; + Register cnt2 = rscratch2; + Register tmp2 = rscratch2; + + Label SAME, DIFFER, NEXT, TAIL03, TAIL01; + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + BLOCK_COMMENT("char_arrays_equals {"); + + // different until proven equal + mov(result, false); + + // same array? 
+ cmp(ary1, ary2); + br(Assembler::EQ, SAME); + + // ne if either null + cbz(ary1, DIFFER); + cbz(ary2, DIFFER); + + // lengths ne? + ldrw(cnt1, Address(ary1, length_offset)); + ldrw(cnt2, Address(ary2, length_offset)); + cmp(cnt1, cnt2); + br(Assembler::NE, DIFFER); + + lea(ary1, Address(ary1, base_offset)); + lea(ary2, Address(ary2, base_offset)); + + subs(cnt1, cnt1, 4); + br(LT, TAIL03); + + BIND(NEXT); + ldr(tmp1, Address(post(ary1, 8))); + ldr(tmp2, Address(post(ary2, 8))); + subs(cnt1, cnt1, 4); + eor(tmp1, tmp1, tmp2); + cbnz(tmp1, DIFFER); + br(GE, NEXT); + + BIND(TAIL03); // 0-3 chars left, cnt1 = #chars left - 4 + tst(cnt1, 0b10); + br(EQ, TAIL01); + ldrw(tmp1, Address(post(ary1, 4))); + ldrw(tmp2, Address(post(ary2, 4))); + cmp(tmp1, tmp2); + br(NE, DIFFER); + BIND(TAIL01); // 0-1 chars left + tst(cnt1, 0b01); + br(EQ, SAME); + ldrh(tmp1, ary1); + ldrh(tmp2, ary2); + cmp(tmp1, tmp2); + br(NE, DIFFER); + + BIND(SAME); + mov(result, true); + BIND(DIFFER); // result already set + + BLOCK_COMMENT("} char_arrays_equals"); +}
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -322,6 +322,29 @@ enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx }; }; +// abs methods which cannot overflow and so are well-defined across +// the entire domain of integer types. +static inline unsigned int uabs(unsigned int n) { + union { + unsigned int result; + int value; + }; + result = n; + if (value < 0) result = -result; + return result; +} +static inline unsigned long uabs(unsigned long n) { + union { + unsigned long result; + long value; + }; + result = n; + if (value < 0) result = -result; + return result; +} +static inline unsigned long uabs(long n) { return uabs((unsigned long)n); } +static inline unsigned long uabs(int n) { return uabs((unsigned int)n); } + // Addressing modes class Address VALUE_OBJ_CLASS_SPEC { public: @@ -419,15 +442,16 @@ } } - Register base() { - guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg), + Register base() const { + guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg + | _mode == post), "wrong mode"); return _base; } - long offset() { + long offset() const { return _offset; } - Register index() { + Register index() const { return _index; } mode getMode() const { @@ -548,7 +572,7 @@ static bool offset_ok_for_immed(long offset, int shift = 0) { unsigned mask = (1 << shift) - 1; if (offset < 0 || offset & mask) { - return (abs(offset) < (1 << (20 - 12))); // Unscaled offset + return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset } else { return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset } @@ -1250,12 +1274,6 @@ f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24); long offset = (adr.target() - pc()) >> 2; sf(offset, 23, 5); -#if 0 - Relocation* reloc = adr.rspec().reloc(); - relocInfo::relocType rtype = (relocInfo::relocType) reloc->type(); - assert(rtype == 
relocInfo::internal_word_type, - "only internal_word_type relocs make sense here"); -#endif // code_section()->relocate(pc(), adr.rspec()); relocate(pc(), adr.rspec()); return; @@ -1855,7 +1873,7 @@ * We just use FloatRegister in the following. They are exactly the same * as SIMD registers. */ -public: + public: enum SIMD_Arrangement { T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D @@ -1865,7 +1883,136 @@ S32, D64, Q128 }; - void v_shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){ + private: + + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, + int imm, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, + Register Xm, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) { + switch (a.getMode()) { + case Address::base_plus_offset: + guarantee(a.offset() == 0, "no offset allowed here"); + ld_st(Vt, T, a.base(), op1, op2); + break; + case Address::post: + ld_st(Vt, T, a.base(), a.offset(), op1, op2); + break; + case Address::base_plus_offset_reg: + ld_st(Vt, T, a.base(), a.index(), op1, op2); + break; + default: + ShouldNotReachHere(); + } + } + + public: + +#define INSN1(NAME, op1, op2) \ + void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) { \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN2(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == 
Vt2, "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN3(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ + SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ + "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN4(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ + FloatRegister Vt4, SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ + Vt3->successor() == Vt4, "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + + INSN1(ld1, 0b001100010, 0b0111); + INSN2(ld1, 0b001100010, 0b1010); + INSN3(ld1, 0b001100010, 0b0110); + INSN4(ld1, 0b001100010, 0b0010); + + INSN2(ld2, 0b001100010, 0b1000); + INSN3(ld3, 0b001100010, 0b0100); + INSN4(ld4, 0b001100010, 0b0000); + + INSN1(st1, 0b001100000, 0b0111); + INSN2(st1, 0b001100000, 0b1010); + INSN3(st1, 0b001100000, 0b0110); + INSN4(st1, 0b001100000, 0b0010); + + INSN2(st2, 0b001100000, 0b1000); + INSN3(st3, 0b001100000, 0b0100); + INSN4(st4, 0b001100000, 0b0000); + + INSN1(ld1r, 0b001101010, 0b1100); + INSN2(ld2r, 0b001101011, 0b1100); + INSN3(ld3r, 0b001101010, 0b1110); + INSN4(ld4r, 0b001101011, 0b1110); + +#undef INSN1 +#undef INSN2 +#undef INSN3 +#undef INSN4 + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T8B || T == T16B, "must be T8B or T16B"); \ + f(0, 31), f((int)T & 1, 30), f(opc, 29, 21); \ + rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(eor, 0b101110001); + INSN(orr, 0b001110101); + INSN(andr, 0b001110001); + INSN(bic, 0b001110011); + INSN(bif, 0b101110111); + INSN(bit, 0b101110101); + INSN(bsl, 0b101110011); + INSN(orn, 0b001110111); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, FloatRegister Vn) { \ + starti; \ 
+ f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(aese, 0b0100111000101000010010); + INSN(aesd, 0b0100111000101000010110); + INSN(aesmc, 0b0100111000101000011010); + INSN(aesimc, 0b0100111000101000011110); + +#undef INSN + + void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){ starti; /* The encodings for the immh:immb fields (bits 22:16) are * 0001 xxx 8B/16B, shift = xxx @@ -1878,7 +2025,7 @@ f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0); } - void v_ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { starti; /* The encodings for the immh:immb fields (bits 22:16) are * 0001 xxx 8H, 8B/16b shift = xxx @@ -1891,22 +2038,22 @@ f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16); f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0); } - void v_ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { - v_ushll(Vd, Ta, Vn, Tb, shift); + void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + ushll(Vd, Ta, Vn, Tb, shift); } - void v_uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){ + void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){ starti; f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21); rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0); } - void v_uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){ - v_uzp1(Vd, Vn, Vm, T, 1); + void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){ + uzp1(Vd, Vn, Vm, T, 1); } // Move from general purpose register // mov Vd.T[index], Rn - void v_mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) { + void mov(FloatRegister Vd, SIMD_Arrangement T, 
int index, Register Xn) { starti; f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0); @@ -1914,7 +2061,7 @@ // Move to general purpose register // mov Rd, Vn.T[index] - void v_mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { + void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { starti; f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21); f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); @@ -1922,149 +2069,23 @@ } // We do not handle the 1Q arrangement. - void v_pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { starti; assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier"); f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10); rf(Vn, 5), rf(Vd, 0); } - void v_pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { - v_pmull(Vd, Ta, Vn, Vm, Tb); - } - - void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { - starti; - f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b0111, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b1010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - f(0,31), f((int)T & 1, 30), 
f(0b00110001000000, 29, 16), f(0b0110, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4"); - f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b0010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + pmull(Vd, Ta, Vn, Vm, Tb); } - void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) { - starti; - assert((8 << ((int)T & 1)) == imm, "size/imm mismatch"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0111, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) { - starti; - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0111, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, int imm) { - starti; - assert((16 << ((int)T & 1)) == imm, "size/imm mismatch"); - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b1010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, Register Xm) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b1010, 15, 12); - f((int)T >> 1, 11, 
10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn, int imm) { - starti; - assert((24 << ((int)T & 1)) == imm, "size/imm mismatch"); - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0110, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn, Register Xm) { + void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) + { starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0110, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn, int imm) { - starti; - assert((32 << ((int)T & 1)) == imm, "size/imm mismatch"); - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn, Register Xm) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), 
"Invalid Vt3"); - assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4"); - f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - - void v_st1(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { - starti; - f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0111, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_st1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b1010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_st1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0110, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_st1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn) { - starti; - assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2"); - assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3"); - assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4"); - f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0010, 15, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - - void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { - starti; - f(0, 31), f((int)T & 1, 30), f(0b001101010000001100, 29, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn, 
Register Xm) { - starti; - f(0, 31), f((int)T & 1, 30), f(0b001101110, 29, 21), rf(Xm, 16); - f(0b1100, 15, 12), f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) { - starti; - assert((1 << ((int)T & 3)) == imm, "size/imm mismatch"); - f(0, 31), f((int)T & 1, 30), f(0b001101110111111100, 29, 12); - f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); - } - - void v_eor(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { - starti; - assert(T == T8B || T == T16B, "must be T8B or T16B"); - f(0, 31), f((int)T & 1, 30), f(0b101110001, 29, 21); - rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); + assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H"); + f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24); + f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10); + rf(Vn, 5), rf(Vd, 0); } // CRC32 instructions @@ -2259,6 +2280,8 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + using Assembler::mov; + protected: // Support for VM calls @@ -2396,6 +2419,13 @@ csincw(Rd, zr, zr, ~cond); } + void cneg(Register Rd, Register Rn, Assembler::Condition cond) { + csneg(Rd, Rn, Rn, ~cond); + } + void cnegw(Register Rd, Register Rn, Assembler::Condition cond) { + csnegw(Rd, Rn, Rn, ~cond); + } + inline void movw(Register Rd, Register Rn) { if (Rd == sp || Rn == sp) { addw(Rd, Rn, 0U); @@ -2703,7 +2733,10 @@ // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
- static void pd_patch_instruction(address branch, address target); + static int pd_patch_instruction_size (address branch, address target); + static void pd_patch_instruction(address branch, address target) { + pd_patch_instruction_size (branch, target); + } static address pd_call_destination(address branch) { unsigned insn = *(unsigned*)branch; return target_addr_for_insn(branch, insn); @@ -2712,7 +2745,7 @@ static void pd_print_patched_instruction(address branch); #endif - static void patch_oop(address insn_addr, address o); + static int patch_oop(address insn_addr, address o); // The following 4 methods return the offset of the appropriate move instruction @@ -2909,19 +2942,6 @@ void store_check_part_1(Register obj); void store_check_part_2(Register obj); - // currently unimplemented -#if 0 - // C 'boolean' to Java boolean: x == 0 ? 0 : 1 - void c2bool(Register x); - - // C++ bool manipulation - - void movbool(Register dst, Address src); - void movbool(Address dst, bool boolconst); - void movbool(Address dst, Register src); - void testbool(Register dst); -#endif - // oop manipulations void load_klass(Register dst, Register src); void store_klass(Register dst, Register src); @@ -2932,7 +2952,6 @@ void load_heap_oop_not_null(Register dst, Address src); void store_heap_oop(Address dst, Register src); - // currently unimplemented // Used for storing NULL. All other oop constants should be // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); @@ -2957,23 +2976,12 @@ void decode_heap_oop_not_null(Register dst, Register src); void set_narrow_oop(Register dst, jobject obj); - // currently unimplemented -#if 0 - void set_narrow_oop(Address dst, jobject obj); - void cmp_narrow_oop(Register dst, jobject obj); - void cmp_narrow_oop(Address dst, jobject obj); -#endif // if heap base register is used - reinit it with the correct value void reinit_heapbase(); DEBUG_ONLY(void verify_heapbase(const char* msg);) - // currently unimplemented -#if 0 - void int3(); -#endif - void push_CPU_state(); void pop_CPU_state() ; @@ -3470,12 +3478,21 @@ Register table0, Register table1, Register table2, Register table3, bool upper = false); + void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int int_cnt1, Register result); void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, Register tmp1); void string_equals(Register str1, Register str2, Register cnt, Register result, Register tmp1); + void char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1); + // ISB may be needed because of a safepoint + void maybe_isb() { isb(); } }; #ifdef ASSERT
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -503,6 +503,7 @@ __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset())); __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub)); __ blrt(rscratch1, 1, 0, 1); + __ maybe_isb(); __ pop(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1 __ mov(rscratch1, r0); __ pop(0x3, sp); // r0 & r1 @@ -2681,6 +2682,7 @@ if (info != NULL) { add_call_info_here(info); } + __ maybe_isb(); } void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { @@ -2723,7 +2725,9 @@ void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } -void LIR_Assembler::get_thread(LIR_Opr result_reg) { Unimplemented(); } +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ mov(result_reg->as_register(), rthread); +} void LIR_Assembler::peephole(LIR_List *lir) {
--- a/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -35,1211 +35,5 @@ //---------------------------------------------------------------------- void LinearScan::allocate_fpu_stack() { - // First compute which FPU registers are live at the start of each basic block - // (To minimize the amount of work we have to do if we have to merge FPU stacks) - if (ComputeExactFPURegisterUsage) { - Interval* intervals_in_register, *intervals_in_memory; - create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL); - - // ignore memory intervals by overwriting intervals_in_memory - // the dummy interval is needed to enforce the walker to walk until the given id: - // without it, the walker stops when the unhandled-list is empty -> live information - // beyond this point would be incorrect. - Interval* dummy_interval = new Interval(any_reg); - dummy_interval->add_range(max_jint - 2, max_jint - 1); - dummy_interval->set_next(Interval::end()); - intervals_in_memory = dummy_interval; - - IntervalWalker iw(this, intervals_in_register, intervals_in_memory); - - const int num_blocks = block_count(); - for (int i = 0; i < num_blocks; i++) { - BlockBegin* b = block_at(i); - - // register usage is only needed for merging stacks -> compute only - // when more than one predecessor. - // the block must not have any spill moves at the beginning (checked by assertions) - // spill moves would use intervals that are marked as handled and so the usage bit - // would been set incorrectly - - // NOTE: the check for number_of_preds > 1 is necessary. A block with only one - // predecessor may have spill moves at the begin of the block. - // If an interval ends at the current instruction id, it is not possible - // to decide if the register is live or not at the block begin -> the - // register information would be incorrect. 
- if (b->number_of_preds() > 1) { - int id = b->first_lir_instruction_id(); - BitMap regs(FrameMap::nof_fpu_regs); - regs.clear(); - - iw.walk_to(id); // walk after the first instruction (always a label) of the block - assert(iw.current_position() == id, "did not walk completely to id"); - - // Only consider FPU values in registers - Interval* interval = iw.active_first(fixedKind); - while (interval != Interval::end()) { - int reg = interval->assigned_reg(); - assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register"); - assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)"); - assert(interval->from() <= id && id < interval->to(), "interval out of range"); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print(); - } -#endif - - regs.set_bit(reg - pd_first_fpu_reg); - interval = interval->next(); - } - - b->set_fpu_register_usage(regs); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->print_cr(""); - } -#endif - } - } - } - -#ifndef TARGET_ARCH_aarch64 - FpuStackAllocator alloc(ir()->compilation(), this); - _fpu_stack_allocator = &alloc; - alloc.allocate(); - _fpu_stack_allocator = NULL; -#endif -} - - -FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator) - : _compilation(compilation) - , _lir(NULL) - , _pos(-1) - , _allocator(allocator) - , _sim(compilation) - , _temp_sim(compilation) -{} - -void FpuStackAllocator::allocate() { - int num_blocks = allocator()->block_count(); - for (int i = 0; i < num_blocks; i++) { - // Set up to process block - BlockBegin* block = allocator()->block_at(i); - intArray* fpu_stack_state = block->fpu_stack_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- Begin of new Block %d -------", block->block_id()); - } -#endif 
- - assert(fpu_stack_state != NULL || - block->end()->as_Base() != NULL || - block->is_set(BlockBegin::exception_entry_flag), - "FPU stack state must be present due to linear-scan order for FPU stack allocation"); - // note: exception handler entries always start with an empty fpu stack - // because stack merging would be too complicated - - if (fpu_stack_state != NULL) { - sim()->read_state(fpu_stack_state); - } else { - sim()->clear(); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Reading FPU state for block %d:", block->block_id()); - sim()->print(); - tty->cr(); - } -#endif - - allocate_block(block); - CHECK_BAILOUT(); - } -} - -void FpuStackAllocator::allocate_block(BlockBegin* block) { - bool processed_merge = false; - LIR_OpList* insts = block->lir()->instructions_list(); - set_lir(block->lir()); - set_pos(0); - - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - _debug_information_computed = false; - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - LIR_OpBranch* branch = op->as_OpBranch(); - LIR_Op1* op1 = op->as_Op1(); - LIR_Op2* op2 = op->as_Op2(); - LIR_OpCall* opCall = op->as_OpCall(); - - if (branch != NULL && branch->block() != NULL) { - if (!processed_merge) { - // propagate stack at first branch to a successor - processed_merge = true; - bool required_merge = merge_fpu_stack_with_successors(block); - - assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches"); - } - - } else if (op1 != NULL) { - handle_op1(op1); - } else if (op2 != NULL) { - handle_op2(op2); - } else if (opCall != NULL) { - handle_opCall(opCall); - } - - compute_debug_information(op); - - set_pos(1 + pos()); - } - - // Propagate stack when block does not end with branch - if (!processed_merge) { - merge_fpu_stack_with_successors(block); - } -} - - -void 
FpuStackAllocator::compute_debug_information(LIR_Op* op) { - if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) { - visitor.visit(op); - - // exception handling - if (allocator()->compilation()->has_exception_handlers()) { - XHandlers* xhandlers = visitor.all_xhandler(); - int n = xhandlers->length(); - for (int k = 0; k < n; k++) { - allocate_exception_handler(xhandlers->handler_at(k)); - } - } else { - assert(visitor.all_xhandler()->length() == 0, "missed exception handler"); - } - - // compute debug information - int n = visitor.info_count(); - assert(n > 0, "should not visit operation otherwise"); - - for (int j = 0; j < n; j++) { - CodeEmitInfo* info = visitor.info_at(j); - // Compute debug information - allocator()->compute_debug_info(info, op->id()); - } - } - _debug_information_computed = true; -} - -void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) { - if (!sim()->is_empty()) { - LIR_List* old_lir = lir(); - int old_pos = pos(); - intArray* old_state = sim()->write_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- begin of exception handler -------"); - } -#endif - - if (xhandler->entry_code() == NULL) { - // need entry code to clear FPU stack - LIR_List* entry_code = new LIR_List(_compilation); - entry_code->jump(xhandler->entry_block()); - xhandler->set_entry_code(entry_code); - } - - LIR_OpList* insts = xhandler->entry_code()->instructions_list(); - set_lir(xhandler->entry_code()); - set_pos(0); - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - switch (op->code()) { - case lir_move: - assert(op->as_Op1() != NULL, "must be LIR_Op1"); - assert(pos() != insts->length() - 1, "must not be last operation"); - - handle_op1((LIR_Op1*)op); - break; - - case lir_branch: - 
assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch"); - assert(pos() == insts->length() - 1, "must be last operation"); - - // remove all remaining dead registers from FPU stack - clear_fpu_stack(LIR_OprFact::illegalOpr); - break; - - default: - // other operations not allowed in exception entry code - ShouldNotReachHere(); - } - - set_pos(pos() + 1); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- end of exception handler -------"); - } -#endif - - set_lir(old_lir); - set_pos(old_pos); - sim()->read_state(old_state); - } -} - - -int FpuStackAllocator::fpu_num(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo(); -} - -int FpuStackAllocator::tos_offset(LIR_Opr opr) { - return sim()->offset_from_tos(fpu_num(opr)); -} - - -LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - - int stack_offset = tos_offset(opr); - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), "shouldn't call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - -LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top"); - - int stack_offset = 0; - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), "shouldn't call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - - - -void FpuStackAllocator::insert_op(LIR_Op* op) { - 
lir()->insert_before(pos(), op); - set_pos(1 + pos()); -} - - -void FpuStackAllocator::insert_exchange(int offset) { - if (offset > 0) { - LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fxch_op); - sim()->swap(offset); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", sim()->get_slot(0)); sim()->print(); tty->cr(); - } -#endif - - } -} - -void FpuStackAllocator::insert_exchange(LIR_Opr opr) { - insert_exchange(tos_offset(opr)); -} - - -void FpuStackAllocator::insert_free(int offset) { - // move stack slot to the top of stack and then pop it - insert_exchange(offset); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - insert_op(fpop); - sim()->pop(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted pop New state: "); sim()->print(); tty->cr(); - } -#endif -} - - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) { - if (sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) { - if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) { - int offset = tos_offset(from); - LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fld); - - sim()->push(fpu_num(to)); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted copy (%d -> %d) New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr(); - } -#endif -} - -void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) { - sim()->rename(fpu_num(from), fpu_num(to)); -} - -void FpuStackAllocator::do_push(LIR_Opr opr) { - sim()->push(fpu_num(opr)); -} - -void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count 
alredy set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - if (opr->is_last_use()) { - op->set_fpu_pop_count(1); - sim()->pop(); - } -} - -void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - op->set_fpu_pop_count(1); - sim()->pop(); -} - -void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) { - int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0); - while (sim()->stack_size() > result_stack_size) { - assert(!sim()->slot_is_empty(0), "not allowed"); - - if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) { - insert_free(0); - } else { - // move "preserve" to bottom of stack so that all other stack slots can be popped - insert_exchange(sim()->stack_size() - 1); - } - } + // No FPU stack on AArch64 } - - -void FpuStackAllocator::handle_op1(LIR_Op1* op1) { - LIR_Opr in = op1->in_opr(); - LIR_Opr res = op1->result_opr(); - - LIR_Opr new_in = in; // new operands relative to the actual fpu stack top - LIR_Opr new_res = res; - - // Note: this switch is processed for all LIR_Op1, regardless if they have FPU-arguments, - // so checks for is_float_kind() are necessary inside the cases - switch (op1->code()) { - - case lir_return: { - // FPU-Stack must only contain the (optional) fpu return value. 
- // All remaining dead values are popped from the stack - // If the input operand is a fpu-register, it is exchanged to the bottom of the stack - - clear_fpu_stack(in); - if (in->is_fpu_register() && !in->is_xmm_register()) { - new_in = to_fpu_stack_top(in); - } - - break; - } - - case lir_move: { - if (in->is_fpu_register() && !in->is_xmm_register()) { - if (res->is_xmm_register()) { - // move from fpu register to xmm register (necessary for operations that - // are not available in the SSE instruction set) - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from fpu-register to fpu-register: - // * input and result register equal: - // nothing to do - // * input register is last use: - // rename the input register to result register -> input register - // not present on fpu-stack afterwards - // * input register not last use: - // duplicate input register to result register to preserve input - // - // Note: The LIR-Assembler does not produce any code for fpu register moves, - // so input and result stack index must be equal - - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - } - new_in = to_fpu_stack(res); - new_res = new_in; - - } else { - // move from fpu-register to memory - // input operand must be on top of stack - - insert_exchange(in); - - // create debug information here because afterwards the register may have been popped - compute_debug_information(op1); - - new_in = to_fpu_stack_top(in); - pop_if_last_use(op1, in); - } - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from memory/constant to fpu register - // result is pushed on the stack - - insert_free_if_dead(res); - - // create debug information before register is pushed - compute_debug_information(op1); - 
- do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - } - - case lir_neg: { - if (in->is_fpu_register() && !in->is_xmm_register()) { - assert(res->is_fpu_register() && !res->is_xmm_register(), "must be"); - assert(in->is_last_use(), "old value gets destroyed"); - - insert_free_if_dead(res, in); - insert_exchange(in); - new_in = to_fpu_stack_top(in); - - do_rename(in, res); - new_res = to_fpu_stack_top(res); - } - break; - } - - case lir_convert: { - Bytecodes::Code bc = op1->as_OpConvert()->bytecode(); - switch (bc) { - case Bytecodes::_d2f: - case Bytecodes::_f2d: - assert(res->is_fpu_register(), "must be"); - assert(in->is_fpu_register(), "must be"); - - if (!in->is_xmm_register() && !res->is_xmm_register()) { - // this is quite the same as a move from fpu-register to fpu-register - // Note: input and result operands must have different types - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - new_in = to_fpu_stack(in); - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - new_in = to_fpu_stack(in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - new_in = to_fpu_stack_top(in, true); - } - new_res = to_fpu_stack(res); - } - - break; - - case Bytecodes::_i2f: - case Bytecodes::_l2f: - case Bytecodes::_i2d: - case Bytecodes::_l2d: - assert(res->is_fpu_register(), "must be"); - if (!res->is_xmm_register()) { - insert_free_if_dead(res); - do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - - case Bytecodes::_f2i: - case Bytecodes::_d2i: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - - // TODO: update registes of stub - } - break; - - case Bytecodes::_f2l: - case Bytecodes::_d2l: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - } - break; - - case Bytecodes::_i2l: - case Bytecodes::_l2i: - 
case Bytecodes::_i2b: - case Bytecodes::_i2c: - case Bytecodes::_i2s: - // no fpu operands - break; - - default: - ShouldNotReachHere(); - } - break; - } - - case lir_roundfp: { - assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register"); - assert(res->is_stack(), "result must be on stack"); - - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_if_last_use(op1, in); - break; - } - - default: { - assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation"); - } - } - - op1->set_in_opr(new_in); - op1->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_op2(LIR_Op2* op2) { - LIR_Opr left = op2->in_opr1(); - if (!left->is_float_kind()) { - return; - } - if (left->is_xmm_register()) { - return; - } - - LIR_Opr right = op2->in_opr2(); - LIR_Opr res = op2->result_opr(); - LIR_Opr new_left = left; // new operands relative to the actual fpu stack top - LIR_Opr new_right = right; - LIR_Opr new_res = res; - - assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers"); - - switch (op2->code()) { - case lir_cmp: - case lir_cmp_fd2i: - case lir_ucmp_fd2i: { - assert(left->is_fpu_register(), "invalid LIR"); - assert(right->is_fpu_register(), "invalid LIR"); - - // the left-hand side must be on top of stack. 
- // the right-hand side is never popped, even if is_last_use is set - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - pop_if_last_use(op2, left); - break; - } - - case lir_mul_strictfp: - case lir_div_strictfp: { - assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot"); - insert_free_if_dead(op2->tmp1_opr()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - // fall-through: continue with the normal handling of lir_mul and lir_div - } - case lir_add: - case lir_sub: - case lir_mul: - case lir_div: { - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // either the left-hand or the right-hand side must be on top of stack - // (if right is not a register, left must be on top) - if (!right->is_fpu_register()) { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - } else { - // no exchange necessary if right is alredy on top of stack - if (tos_offset(right) == 0) { - new_left = to_fpu_stack(left); - new_right = to_fpu_stack_top(right); - } else { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - } - - if (right->is_last_use()) { - op2->set_fpu_pop_count(1); - - if (tos_offset(right) == 0) { - sim()->pop(); - } else { - // if left is on top of stack, the result is placed in the stack - // slot of right, so a renaming from right to res is necessary - assert(tos_offset(left) == 0, "must be"); - sim()->pop(); - do_rename(right, res); - } - } - } - new_res = to_fpu_stack(res); - - break; - } - - case lir_rem: { - assert(left->is_fpu_register(), "must be"); - assert(right->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // Must bring both operands to top of stack with following operand ordering: - // * fpu stack before rem: ... 
right left - // * fpu stack after rem: ... left - if (tos_offset(right) != 1) { - insert_exchange(right); - insert_exchange(1); - } - insert_exchange(left); - assert(tos_offset(right) == 1, "check"); - assert(tos_offset(left) == 0, "check"); - - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - - op2->set_fpu_pop_count(1); - sim()->pop(); - do_rename(right, res); - - new_res = to_fpu_stack_top(res); - break; - } - - case lir_abs: - case lir_sqrt: { - // Right argument appears to be unused - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_last_use(), "old value gets destroyed"); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - break; - } - - case lir_log: - case lir_log10: { - // log and log10 need one temporary fpu stack slot, so - // there is one temporary registers stored in temp of the - // operation. the stack allocator must guarantee that the stack - // slots are really free, otherwise there might be a stack - // overflow. - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(op2->tmp1_opr()->is_fpu_register(), "must be"); - - insert_free_if_dead(op2->tmp1_opr()); - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - break; - } - - - case lir_tan: - case lir_sin: - case lir_cos: - case lir_exp: { - // sin, cos and exp need two temporary fpu stack slots, so there are two temporary - // registers (stored in right and temp of the operation). 
- // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - // assert(left->is_last_use(), "old value gets destroyed"); - assert(right->is_fpu_register(), "right is used as the first temporary register"); - assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); - - insert_free_if_dead(right); - insert_free_if_dead(op2->tmp1_opr()); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); - break; - } - - case lir_pow: { - // pow needs two temporary fpu stack slots, so there are two temporary - // registers (stored in tmp1 and tmp2 of the operation). - // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. 
- assert(left->is_fpu_register(), "must be"); - assert(right->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - - assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register"); - assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); - assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers"); - - insert_free_if_dead(op2->tmp1_opr()); - insert_free_if_dead(op2->tmp2_opr()); - - // Must bring both operands to top of stack with following operand ordering: - // * fpu stack before pow: ... right left - // * fpu stack after pow: ... 
left - - insert_free_if_dead(res, right); - - if (tos_offset(right) != 1) { - insert_exchange(right); - insert_exchange(1); - } - insert_exchange(left); - assert(tos_offset(right) == 1, "check"); - assert(tos_offset(left) == 0, "check"); - - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); - - sim()->pop(); - - do_rename(right, res); - - new_res = to_fpu_stack_top(res); - break; - } - - default: { - assert(false, "missed a fpu-operation"); - } - } - - op2->set_in_opr1(new_left); - op2->set_in_opr2(new_right); - op2->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) { - LIR_Opr res = opCall->result_opr(); - - // clear fpu-stack before call - // it may contain dead values that could not have been remved by previous operations - clear_fpu_stack(LIR_OprFact::illegalOpr); - assert(sim()->is_empty(), "fpu stack must be empty now"); - - // compute debug information before (possible) fpu result is pushed - compute_debug_information(opCall); - - if (res->is_fpu_register() && !res->is_xmm_register()) { - do_push(res); - opCall->set_result_opr(to_fpu_stack_top(res)); - } -} - -#ifndef PRODUCT -void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) { - switch (op->code()) { - case lir_24bit_FPU: - case lir_reset_FPU: - case lir_ffree: - assert(false, "operations not allowed in lir. 
If one of these operations is needed, check if they have fpu operands"); - break; - - case lir_fpop_raw: - case lir_fxch: - case lir_fld: - assert(false, "operations only inserted by FpuStackAllocator"); - break; - } -} -#endif - - -void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) { - LIR_Op1* move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset()); - - instrs->instructions_list()->push(move); - - cur_sim->push(reg); - move->set_result_opr(to_fpu_stack(move->result_opr())); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Added new register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) { - assert(slot > 0, "no exchange necessary"); - - LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot)); - instrs->instructions_list()->push(fxch); - cur_sim->swap(slot); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) { - int reg = cur_sim->get_slot(0); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - instrs->instructions_list()->push(fpop); - cur_sim->pop(reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Removed register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) { - int reg = cur_sim->get_slot(change_slot); - - for (int slot = start_slot; slot >= 0; slot--) { - int new_reg = sux_sim->get_slot(slot); - - if (!cur_sim->contains(new_reg)) { - cur_sim->set_slot(change_slot, new_reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Renamed register %d to %d New state: ", reg, new_reg); 
cur_sim->print(); tty->cr(); - } - #endif - - return true; - } - } - return false; -} - - -void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); sux_sim->print(); tty->cr(); - } - - int slot; - for (slot = 0; slot < cur_sim->stack_size(); slot++) { - assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm"); - } - for (slot = 0; slot < sux_sim->stack_size(); slot++) { - assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm"); - } -#endif - - // size difference between cur and sux that must be resolved by adding or removing values form the stack - int size_diff = cur_sim->stack_size() - sux_sim->stack_size(); - - if (!ComputeExactFPURegisterUsage) { - // add slots that are currently free, but used in successor - // When the exact FPU register usage is computed, the stack does - // not contain dead values at merging -> no values must be added - - int sux_slot = sux_sim->stack_size() - 1; - while (size_diff < 0) { - assert(sux_slot >= 0, "slot out of bounds -> error in algorithm"); - - int reg = sux_sim->get_slot(sux_slot); - if (!cur_sim->contains(reg)) { - merge_insert_add(instrs, cur_sim, reg); - size_diff++; - - if (sux_slot + size_diff != 0) { - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - } - } - sux_slot--; - } - } - - assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now"); - assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be"); - - // stack merge algorithm: - // 1) as long as the current stack top is not in the right location (that meens - // it should not be on the stack top), exchange it into the right location - // 2) if the stack top is right, but the remaining stack is not ordered correctly, - // the stack top is exchanged away to get another value on top -> - // 
now step 1) can be continued - // the stack can also contain unused items -> these items are removed from stack - - int finished_slot = sux_sim->stack_size() - 1; - while (finished_slot >= 0 || size_diff > 0) { - while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) { - int reg = cur_sim->get_slot(0); - if (sux_sim->contains(reg)) { - int sux_slot = sux_sim->offset_from_tos(reg); - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - - } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) { - assert(size_diff > 0, "must be"); - - merge_insert_pop(instrs, cur_sim); - size_diff--; - } - assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed"); - } - - while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) { - finished_slot--; - } - - if (finished_slot >= 0) { - int reg = cur_sim->get_slot(finished_slot); - - if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) { - assert(sux_sim->contains(reg) || size_diff > 0, "must be"); - merge_insert_xchg(instrs, cur_sim, finished_slot); - } - assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed"); - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); sux_sim->print(); tty->cr(); - tty->cr(); - } -#endif - assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now"); -} - - -void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - } -#endif - - int slot = 0; - while (slot < cur_sim->stack_size()) { - int reg = cur_sim->get_slot(slot); - if 
(!live_fpu_regs.at(reg)) { - if (slot != 0) { - merge_insert_xchg(instrs, cur_sim, slot); - } - merge_insert_pop(instrs, cur_sim); - } else { - slot++; - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - tty->cr(); - } - - // check if fpu stack only contains live registers - for (unsigned int i = 0; i < live_fpu_regs.size(); i++) { - if (live_fpu_regs.at(i) != cur_sim->contains(i)) { - tty->print_cr("mismatch between required and actual stack content"); - break; - } - } -#endif -} - - -bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:", - block->block_id(), pos()); - sim()->print(); - tty->cr(); - } -#endif - - bool changed = false; - int number_of_sux = block->number_of_sux(); - - if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) { - // The successor has at least two incoming edges, so a stack merge will be necessary - // If this block is the first predecessor, cleanup the current stack and propagate it - // If this block is not the first predecessor, a stack merge will be necessary - - BlockBegin* sux = block->sux_at(0); - intArray* state = sux->fpu_stack_state(); - LIR_List* instrs = new LIR_List(_compilation); - - if (state != NULL) { - // Merge with a successors that already has a FPU stack state - // the block must only have one successor because critical edges must been split - FpuStackSim* cur_sim = sim(); - FpuStackSim* sux_sim = temp_sim(); - sux_sim->read_state(state); - - merge_fpu_stack(instrs, cur_sim, sux_sim); - - } else { - // propagate current FPU stack state to successor without state - // clean up stack first so that there are no dead values on the stack - if (ComputeExactFPURegisterUsage) { - FpuStackSim* cur_sim = sim(); - BitMap 
live_fpu_regs = block->sux_at(0)->fpu_register_usage(); - assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage"); - - merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs); - } - - intArray* state = sim()->write_state(); - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id()); - sim()->print(); tty->cr(); - } - sux->set_fpu_stack_state(state); - } - - if (instrs->instructions_list()->length() > 0) { - lir()->insert_before(pos(), instrs); - set_pos(instrs->instructions_list()->length() + pos()); - changed = true; - } - - } else { - // Propagate unmodified Stack to successors where a stack merge is not necessary - intArray* state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - -#ifdef ASSERT - for (int j = 0; j < sux->number_of_preds(); j++) { - assert(block == sux->pred_at(j), "all critical edges must be broken"); - } - - // check if new state is same - if (sux->fpu_stack_state() != NULL) { - intArray* sux_state = sux->fpu_stack_state(); - assert(state->length() == sux_state->length(), "overwriting existing stack state"); - for (int j = 0; j < state->length(); j++) { - assert(state->at(j) == sux_state->at(j), "overwriting existing stack state"); - } - } -#endif -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d", sux->block_id()); - sim()->print(); tty->cr(); - } -#endif - - sux->set_fpu_stack_state(state); - } - } - -#ifndef PRODUCT - // assertions that FPU stack state conforms to all successors' states - intArray* cur_state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - intArray* sux_state = sux->fpu_stack_state(); - - assert(sux_state != NULL, "no fpu state"); - assert(cur_state->length() == sux_state->length(), "incorrect length"); - for (int i = 0; i < cur_state->length(); i++) { - assert(cur_state->at(i) == sux_state->at(i), 
"element not equal"); - } - } -#endif - - return changed; -}
--- a/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -24,8 +24,8 @@ * */ -#ifndef CPU_X86_VM_C1_LINEARSCAN_X86_HPP -#define CPU_X86_VM_C1_LINEARSCAN_X86_HPP +#ifndef CPU_AARCH64_VM_C1_LINEARSCAN_HPP +#define CPU_AARCH64_VM_C1_LINEARSCAN_HPP inline bool LinearScan::is_processed_reg_num(int reg_num) { return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; @@ -75,77 +75,4 @@ -class FpuStackAllocator VALUE_OBJ_CLASS_SPEC { - private: - Compilation* _compilation; - LinearScan* _allocator; - - LIR_OpVisitState visitor; - - LIR_List* _lir; - int _pos; - FpuStackSim _sim; - FpuStackSim _temp_sim; - - bool _debug_information_computed; - - LinearScan* allocator() { return _allocator; } - Compilation* compilation() const { return _compilation; } - - // unified bailout support - void bailout(const char* msg) const { compilation()->bailout(msg); } - bool bailed_out() const { return compilation()->bailed_out(); } - - int pos() { return _pos; } - void set_pos(int pos) { _pos = pos; } - LIR_Op* cur_op() { Unimplemented(); return lir()->instructions_list()->at(pos()); } - LIR_List* lir() { return _lir; } - void set_lir(LIR_List* lir) { _lir = lir; } - FpuStackSim* sim() { return &_sim; } - FpuStackSim* temp_sim() { return &_temp_sim; } - - int fpu_num(LIR_Opr opr); - int tos_offset(LIR_Opr opr); - LIR_Opr to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset = false); - - // Helper functions for handling operations - void insert_op(LIR_Op* op); - void insert_exchange(int offset); - void insert_exchange(LIR_Opr opr); - void insert_free(int offset); - void insert_free_if_dead(LIR_Opr opr); - void insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore); - void insert_copy(LIR_Opr from, LIR_Opr to); - void do_rename(LIR_Opr from, LIR_Opr to); - void do_push(LIR_Opr opr); - void pop_if_last_use(LIR_Op* op, LIR_Opr opr); - void pop_always(LIR_Op* op, 
LIR_Opr opr); - void clear_fpu_stack(LIR_Opr preserve); - void handle_op1(LIR_Op1* op1); - void handle_op2(LIR_Op2* op2); - void handle_opCall(LIR_OpCall* opCall); - void compute_debug_information(LIR_Op* op); - void allocate_exception_handler(XHandler* xhandler); - void allocate_block(BlockBegin* block); - -#ifndef PRODUCT - void check_invalid_lir_op(LIR_Op* op); -#endif - - // Helper functions for merging of fpu stacks - void merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg); - void merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot); - void merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim); - bool merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot); - void merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim); - void merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs); - bool merge_fpu_stack_with_successors(BlockBegin* block); - - public: - LIR_Opr to_fpu_stack(LIR_Opr opr); // used by LinearScan for creation of debug information - - FpuStackAllocator(Compilation* compilation, LinearScan* allocator); - void allocate(); -}; - -#endif // CPU_X86_VM_C1_LINEARSCAN_X86_HPP +#endif // CPU_AARCH64_VM_C1_LINEARSCAN_HPP
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -41,7 +41,6 @@ FloatRegister f0, FloatRegister f1, Register result) { - Label done; if (is_float) { fcmps(f0, f1); } else { @@ -50,21 +49,14 @@ if (unordered_result < 0) { // we want -1 for unordered or less than, 0 for equal and 1 for // greater than. - mov(result, (u_int64_t)-1L); - // for FP LT tests less than or unordered - br(Assembler::LT, done); - // install 0 for EQ otherwise 1 - csinc(result, zr, zr, Assembler::EQ); + cset(result, NE); // Not equal or unordered + cneg(result, result, LT); // Less than or unordered } else { // we want -1 for less than, 0 for equal and 1 for unordered or // greater than. - mov(result, 1L); - // for FP HI tests greater than or unordered - br(Assembler::HI, done); - // install 0 for EQ otherwise ~0 - csinv(result, zr, zr, Assembler::EQ); + cset(result, NE); // Not equal or unordered + cneg(result, result, LO); // Less than } - bind(done); } int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { @@ -431,10 +423,6 @@ } } - -void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) { Unimplemented(); } - - void C1_MacroAssembler::verified_entry() { }
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -80,6 +80,7 @@ pop(r0, sp); #endif reset_last_Java_frame(true, true); + maybe_isb(); // check for pending exceptions { Label L; @@ -569,6 +570,7 @@ } #endif __ reset_last_Java_frame(true, false); + __ maybe_isb(); // check for pending exceptions { Label L; @@ -1235,6 +1237,12 @@ __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift); unsigned long offset; __ adrp(rscratch1, cardtable, offset); + assert((offset & 0x3ffL) == 0, "assumed offset aligned to 0x400"); + // n.b. intra-page offset will never change even if this gets + // relocated so it is safe to omit the lea when offset == 0 + if (offset != 0) { + __ lea(rscratch1, Address(rscratch1, offset)); + } __ add(card_addr, card_addr, rscratch1); __ ldrb(rscratch1, Address(card_addr, offset)); __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
--- a/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -29,29 +29,7 @@ protected: -#if 0 - address generate_asm_interpreter_entry(bool synchronized); - address generate_native_entry(bool synchronized); - address generate_abstract_entry(void); - address generate_math_entry(AbstractInterpreter::MethodKind kind); - address generate_empty_entry(void); - address generate_accessor_entry(void); - address generate_Reference_get_entry(void); - void lock_method(void); - void generate_stack_overflow_check(void); - - void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); - void generate_counter_overflow(Label* do_continue); -#endif - void generate_more_monitors(); void generate_deopt_handling(); -#if 0 - address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only - void generate_compute_interpreter_state(const Register state, - const Register prev_state, - const Register sender_sp, - bool native); // C++ interpreter only -#endif #endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/frame_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -810,7 +810,7 @@ unsigned long bcx, unsigned long thread) { RegisterMap map((JavaThread*)thread, false); if (!reg_map) { - reg_map = (RegisterMap*)new char[sizeof map]; + reg_map = (RegisterMap*)os::malloc(sizeof map, mtNone); } memcpy(reg_map, &map, sizeof map); {
--- a/src/cpu/aarch64/vm/icache_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/icache_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -32,7 +32,10 @@ void ICacheStubGenerator::generate_icache_flush( ICache::flush_icache_stub_t* flush_icache_stub) { - aarch64TestHook(); // Give anyone who calls this a surprise *flush_icache_stub = (ICache::flush_icache_stub_t)NULL; } + +void ICache::initialize() { + aarch64TestHook(); +}
--- a/src/cpu/aarch64/vm/icache_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/icache_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -33,7 +33,7 @@ class ICache : public AbstractICache { public: - static void initialize() {} + static void initialize(); static void invalidate_word(address addr) { __clear_cache((char *)addr, (char *)(addr + 3)); }
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -54,10 +54,6 @@ // Implementation of InterpreterMacroAssembler -#ifdef CC_INTERP -void InterpreterMacroAssembler::get_method(Register reg) { Unimplemented(); } -#endif // CC_INTERP - #ifndef CC_INTERP void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { @@ -1413,6 +1409,7 @@ // Note: No need to save/restore rbcp & rlocals pointer since these // are callee saved registers and no blocking/ GC can happen // in leaf calls. + // also no need to restore method register #ifdef ASSERT { Label L; @@ -1456,6 +1453,8 @@ entry_point, number_of_arguments, check_exceptions); // interpreter specific + // method oop may have moved so reload from interpreter stack frame + get_method(rmethod); restore_bcp(); restore_locals(); // reload the constant pool cache in case a PermGen GC moved it
--- a/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -42,25 +42,16 @@ void clear(void) { // clearing _last_Java_sp must be first _last_Java_sp = NULL; - // fence? + OrderAccess::release(); _last_Java_fp = NULL; _last_Java_pc = NULL; } void copy(JavaFrameAnchor* src) { - // In order to make sure the transition state is valid for "this" - // We must clear _last_Java_sp before copying the rest of the new data - // - // Hack Alert: Temporary bugfix for 4717480/4721647 - // To act like previous version (pd_cache_state) don't NULL _last_Java_sp - // unless the value is changing - // - if (_last_Java_sp != src->_last_Java_sp) - _last_Java_sp = NULL; - _last_Java_fp = src->_last_Java_fp; _last_Java_pc = src->_last_Java_pc; // Must be last so profiler will always see valid frame if has_last_frame() is true + OrderAccess::release(); _last_Java_sp = src->_last_Java_sp; }
--- a/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -128,10 +128,15 @@ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; default: ShouldNotReachHere(); } - // tail call - __ lea(rscratch1, ExternalAddress(slow_case_addr)); - __ br(rscratch1); + { + __ enter(); + __ lea(rscratch1, ExternalAddress(slow_case_addr)); + __ blr(rscratch1); + __ maybe_isb(); + __ leave(); + __ ret(lr); + } __ flush (); return fast_entry;
--- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -47,9 +47,9 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { if (VerifyMethodHandles) - verify_klass(_masm, klass_reg, SystemDictionary::Class_klass(), + verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(), "MH argument is a Class"); - __ ldr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); + __ load_heap_oop(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); } #ifdef ASSERT @@ -140,7 +140,7 @@ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); __ verify_oop(method_temp); // the following assumes that a methodOop is normally compressed in the vmtarget field: - __ ldr(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); __ verify_oop(method_temp); if (VerifyMethodHandles && !for_compiler_entry) { @@ -334,14 +334,14 @@ if (VerifyMethodHandles) { verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); } - __ ldr(rmethod, member_vmtarget); + __ load_heap_oop(rmethod, member_vmtarget); break; case vmIntrinsics::_linkToStatic: if (VerifyMethodHandles) { verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); } - __ ldr(rmethod, member_vmtarget); + __ load_heap_oop(rmethod, member_vmtarget); break; case vmIntrinsics::_linkToVirtual:
--- a/src/cpu/aarch64/vm/methodHandles_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/methodHandles_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -29,7 +29,7 @@ // Adapters static unsigned int adapter_code_size() { - 32000 DEBUG_ONLY(+ 120000); + return 32000 DEBUG_ONLY(+ 120000); } public:
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -37,22 +37,17 @@ #include "c1/c1_Runtime1.hpp" #endif -void NativeInstruction::wrote(int offset) { - // FIXME: Native needs ISB here -; } - - void NativeCall::verify() { ; } address NativeCall::destination() const { return instruction_address() + displacement(); } -void NativeCall::print() { Unimplemented(); } - // Inserts a native call instruction at a given pc void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } +//------------------------------------------------------------------- + void NativeMovConstReg::verify() { // make sure code pattern is actually mov reg64, imm64 instructions } @@ -73,6 +68,7 @@ *(intptr_t*)addr = x; } else { MacroAssembler::pd_patch_instruction(instruction_address(), (address)x); + ICache::invalidate_range(instruction_address(), instruction_size); } }; @@ -83,12 +79,8 @@ //------------------------------------------------------------------- -int NativeMovRegMem::instruction_start() const { Unimplemented(); return 0; } - address NativeMovRegMem::instruction_address() const { return addr_at(instruction_offset); } -address NativeMovRegMem::next_instruction_address() const { Unimplemented(); return 0; } - int NativeMovRegMem::offset() const { address pc = instruction_address(); unsigned insn = *(unsigned*)pc; @@ -108,6 +100,7 @@ *(long*)addr = x; } else { MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x)); + ICache::invalidate_range(instruction_address(), instruction_size); } } @@ -117,23 +110,11 @@ #endif } - -void NativeMovRegMem::print() { Unimplemented(); } - -//------------------------------------------------------------------- - -void NativeLoadAddress::verify() { Unimplemented(); } - - -void NativeLoadAddress::print() { Unimplemented(); } - //-------------------------------------------------------------------------------- void 
NativeJump::verify() { ; } -void NativeJump::insert(address code_pos, address entry) { Unimplemented(); } - void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { } @@ -156,8 +137,11 @@ dest = instruction_address(); MacroAssembler::pd_patch_instruction(instruction_address(), dest); + ICache::invalidate_range(instruction_address(), instruction_size); }; +//------------------------------------------------------------------- + bool NativeInstruction::is_safepoint_poll() { // a safepoint_poll is implemented in two steps as either // @@ -207,7 +191,7 @@ return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101; } -// MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie) +// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie) void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ptrdiff_t disp = dest - verified_entry; @@ -221,15 +205,10 @@ ICache::invalidate_range(verified_entry, instruction_size); } - -void NativePopReg::insert(address code_pos, Register reg) { Unimplemented(); } - - void NativeIllegalInstruction::insert(address code_pos) { Unimplemented(); } void NativeGeneralJump::verify() { } - void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ptrdiff_t disp = entry - code_pos; guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); @@ -242,7 +221,8 @@ // MT-safe patching of a long jump instruction. void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { - assert(nativeInstruction_at(instr_addr)->is_jump_or_nop(), + NativeGeneralJump* n_jump = (NativeGeneralJump*)instr_addr; + assert(n_jump->is_jump_or_nop(), "Aarch64 cannot replace non-jump with jump"); uint32_t instr = *(uint32_t*)code_buffer; *(uint32_t*)instr_addr = instr;
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -57,7 +57,6 @@ enum { instruction_size = 4 }; inline bool is_nop(); bool is_dtrace_trap(); - inline bool is_call(); inline bool is_illegal(); inline bool is_return(); bool is_jump(); @@ -81,10 +80,10 @@ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } - void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } - void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } - void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } - void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } // This doesn't really do anything on AArch64, but it is the place where // cache invalidation belongs, generically: @@ -143,6 +142,7 @@ offset &= (1 << 26) - 1; // mask off insn part insn |= offset; set_int_at(displacement_offset, insn); + ICache::invalidate_range(instruction_address(), instruction_size); } // Similar to replace_mt_safe, but just changes the destination. The @@ -175,11 +175,6 @@ return is_call_at(return_address - NativeCall::return_address_offset); } - static bool is_call_to(address instr, address target) { - return nativeInstruction_at(instr)->is_call() && - nativeCall_at(instr)->destination() == target; - } - // MT-safe patching of a call instruction. 
static void insert(address code_pos, address entry); @@ -345,9 +340,6 @@ // unit test stuff static void test() {} - - private: - friend NativeLoadAddress* nativeLoadAddress_at (address address) { Unimplemented(); return 0; } }; class NativeJump: public NativeInstruction { @@ -434,10 +426,6 @@ public: }; -inline bool NativeInstruction::is_illegal() { Unimplemented(); return false; } -inline bool NativeInstruction::is_call() { Unimplemented(); return false; } -inline bool NativeInstruction::is_return() { Unimplemented(); return false; } - inline bool NativeInstruction::is_nop() { uint32_t insn = *(uint32_t*)addr_at(0); return insn == 0xd503201f; @@ -466,8 +454,4 @@ return is_nop() || is_jump(); } -inline bool NativeInstruction::is_cond_jump() { Unimplemented(); return false; } - -inline bool NativeInstruction::is_mov_literal64() { Unimplemented(); return false; } - #endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
--- a/src/cpu/aarch64/vm/register_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/register_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -174,44 +174,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); -// #ifndef DONT_USE_REGISTER_DEFINES -#if 0 -#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) -#define v0 ((FloatRegister)( v0_FloatRegisterEnumValue)) -#define v1 ((FloatRegister)( v1_FloatRegisterEnumValue)) -#define v2 ((FloatRegister)( v2_FloatRegisterEnumValue)) -#define v3 ((FloatRegister)( v3_FloatRegisterEnumValue)) -#define v4 ((FloatRegister)( v4_FloatRegisterEnumValue)) -#define v5 ((FloatRegister)( v5_FloatRegisterEnumValue)) -#define v6 ((FloatRegister)( v6_FloatRegisterEnumValue)) -#define v7 ((FloatRegister)( v7_FloatRegisterEnumValue)) -#define v8 ((FloatRegister)( v8_FloatRegisterEnumValue)) -#define v9 ((FloatRegister)( v9_FloatRegisterEnumValue)) -#define v10 ((FloatRegister)( v10_FloatRegisterEnumValue)) -#define v11 ((FloatRegister)( v11_FloatRegisterEnumValue)) -#define v12 ((FloatRegister)( v12_FloatRegisterEnumValue)) -#define v13 ((FloatRegister)( v13_FloatRegisterEnumValue)) -#define v14 ((FloatRegister)( v14_FloatRegisterEnumValue)) -#define v15 ((FloatRegister)( v15_FloatRegisterEnumValue)) -#define v16 ((FloatRegister)( v16_FloatRegisterEnumValue)) -#define v17 ((FloatRegister)( v17_FloatRegisterEnumValue)) -#define v18 ((FloatRegister)( v18_FloatRegisterEnumValue)) -#define v19 ((FloatRegister)( v19_FloatRegisterEnumValue)) -#define v20 ((FloatRegister)( v20_FloatRegisterEnumValue)) -#define v21 ((FloatRegister)( v21_FloatRegisterEnumValue)) -#define v22 ((FloatRegister)( v22_FloatRegisterEnumValue)) -#define v23 ((FloatRegister)( v23_FloatRegisterEnumValue)) -#define v24 ((FloatRegister)( v24_FloatRegisterEnumValue)) -#define v25 ((FloatRegister)( v25_FloatRegisterEnumValue)) -#define v26 ((FloatRegister)( 
v26_FloatRegisterEnumValue)) -#define v27 ((FloatRegister)( v27_FloatRegisterEnumValue)) -#define v28 ((FloatRegister)( v28_FloatRegisterEnumValue)) -#define v29 ((FloatRegister)( v29_FloatRegisterEnumValue)) -#define v30 ((FloatRegister)( v30_FloatRegisterEnumValue)) -#define v31 ((FloatRegister)( v31_FloatRegisterEnumValue)) -#endif // 0 -//#endif // DONT_USE_REGISTER_DEFINES - // Need to know the total number of registers of all sorts for SharedInfo. // Define a class that exports it. class ConcreteRegisterImpl : public AbstractRegisterImpl {
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -34,23 +34,30 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + if (verify_only) { + return; + } + + int bytes; + switch(type()) { case relocInfo::oop_type: { oop_Relocation *reloc = (oop_Relocation *)this; if (NativeInstruction::is_ldr_literal_at(addr())) { address constptr = (address)code()->oop_addr_at(reloc->oop_index()); - MacroAssembler::pd_patch_instruction(addr(), constptr); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); assert(*(address*)constptr == x, "error in oop relocation"); } else{ - MacroAssembler::patch_oop(addr(), x); + bytes = MacroAssembler::patch_oop(addr(), x); } } break; default: - MacroAssembler::pd_patch_instruction(addr(), x); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); break; } + ICache::invalidate_range(addr(), bytes); } address Relocation::pd_call_destination(address orig_addr) {
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -319,6 +319,8 @@ __ mov(c_rarg1, lr); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); __ blrt(rscratch1, 2, 0, 0); + __ maybe_isb(); + __ membar(Assembler::LoadLoad | Assembler::LoadStore); __ pop_CPU_state(); // restore sp @@ -1171,6 +1173,7 @@ __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type); __ blrt(rscratch1, rscratch2); // __ blrt(rscratch1, gpargs, fpargs, type); + __ maybe_isb(); } } @@ -1977,6 +1980,7 @@ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); } __ blrt(rscratch1, 1, 0, 1); + __ maybe_isb(); // Restore any method result value restore_native_result(masm, ret_type, stack_slots); @@ -2594,11 +2598,6 @@ } #endif - // TODO check various assumptions here - // - // call unimplemented to make sure we actually check this later - // __ call_Unimplemented(); - assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); address start = __ pc(); @@ -2842,6 +2841,8 @@ __ reset_last_Java_frame(false, true); + __ maybe_isb(); + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); __ cbz(rscratch1, noException); @@ -2911,6 +2912,8 @@ oop_maps->add_gc_map( __ offset() - start, map); + __ maybe_isb(); + // r0 contains the address we are going to jump to assuming no exception got installed // clear last_Java_sp @@ -3033,7 +3036,8 @@ __ mov(c_rarg0, rthread); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); __ blrt(rscratch1, 1, 0, MacroAssembler::ret_type_integral); - + __ maybe_isb(); + // Set an oopmap for the call site. This oopmap will only be used if we // are unwinding the stack. Hence, all locations will be dead. // Callee-saved registers will be the same as the frame above (i.e.,
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -78,10 +78,6 @@ // Stub Code definitions -#if 0 -static address handle_unsafe_access() { Unimplemented(); return 0; } -#endif - class StubGenerator: public StubCodeGenerator { private: @@ -603,159 +599,6 @@ return start; } - // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest) - // - // Arguments : - // c_rarg0: exchange_value - // c_rarg0: dest - // - // Result: - // *dest <- ex, return (orig *dest) - - // NOTE: not sure this is actually needed but if so it looks like it - // is called from os-specific code i.e. it needs an x86 prolog - - address generate_atomic_xchg() { return 0; } - - // Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) - // - // Arguments : - // c_rarg0: exchange_value - // c_rarg1: dest - // - // Result: - // *dest <- ex, return (orig *dest) - - // NOTE: not sure this is actually needed but if so it looks like it - // is called from os-specific code i.e. 
it needs an x86 prolog - - address generate_atomic_xchg_ptr() { return 0; } - - // Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest, - // jint compare_value) - // - // Arguments : - // c_rarg0: exchange_value - // c_rarg1: dest - // c_rarg2: compare_value - // - // Result: - // if ( compare_value == *dest ) { - // *dest = exchange_value - // return compare_value; - // else - // return *dest; - address generate_atomic_cmpxchg() { return 0; } - - // Support for jint atomic::atomic_cmpxchg_long(jlong exchange_value, - // volatile jlong* dest, - // jlong compare_value) - // Arguments : - // c_rarg0: exchange_value - // c_rarg1: dest - // c_rarg2: compare_value - // - // Result: - // if ( compare_value == *dest ) { - // *dest = exchange_value - // return compare_value; - // else - // return *dest; - - // NOTE: not sure this is actually needed but if so it looks like it - // is called from os-specific code i.e. it needs an x86 prolog - - address generate_atomic_cmpxchg_long() { return 0; } - - // Support for jint atomic::add(jint add_value, volatile jint* dest) - // - // Arguments : - // c_rarg0: add_value - // c_rarg1: dest - // - // Result: - // *dest += add_value - // return *dest; - - // NOTE: not sure this is actually needed but if so it looks like it - // is called from os-specific code i.e. it needs an x86 prolog - - address generate_atomic_add() { return 0; } - - // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) - // - // Arguments : - // c_rarg0: add_value - // c_rarg1: dest - // - // Result: - // *dest += add_value - // return *dest; - - // NOTE: not sure this is actually needed but if so it looks like it - // is called from os-specific code i.e. 
it needs an x86 prolog - - address generate_atomic_add_ptr() { return 0; } - - // Support for intptr_t OrderAccess::fence() - // - // Arguments : - // - // Result: - - // NOTE: this is called from C code so it needs an x86 prolog - // or else we need to fiddle it with inline asm for now - - address generate_orderaccess_fence() { return 0; } - - // Support for intptr_t get_previous_fp() - // - // This routine is used to find the previous frame pointer for the - // caller (current_frame_guess). This is used as part of debugging - // ps() is seemingly lost trying to find frames. - // This code assumes that caller current_frame_guess) has a frame. - - // NOTE: this is called from C code in os_windows.cpp with AMD64. other - // builds use inline asm -- so we should be ok for aarch64 - - address generate_get_previous_fp() { return 0; } - - // Support for intptr_t get_previous_sp() - // - // This routine is used to find the previous stack pointer for the - // caller. - - // NOTE: this is called from C code in os_windows.cpp with AMD64. other - // builds use inline asm -- so we should be ok for aarch64 - - address generate_get_previous_sp() { return 0; } - - // NOTE: these fixup routines appear only to be called from the - // opto code (they are mentioned in x86_64.ad) so we can do - // without them for now on aarch64 - - address generate_f2i_fixup() { Unimplemented(); return 0; } - - address generate_f2l_fixup() { Unimplemented(); return 0; } - - address generate_d2i_fixup() { Unimplemented(); return 0; } - - address generate_d2l_fixup() { Unimplemented(); return 0; } - - // The following routine generates a subroutine to throw an - // asynchronous UnknownError when an unsafe access gets a fault that - // could not be reasonably prevented by the programmer. (Example: - // SIGBUS/OBJERR.) 
- - // NOTE: this is used by the signal handler code as a return address - // to re-enter Java execution so it needs an x86 prolog which will - // reenter the simulator executing the generated handler code. so - // the prolog needs to adjust the sim's restart pc to enter the - // generated code at the start position then return from native to - // simulated execution. - - address generate_handler_for_unsafe_access() { return 0; } - // Non-destructive plausibility checks for oops // // Arguments: @@ -835,29 +678,7 @@ return start; } - // - // Verify that a register contains clean 32-bits positive value - // (high 32-bits are 0) so it could be used in 64-bits shifts. - // - // Input: - // Rint - 32-bits value - // Rtmp - scratch - // - void assert_clean_int(Register Rint, Register Rtmp) { Unimplemented(); } - - // Generate overlap test for array copy stubs - // - // Input: - // c_rarg0 - from - // c_rarg1 - to - // c_rarg2 - element count - // - // Output: - // r0 - &from[element count - 1] - // - void array_overlap_test(address no_overlap_target, int sf) { Unimplemented(); } void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); } - void array_overlap_test(address no_overlap_target, Label* NOLp, int sf) { Unimplemented(); } // Generate code for an array write pre barrier // @@ -1096,7 +917,7 @@ void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) { bool is_backwards = step < 0; - size_t granularity = abs(step); + size_t granularity = uabs(step); int direction = is_backwards ? -1 : 1; int unit = wordSize * direction; @@ -1152,7 +973,7 @@ Register count, Register tmp, int step) { copy_direction direction = step < 0 ? 
copy_backwards : copy_forwards; bool is_backwards = step < 0; - int granularity = abs(step); + int granularity = uabs(step); const Register t0 = r3, t1 = r4; if (is_backwards) { @@ -1748,23 +1569,6 @@ return start; } - // - // Generate 'unsafe' array copy stub - // Though just as safe as the other stubs, it takes an unscaled - // size_t argument instead of an element count. - // - // Input: - // c_rarg0 - source array address - // c_rarg1 - destination array address - // c_rarg2 - byte count, treated as ssize_t, can be zero - // - // Examines the alignment of the operands and dispatches - // to a long, int, short, or byte copy loop. - // - address generate_unsafe_copy(const char *name, - address byte_copy_entry, address short_copy_entry, - address int_copy_entry, address long_copy_entry) { Unimplemented(); return 0; } - // Perform range checks on the proposed arraycopy. // Kills temp, but nothing else. // Also, clean the sign bits of src_pos and dst_pos. @@ -1776,28 +1580,6 @@ Register temp, Label& L_failed) { Unimplemented(); } - // - // Generate generic array copy stubs - // - // Input: - // c_rarg0 - src oop - // c_rarg1 - src_pos (32-bits) - // c_rarg2 - dst oop - // c_rarg3 - dst_pos (32-bits) - // not Win64 - // c_rarg4 - element count (32-bits) - // Win64 - // rsp+40 - element count (32-bits) - // - // Output: - // r0 == 0 - success - // r0 == -1^K - failure, where K is partial transfer count - // - address generate_generic_copy(const char *name, - address byte_copy_entry, address short_copy_entry, - address int_copy_entry, address oop_copy_entry, - address long_copy_entry, address checkcast_copy_entry) { Unimplemented(); return 0; } - // These stubs get called from some dumb test routine. // I'll write them properly when they're called from // something that's actually doing something. 
@@ -1894,7 +1676,413 @@ /*dest_uninitialized*/true); } - void generate_math_stubs() { Unimplemented(); } + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + Label L_doLast; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 44); + __ br(Assembler::EQ, L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 52); + __ br(Assembler::EQ, L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ 
aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ BIND(L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + + __ ld1(v1, __ T16B, key); + __ rev32(v1, __ T16B, v1); + __ eor(v0, __ T16B, v0, v1); + + __ st1(v0, __ T16B, to); + + __ mov(r0, 0); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v5, __ T16B, __ post(key, 16)); + __ rev32(v5, __ T16B, v5); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, 
v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 44); + __ br(Assembler::EQ, L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 52); + __ br(Assembler::EQ, L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ BIND(L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + + __ eor(v0, __ T16B, v0, v5); + + __ st1(v0, __ T16B, to); + + __ mov(r0, 0); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // x0 - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); + + __ mov(rscratch1, len_reg); + __ ldrw(keylen, Address(key, 
arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, rvec); + + __ cmpw(keylen, 52); + __ br(Assembler::CC, L_loadkeys_44); + __ br(Assembler::EQ, L_loadkeys_52); + + __ ld1(v17, v18, __ T16B, __ post(key, 32)); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ BIND(L_loadkeys_52); + __ ld1(v19, v20, __ T16B, __ post(key, 32)); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); + __ BIND(L_loadkeys_44); + __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64)); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64)); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, v31, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); + __ rev32(v31, __ T16B, v31); + + __ BIND(L_aes_loop); + __ ld1(v1, __ T16B, __ post(from, 16)); + __ eor(v0, __ T16B, v0, v1); + + __ br(Assembler::CC, L_rounds_44); + __ br(Assembler::EQ, L_rounds_52); + + __ aese(v0, v17); __ aesmc(v0, v0); + __ aese(v0, v18); __ aesmc(v0, v0); + __ BIND(L_rounds_52); + __ aese(v0, v19); __ aesmc(v0, v0); + __ aese(v0, v20); __ aesmc(v0, v0); + __ BIND(L_rounds_44); + __ aese(v0, v21); __ aesmc(v0, v0); + __ aese(v0, v22); __ aesmc(v0, v0); + __ aese(v0, v23); __ aesmc(v0, v0); + __ aese(v0, v24); __ aesmc(v0, v0); + __ aese(v0, v25); __ aesmc(v0, v0); + __ aese(v0, v26); __ aesmc(v0, v0); + __ aese(v0, v27); __ aesmc(v0, v0); + __ aese(v0, v28); __ aesmc(v0, v0); + __ aese(v0, v29); __ aesmc(v0, v0); + __ aese(v0, v30); + __ eor(v0, __ T16B, v0, v31); + + __ st1(v0, __ T16B, __ post(to, 16)); + __ sub(len_reg, len_reg, 16); + __ cbnz(len_reg, L_aes_loop); + + __ st1(v0, __ T16B, rvec); + + __ mov(r0, rscratch2); + + __ leave(); + __ ret(lr); + } + + // Arguments: + // + // Inputs: + // 
c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // rax - input length + // + address generate_cipherBlockChaining_decryptAESCrypt() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); + + __ mov(rscratch2, len_reg); + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v2, __ T16B, rvec); + + __ ld1(v31, __ T16B, __ post(key, 16)); + __ rev32(v31, __ T16B, v31); + + __ cmpw(keylen, 52); + __ br(Assembler::CC, L_loadkeys_44); + __ br(Assembler::EQ, L_loadkeys_52); + + __ ld1(v17, v18, __ T16B, __ post(key, 32)); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ BIND(L_loadkeys_52); + __ ld1(v19, v20, __ T16B, __ post(key, 32)); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); + __ BIND(L_loadkeys_44); + __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64)); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64)); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ 
rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); + + __ BIND(L_aes_loop); + __ ld1(v0, __ T16B, __ post(from, 16)); + __ orr(v1, __ T16B, v0, v0); + + __ br(Assembler::CC, L_rounds_44); + __ br(Assembler::EQ, L_rounds_52); + + __ aesd(v0, v17); __ aesimc(v0, v0); + __ aesd(v0, v17); __ aesimc(v0, v0); + __ BIND(L_rounds_52); + __ aesd(v0, v19); __ aesimc(v0, v0); + __ aesd(v0, v20); __ aesimc(v0, v0); + __ BIND(L_rounds_44); + __ aesd(v0, v21); __ aesimc(v0, v0); + __ aesd(v0, v22); __ aesimc(v0, v0); + __ aesd(v0, v23); __ aesimc(v0, v0); + __ aesd(v0, v24); __ aesimc(v0, v0); + __ aesd(v0, v25); __ aesimc(v0, v0); + __ aesd(v0, v26); __ aesimc(v0, v0); + __ aesd(v0, v27); __ aesimc(v0, v0); + __ aesd(v0, v28); __ aesimc(v0, v0); + __ aesd(v0, v29); __ aesimc(v0, v0); + __ aesd(v0, v30); + __ eor(v0, __ T16B, v0, v31); + __ eor(v0, __ T16B, v0, v2); + + __ st1(v0, __ T16B, __ post(to, 16)); + __ orr(v2, __ T16B, v1, v1); + + __ sub(len_reg, len_reg, 16); + __ cbnz(len_reg, L_aes_loop); + + __ st1(v2, __ T16B, rvec); + + __ mov(r0, rscratch2); + + __ leave(); + __ ret(lr); + + return start; + } // AARCH64 use safefetch stubs unless we are building for the simulator // in which case the x86 asm code in linux_aarch64.S is used @@ -2005,38 +2193,6 @@ // otherwise assume that stack unwinding will be initiated, so // caller saved registers were assumed volatile in the compiler. - // NOTE: this needs carefully checking to see where the generated - // code gets called from for each generated error - // - // WrongMethodTypeException : jumped to directly from generated method - // handle code. - // - // StackOverflowError : jumped to directly from generated code in - // cpp and template interpreter. the generated code address also - // appears to be returned from the signal handler as the re-entry - // address forJava execution to continue from. 
This means it needs - // to be enterable from x86 code. Hmm, we may need to expose both an - // x86 prolog and the address of the generated ARM code and clients - // will have to be mdoified to pick the correct one. - // - // AbstractMethodError : never jumped to from generated code but the - // generated code address appears to be returned from the signal - // handler as the re-entry address for Java execution to continue - // from. This means it needs to be enterable from x86 code. So, we - // will need to provide this one with an x86 prolog as per - // StackOverflowError - // - // IncompatibleClassChangeError : only appears to be jumped to - // directly from vtableStubs code - // - // NullPointerException : never jumped to from generated code but - // the generated code address appears to be returned from the signal - // handler as the re-entry address for Java execution to continue - // from. This means it needs to be enterable from x86 code. So, we - // will need to provide this one with an x86 prolog as per - // StackOverflowError - - address generate_throw_exception(const char* name, address runtime_entry, Register arg1 = noreg, @@ -2100,6 +2256,7 @@ oop_maps->add_gc_map(the_pc - start, map); __ reset_last_Java_frame(true, true); + __ maybe_isb(); __ leave(); @@ -2142,22 +2299,6 @@ // is referenced by megamorphic call StubRoutines::_catch_exception_entry = generate_catch_exception(); - // atomic calls - StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); - StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr(); - StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); - StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); - StubRoutines::_atomic_add_entry = generate_atomic_add(); - StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr(); - StubRoutines::_fence_entry = generate_orderaccess_fence(); - - StubRoutines::_handler_for_unsafe_access_entry = - generate_handler_for_unsafe_access(); - - // 
platform dependent - StubRoutines::aarch64::_get_previous_fp_entry = generate_get_previous_fp(); - StubRoutines::aarch64::_get_previous_sp_entry = generate_get_previous_sp(); - // Build this early so it's available for the interpreter. StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", @@ -2197,6 +2338,13 @@ generate_arraycopy_stubs(); #ifndef BUILTIN_SIM + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -325,6 +325,7 @@ address entry = __ pc(); __ push(state); __ call_VM(noreg, runtime_entry); + __ membar(Assembler::AnyAny); __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); return entry; } @@ -1049,6 +1050,7 @@ // Call the native method. __ blrt(r10, rscratch1); + __ maybe_isb(); __ get_method(rmethod); // result potentially in r0 or v0 @@ -1106,6 +1108,7 @@ __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); __ blrt(rscratch2, 1, 0, 0); + __ maybe_isb(); __ get_method(rmethod); __ reinit_heapbase(); __ bind(Continue);
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -1604,6 +1604,12 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + __ profile_taken_branch(r0, r1); const ByteSize be_offset = methodOopDesc::backedge_counter_offset() + InvocationCounter::counter_offset(); @@ -1867,6 +1873,12 @@ void TemplateTable::ret() { transition(vtos, vtos); + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + locals_index(r1); __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp __ profile_ret(r1, r2); @@ -3379,6 +3391,8 @@ // continue __ bind(done); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); } void TemplateTable::newarray() { @@ -3387,6 +3401,7 @@ __ mov(c_rarg2, r0); call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), c_rarg1, c_rarg2); + __ membar(Assembler::StoreStore); } void TemplateTable::anewarray() { @@ -3396,6 +3411,7 @@ __ mov(c_rarg3, r0); call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), c_rarg1, c_rarg2, c_rarg3); + __ membar(Assembler::StoreStore); } void TemplateTable::arraylength() {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -47,6 +47,10 @@ #include <sys/auxv.h> #include <asm/hwcap.h> +#ifndef HWCAP_AES +#define HWCAP_AES (1<<3) +#endif + #ifndef HWCAP_CRC32 #define HWCAP_CRC32 (1<<7) #endif @@ -105,10 +109,14 @@ _supports_atomic_getset8 = true; _supports_atomic_getadd8 = true; - FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256); FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256); FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256); + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); #ifndef BUILTIN_SIM unsigned long auxv = getauxval(AT_HWCAP); @@ -118,11 +126,32 @@ if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) { warning("UseCRC32 specified, but not supported on this CPU"); } + if (auxv & HWCAP_AES) { + UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); + UseAESIntrinsics = + UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); + if (UseAESIntrinsics && !UseAES) { + warning("UseAESIntrinsics enabled, but UseAES not, enabling"); + UseAES = true; + } + } else { + if (UseAES) { + warning("UseAES specified, but not supported on this CPU"); + } + if (UseAESIntrinsics) { + warning("UseAESIntrinsics specified, but not supported on this CPU"); + } + } #endif if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; } +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(OptoScheduling)) { + OptoScheduling = true; + } +#endif } void VM_Version::initialize() {
--- a/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -58,7 +58,8 @@ #ifndef PRODUCT if (CountCompiledCalls) { - __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ lea(r19, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r19)); } #endif @@ -73,12 +74,14 @@ if (DebugVtables) { Label L; // check offset vs vtable length - __ ldrw(rscratch1, Address(r0, instanceKlass::vtable_length_offset() * wordSize)); + __ ldrw(rscratch1, Address(r19, instanceKlass::vtable_length_offset() * wordSize)); __ cmpw(rscratch1, vtable_index * vtableEntry::size()); __ br(Assembler::GT, L); + __ enter(); __ mov(r2, vtable_index); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + __ leave(); __ bind(L); } #endif // PRODUCT @@ -109,9 +112,6 @@ (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; @@ -130,7 +130,8 @@ #ifndef PRODUCT if (CountCompiledCalls) { - __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r10)); } #endif @@ -190,9 +191,6 @@ (int)(s->code_end() - __ pc())); } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); s->set_exception_points(npe_addr, ame_addr); return s; @@ -200,8 +198,48 @@ int 
VtableStub::pd_code_size_limit(bool is_vtable_stub) { - // FIXME - return 200; + int size = DebugVtables ? 216 : 0; + if (CountCompiledCalls) + size += 6 * 4; + // FIXME + if (is_vtable_stub) + size += 52; + else + size += 104; + return size; + + // In order to tune these parameters, run the JVM with VM options + // +PrintMiscellaneous and +WizardMode to see information about + // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. + // + // If Universe::narrow_klass_base is nonzero, decoding a compressed + // class can take several instructions. Run it with -Xmx31G + // -XX:+UseCompressedOops. + // + // The JVM98 app. _202_jess has a megamorphic interface call. + // The itable code looks like this: + // Decoding VtableStub itbl[1]@12 + // ldr w10, [x1,#8] + // lsl x10, x10, #3 + // ldr w11, [x10,#280] + // add x11, x10, x11, uxtx #3 + // add x11, x11, #0x1b8 + // ldr x12, [x11] + // cmp x9, x12 + // b.eq success + // loop: + // cbz x12, throw_icce + // add x11, x11, #0x10 + // ldr x12, [x11] + // cmp x9, x12 + // b.ne loop + // success: + // ldr x11, [x11,#8] + // ldr x12, [x10,x11] + // ldr x8, [x12,#72] + // br x8 + // throw_icce: + // b throw_ICCE_entry } int VtableStub::pd_code_alignment() { return 4; }
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -31,6 +31,10 @@ // Implementation of class atomic +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } @@ -71,7 +75,9 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { - return __sync_lock_test_and_set (dest, exchange_value); + jint res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; } inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) @@ -111,7 +117,9 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { - return __sync_lock_test_and_set (dest, exchange_value); + intptr_t res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; } inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
--- a/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -41,4 +41,6 @@ // Only used on 64 bit Windows platforms define_pd_global(bool, UseVectoredExceptions, false); +extern __thread Thread *aarch64_currentThread; + #endif // OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP
--- a/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -27,13 +27,10 @@ #define OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP #include "runtime/atomic.hpp" +#include "atomic_linux_aarch64.inline.hpp" #include "runtime/orderAccess.hpp" #include "vm_version_aarch64.hpp" -#define FULL_MEM_BARRIER __sync_synchronize() -#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); - // Implementation of class OrderAccess. inline void OrderAccess::loadload() { acquire(); }
--- a/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -26,32 +26,6 @@ #include "runtime/threadLocalStorage.hpp" #include "thread_linux.inline.hpp" -// Map stack pointer (%esp) to thread pointer for faster TLS access -// -// Here we use a flat table for better performance. Getting current thread -// is down to one memory access (read _sp_map[%esp>>12]) in generated code -// and two in runtime code (-fPIC code needs an extra load for _sp_map). -// -// This code assumes stack page is not shared by different threads. It works -// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). -// -// Notice that _sp_map is allocated in the bss segment, which is ZFOD -// (zero-fill-on-demand). While it reserves 4M address space upfront, -// actual memory pages are committed on demand. -// -// If an application creates and destroys a lot of threads, usually the -// stack space freed by a thread will soon get reused by new thread -// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). -// No memory page in _sp_map is wasted. -// -// However, it's still possible that we might end up populating & -// committing a large fraction of the 4M table over time, but the actual -// amount of live data in the table could be quite small. The max wastage -// is less than 4M bytes. If it becomes an issue, we could use madvise() -// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. -// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the -// physical memory page (i.e. similar to MADV_FREE on Solaris). 
- void ThreadLocalStorage::generate_code_for_get_thread() { // nothing we can do here for user-level thread } @@ -59,6 +33,9 @@ void ThreadLocalStorage::pd_init() { } +__thread Thread *aarch64_currentThread; + void ThreadLocalStorage::pd_set_thread(Thread* thread) { os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); + aarch64_currentThread = thread; }
--- a/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -29,8 +29,8 @@ public: - static Thread* thread() { - return (Thread*) os::thread_local_storage_at(thread_index()); + static Thread* thread() { + return aarch64_currentThread; } #endif // OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Thu Dec 04 14:30:02 2014 +0000 @@ -130,7 +130,6 @@ // The alignment used for eden and survivors within the young gen // and for boundary between young gen and old gen. size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; } - size_t capacity() const; size_t used() const;
--- a/src/share/vm/memory/collectorPolicy.cpp Fri Nov 28 03:10:21 2014 +0000 +++ b/src/share/vm/memory/collectorPolicy.cpp Thu Dec 04 14:30:02 2014 +0000 @@ -73,7 +73,7 @@ } PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment())); // Don't increase Perm size limit above specified. - MaxPermSize = align_size_down(MaxPermSize, max_alignment()); + MaxPermSize = MAX2(max_alignment(), align_size_down_(MaxPermSize, max_alignment())); if (PermSize > MaxPermSize) { PermSize = MaxPermSize; }