Mercurial > hg > icedtea7-forest-aarch64 > hotspot
changeset 4285:7554f9b2bcc7 hs24-b29
Merge
author | amurillo |
---|---|
date | Fri, 11 Jan 2013 10:38:38 -0800 |
parents | 0d5d62e38450 (current diff) 0e25216625f7 (diff) |
children | 181528fd1e74 |
files | make/hotspot_version src/share/vm/runtime/arguments.cpp |
diffstat | 86 files changed, 3031 insertions(+), 1819 deletions(-) [+] |
line wrap: on
line diff
--- a/make/hotspot_version Wed Jan 09 20:33:26 2013 -0800 +++ b/make/hotspot_version Fri Jan 11 10:38:38 2013 -0800 @@ -35,7 +35,7 @@ HS_MAJOR_VER=24 HS_MINOR_VER=0 -HS_BUILD_NUMBER=28 +HS_BUILD_NUMBER=29 JDK_MAJOR_VER=1 JDK_MINOR_VER=7
--- a/src/cpu/sparc/vm/sparc.ad Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/sparc/vm/sparc.ad Fri Jan 11 10:38:38 2013 -0800 @@ -10177,7 +10177,7 @@ //---------- Zeros Count Instructions ------------------------------------------ -instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ +instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{ predicate(UsePopCountInstruction); // See Matcher::match_rule_supported match(Set dst (CountLeadingZerosI src)); effect(TEMP dst, TEMP tmp, KILL cr); @@ -10274,7 +10274,7 @@ ins_pipe(ialu_reg); %} -instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{ +instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{ predicate(UsePopCountInstruction); // See Matcher::match_rule_supported match(Set dst (CountTrailingZerosI src)); effect(TEMP dst, KILL cr); @@ -10317,19 +10317,21 @@ //---------- Population Count Instructions ------------------------------------- -instruct popCountI(iRegI dst, iRegI src) %{ +instruct popCountI(iRegIsafe dst, iRegI src) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI src)); - format %{ "POPC $src, $dst" %} - ins_encode %{ - __ popc($src$$Register, $dst$$Register); + format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t" + "POPC $dst, $dst" %} + ins_encode %{ + __ srl($src$$Register, G0, $dst$$Register); + __ popc($dst$$Register, $dst$$Register); %} ins_pipe(ialu_reg); %} // Note: Long.bitCount(long) returns an int. -instruct popCountL(iRegI dst, iRegL src) %{ +instruct popCountL(iRegIsafe dst, iRegL src) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL src));
--- a/src/cpu/x86/vm/assembler_x86.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -2407,7 +2407,6 @@ void Assembler::pshufb(XMMRegister dst, Address src) { assert(VM_Version::supports_ssse3(), ""); - assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x00); @@ -2476,6 +2475,26 @@ emit_byte(0xC0 | encode); } +void Assembler::vptest(XMMRegister dst, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + bool vector256 = true; + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + emit_byte(0x17); + emit_operand(dst, src); +} + +void Assembler::vptest(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + emit_byte(0x17); + emit_byte(0xC0 | encode); +} + void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); @@ -2552,12 +2571,18 @@ emit_byte(0xA5); } +// sets rcx bytes with rax, value at [edi] +void Assembler::rep_stosb() { + emit_byte(0xF3); // REP + LP64_ONLY(prefix(REX_W)); + emit_byte(0xAA); // STOSB +} + // sets rcx pointer sized words with rax, value at [edi] // generic -void Assembler::rep_set() { // rep_set - emit_byte(0xF3); - // STOSQ - LP64_ONLY(prefix(REX_W)); +void Assembler::rep_stos() { + emit_byte(0xF3); // REP + LP64_ONLY(prefix(REX_W)); // LP64:STOSQ, LP32:STOSD emit_byte(0xAB); } @@ -3648,6 +3673,15 @@ emit_byte(0x01); } +// duplicate 4-bytes integer data from src into 8 locations in dest +void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx2(), ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + emit_byte(0x58); + emit_byte((unsigned char)(0xC0 | encode)); +} + void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); @@ -8427,7 +8461,8 @@ void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { // Used in sign-bit flipping with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + bool aligned_adr = (((intptr_t)src.target() & 15) == 0); + assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::pshufb(dst, as_Address(src)); } else { @@ -10483,6 +10518,22 @@ } +void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) { + // cnt - number of qwords (8-byte words). + // base - start address, qword aligned. + assert(base==rdi, "base register must be edi for rep stos"); + assert(tmp==rax, "tmp register must be eax for rep stos"); + assert(cnt==rcx, "cnt register must be ecx for rep stos"); + + xorptr(tmp, tmp); + if (UseFastStosb) { + shlptr(cnt,3); // convert to number of bytes + rep_stosb(); + } else { + NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM + rep_stos(); + } +} // IndexOf for constant substrings with size >= 8 chars // which don't need to be loaded through stack. @@ -10918,42 +10969,114 @@ testl(cnt2, cnt2); jcc(Assembler::zero, LENGTH_DIFF_LABEL); - // Load first characters + // Compare first characters load_unsigned_short(result, Address(str1, 0)); load_unsigned_short(cnt1, Address(str2, 0)); - - // Compare first characters subl(result, cnt1); jcc(Assembler::notZero, POP_LABEL); - decrementl(cnt2); - jcc(Assembler::zero, LENGTH_DIFF_LABEL); - - { - // Check after comparing first character to see if strings are equivalent - Label LSkip2; - // Check if the strings start at same location - cmpptr(str1, str2); - jccb(Assembler::notEqual, LSkip2); - - // Check if the length difference is zero (from stack) - cmpl(Address(rsp, 0), 0x0); - jcc(Assembler::equal, LENGTH_DIFF_LABEL); - - // Strings might not be equivalent - bind(LSkip2); - } + cmpl(cnt2, 1); + jcc(Assembler::equal, LENGTH_DIFF_LABEL); + + // Check if the strings start at the same location. + cmpptr(str1, str2); + jcc(Assembler::equal, LENGTH_DIFF_LABEL); Address::ScaleFactor scale = Address::times_2; int stride = 8; - // Advance to next element - addptr(str1, 16/stride); - addptr(str2, 16/stride); - - if (UseSSE42Intrinsics) { + if (UseAVX >= 2) { + Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; + Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; + Label COMPARE_TAIL_LONG; + int pcmpmask = 0x19; + + // Setup to compare 16-chars (32-bytes) vectors, + // start from first character again because it has aligned address. + int stride2 = 16; + int adr_stride = stride << scale; + int adr_stride2 = stride2 << scale; + + assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); + // rax and rdx are used by pcmpestri as elements counters + movl(result, cnt2); + andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count + jcc(Assembler::zero, COMPARE_TAIL_LONG); + + // fast path : compare first 2 8-char vectors. + bind(COMPARE_16_CHARS); + movdqu(vec1, Address(str1, 0)); + pcmpestri(vec1, Address(str2, 0), pcmpmask); + jccb(Assembler::below, COMPARE_INDEX_CHAR); + + movdqu(vec1, Address(str1, adr_stride)); + pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); + jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); + addl(cnt1, stride); + + // Compare the characters at index in cnt1 + bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character + load_unsigned_short(result, Address(str1, cnt1, scale)); + load_unsigned_short(cnt2, Address(str2, cnt1, scale)); + subl(result, cnt2); + jmp(POP_LABEL); + + // Setup the registers to start vector comparison loop + bind(COMPARE_WIDE_VECTORS); + lea(str1, Address(str1, result, scale)); + lea(str2, Address(str2, result, scale)); + subl(result, stride2); + subl(cnt2, stride2); + jccb(Assembler::zero, COMPARE_WIDE_TAIL); + negptr(result); + + // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) + bind(COMPARE_WIDE_VECTORS_LOOP); + vmovdqu(vec1, Address(str1, result, scale)); + vpxor(vec1, Address(str2, result, scale)); + vptest(vec1, vec1); + jccb(Assembler::notZero, VECTOR_NOT_EQUAL); + addptr(result, stride2); + subl(cnt2, stride2); + jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); + + // compare wide vectors tail + bind(COMPARE_WIDE_TAIL); + testptr(result, result); + jccb(Assembler::zero, LENGTH_DIFF_LABEL); + + movl(result, stride2); + movl(cnt2, result); + negptr(result); + jmpb(COMPARE_WIDE_VECTORS_LOOP); + + // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. + bind(VECTOR_NOT_EQUAL); + lea(str1, Address(str1, result, scale)); + lea(str2, Address(str2, result, scale)); + jmp(COMPARE_16_CHARS); + + // Compare tail chars, length between 1 to 15 chars + bind(COMPARE_TAIL_LONG); + movl(cnt2, result); + cmpl(cnt2, stride); + jccb(Assembler::less, COMPARE_SMALL_STR); + + movdqu(vec1, Address(str1, 0)); + pcmpestri(vec1, Address(str2, 0), pcmpmask); + jcc(Assembler::below, COMPARE_INDEX_CHAR); + subptr(cnt2, stride); + jccb(Assembler::zero, LENGTH_DIFF_LABEL); + lea(str1, Address(str1, result, scale)); + lea(str2, Address(str2, result, scale)); + negptr(cnt2); + jmpb(WHILE_HEAD_LABEL); + + bind(COMPARE_SMALL_STR); + } else if (UseSSE42Intrinsics) { Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; int pcmpmask = 0x19; - // Setup to compare 16-byte vectors + // Setup to compare 8-char (16-byte) vectors, + // start from first character again because it has aligned address. movl(result, cnt2); andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count jccb(Assembler::zero, COMPARE_TAIL); @@ -10985,7 +11108,7 @@ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); // compare wide vectors tail - testl(result, result); + testptr(result, result); jccb(Assembler::zero, LENGTH_DIFF_LABEL); movl(cnt2, stride); @@ -10997,21 +11120,20 @@ // Mismatched characters in the vectors bind(VECTOR_NOT_EQUAL); - addptr(result, cnt1); - movptr(cnt2, result); - load_unsigned_short(result, Address(str1, cnt2, scale)); - load_unsigned_short(cnt1, Address(str2, cnt2, scale)); - subl(result, cnt1); + addptr(cnt1, result); + load_unsigned_short(result, Address(str1, cnt1, scale)); + load_unsigned_short(cnt2, Address(str2, cnt1, scale)); + subl(result, cnt2); jmpb(POP_LABEL); bind(COMPARE_TAIL); // limit is zero movl(cnt2, result); // Fallthru to tail compare } - // Shift str2 and str1 to the end of the arrays, negate min - lea(str1, Address(str1, cnt2, scale, 0)); - lea(str2, Address(str2, cnt2, scale, 0)); + lea(str1, Address(str1, cnt2, scale)); + lea(str2, Address(str2, cnt2, scale)); + decrementl(cnt2); // first character was compared already negptr(cnt2); // Compare the rest of the elements @@ -11076,7 +11198,44 @@ shll(limit, 1); // byte count != 0 movl(result, limit); // copy - if (UseSSE42Intrinsics) { + if (UseAVX >= 2) { + // With AVX2, use 32-byte vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + + // Compare 32-byte vectors + andl(result, 0x0000001e); // tail count (in bytes) + andl(limit, 0xffffffe0); // vector count (in bytes) + jccb(Assembler::zero, COMPARE_TAIL); + + lea(ary1, Address(ary1, limit, Address::times_1)); + lea(ary2, Address(ary2, limit, Address::times_1)); + negptr(limit); + + bind(COMPARE_WIDE_VECTORS); + vmovdqu(vec1, Address(ary1, limit, Address::times_1)); + vmovdqu(vec2, Address(ary2, limit, Address::times_1)); + vpxor(vec1, vec2); + + vptest(vec1, vec1); + jccb(Assembler::notZero, FALSE_LABEL); + addptr(limit, 32); + jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + + testl(result, result); + jccb(Assembler::zero, TRUE_LABEL); + + vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); + vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); + vpxor(vec1, vec2); + + vptest(vec1, vec1); + jccb(Assembler::notZero, FALSE_LABEL); + jmpb(TRUE_LABEL); + + bind(COMPARE_TAIL); // limit is zero + movl(limit, result); + // Fallthru to tail compare + } else if (UseSSE42Intrinsics) { // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -11254,29 +11413,53 @@ { assert( UseSSE >= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; - // Fill 32-byte chunks movdl(xtmp, value); - pshufd(xtmp, xtmp, 0); - - subl(count, 8 << shift); - jcc(Assembler::less, L_check_fill_8_bytes); - align(16); - - BIND(L_fill_32_bytes_loop); - - if (UseUnalignedLoadStores) { - movdqu(Address(to, 0), xtmp); - movdqu(Address(to, 16), xtmp); + if (UseAVX >= 2 && UseUnalignedLoadStores) { + // Fill 64-byte chunks + Label L_fill_64_bytes_loop, L_check_fill_32_bytes; + vpbroadcastd(xtmp, xtmp); + + subl(count, 16 << shift); + jcc(Assembler::less, L_check_fill_32_bytes); + align(16); + + BIND(L_fill_64_bytes_loop); + vmovdqu(Address(to, 0), xtmp); + vmovdqu(Address(to, 32), xtmp); + addptr(to, 64); + subl(count, 16 << shift); + jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); + + BIND(L_check_fill_32_bytes); + addl(count, 8 << shift); + jccb(Assembler::less, L_check_fill_8_bytes); + vmovdqu(Address(to, 0), xtmp); + addptr(to, 32); + subl(count, 8 << shift); } else { - movq(Address(to, 0), xtmp); - movq(Address(to, 8), xtmp); - movq(Address(to, 16), xtmp); - movq(Address(to, 24), xtmp); + // Fill 32-byte chunks + pshufd(xtmp, xtmp, 0); + + subl(count, 8 << shift); + jcc(Assembler::less, L_check_fill_8_bytes); + align(16); + + BIND(L_fill_32_bytes_loop); + + if (UseUnalignedLoadStores) { + movdqu(Address(to, 0), xtmp); + movdqu(Address(to, 16), xtmp); + } else { + movq(Address(to, 0), xtmp); + movq(Address(to, 8), xtmp); + movq(Address(to, 16), xtmp); + movq(Address(to, 24), xtmp); + } + + addptr(to, 32); + subl(count, 8 << shift); + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); } - - addptr(to, 32); - subl(count, 8 << shift); - jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); BIND(L_check_fill_8_bytes); addl(count, 8 << shift); jccb(Assembler::zero, L_exit);
--- a/src/cpu/x86/vm/assembler_x86.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -842,7 +842,8 @@ // These do register sized moves/scans void rep_mov(); - void rep_set(); + void rep_stos(); + void rep_stosb(); void repne_scan(); #ifdef _LP64 void repne_scanl(); @@ -1460,9 +1461,12 @@ // Shift Right by bytes Logical DoubleQuadword Immediate void psrldq(XMMRegister dst, int shift); - // Logical Compare Double Quadword + // Logical Compare 128bit void ptest(XMMRegister dst, XMMRegister src); void ptest(XMMRegister dst, Address src); + // Logical Compare 256bit + void vptest(XMMRegister dst, XMMRegister src); + void vptest(XMMRegister dst, Address src); // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); @@ -1774,6 +1778,9 @@ void vextractf128h(Address dst, XMMRegister src); void vextracti128h(Address dst, XMMRegister src); + // duplicate 4-bytes integer data from src into 8 locations in dest + void vpbroadcastd(XMMRegister dst, XMMRegister src); + // AVX instruction which is used to clear upper 128 bits of YMM registers and // to avoid transaction penalty between AVX and SSE states. There is no // penalty if legacy SSE instructions are encoded using VEX prefix because @@ -2733,6 +2740,10 @@ Assembler::vxorpd(dst, nds, src, vector256); } + // Simple version for AVX2 256bit vectors + void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } + void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } + // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { if (UseAVX > 1) // vinserti128h is available only in AVX2 @@ -2814,6 +2825,9 @@ // C2 compiled method's prolog code. void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b); + // clear memory of size 'cnt' qwords, starting at 'base'. + void clear_mem(Register base, Register cnt, Register rtmp); + // IndexOf strings. // Small strings are loaded through stack if they cross page boundary. void string_indexof(Register str1, Register str2,
--- a/src/cpu/x86/vm/globals_x86.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/globals_x86.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -120,6 +120,9 @@ product(bool, UseUnalignedLoadStores, false, \ "Use SSE2 MOVDQU instruction for Arraycopy") \ \ + product(bool, UseFastStosb, false, \ + "Use fast-string operation for zeroing: rep stosb") \ + \ /* assembler */ \ product(bool, Use486InstrsOnly, false, \ "Use 80486 Compliant instruction subset") \
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -825,16 +825,22 @@ __ align(OptoLoopAlignment); __ BIND(L_copy_64_bytes_loop); - if(UseUnalignedLoadStores) { - __ movdqu(xmm0, Address(from, 0)); - __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); - __ movdqu(xmm1, Address(from, 16)); - __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); - __ movdqu(xmm2, Address(from, 32)); - __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); - __ movdqu(xmm3, Address(from, 48)); - __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); - + if (UseUnalignedLoadStores) { + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(from, 0)); + __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); + __ vmovdqu(xmm1, Address(from, 32)); + __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); + } else { + __ movdqu(xmm0, Address(from, 0)); + __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); + __ movdqu(xmm1, Address(from, 16)); + __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); + __ movdqu(xmm2, Address(from, 32)); + __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); + __ movdqu(xmm3, Address(from, 48)); + __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); + } } else { __ movq(xmm0, Address(from, 0)); __ movq(Address(from, to_from, Address::times_1, 0), xmm0); @@ -2203,13 +2209,13 @@ // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_encryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); Label L_doLast; address start = __ pc(); - const Register from = rsi; // source array address + const Register from = rdx; // source array address const Register to = rdx; // destination array address const Register key = rcx; // key array address const Register keylen = rax; @@ -2218,47 +2224,74 @@ const Address key_param (rbp, 8+8); const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - + const XMMRegister xmm_key_shuf_mask = xmm1; + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ movptr(from, from_param); + __ movptr(key, key_param); + + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + __ movptr(to, to_param); // For encryption, the java expanded key ordering is just what we need - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp); - for (int offset = 0x10; offset <= 0x90; offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp1); + + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - __ aesenclast(xmm_result, xmm_temp); + __ aesenc(xmm_result, xmm_temp1); + __ aesenclast(xmm_result, xmm_temp2); __ movdqu(Address(to, 0), xmm_result); // store the result __ xorptr(rax, rax); // return 0 - __ pop(rsi); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -2274,13 +2307,13 @@ // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_decryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); Label L_doLast; address start = __ pc(); - const Register from = rsi; // source array address + const Register from = rdx; // source array address const Register to = rdx; // destination array address const Register key = rcx; // key array address const Register keylen = rax; @@ -2289,51 +2322,76 @@ const Address key_param (rbp, 8+8); const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - + __ movptr(from, from_param); + __ movptr(key, key_param); + + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); + __ movptr(to, to_param); // for decryption java expanded key ordering is rotated one position from what we want // so we start from 0x10 here and hit 0x00 last // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); - __ pxor (xmm_result, xmm_temp); - for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { - aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - // only in 192 and 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - // only in 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ pxor (xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + // for decryption the aesdeclast operation is always on key+0x00 - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result, xmm_temp); - + __ aesdeclast(xmm_result, xmm_temp3); __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ pop(rsi); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -2369,7 +2427,7 @@ // c_rarg4 - input length // address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); address start = __ pc(); @@ -2422,7 +2480,7 @@ __ jcc(Assembler::notEqual, L_key_192_256); // 128 bit code follows here - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_loopTop_128); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input @@ -2452,15 +2510,15 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - __ BIND(L_key_192_256); - // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be changed to use more xmm registers) - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_192); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector @@ -2481,11 +2539,11 @@ __ jcc(Assembler::notEqual, L_loopTop_192); __ jmp(L_exit); - __ BIND(L_key_256); + __ BIND(L_key_256); // 256-bit code follows here (could be changed to use more xmm registers) - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_256); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector @@ -2524,7 +2582,7 @@ // address generate_cipherBlockChaining_decryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); address start = __ pc(); @@ -2585,9 +2643,9 @@ // 128-bit code follows here, parallelized - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_singleBlock_loopTop_128); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_128); __ cmpptr(len_reg, 0); // any blocks left?? __ jcc(Assembler::equal, L_exit); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input @@ -2626,7 +2684,7 @@ __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be optimized to use parallelism) - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_192); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input @@ -2651,7 +2709,7 @@ __ BIND(L_key_256); // 256-bit code follows here (could be optimized to use parallelism) - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_256); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1314,23 +1314,54 @@ // end_to - destination array end address // qword_count - 64-bits element count, negative // to - scratch - // L_copy_32_bytes - entry label + // L_copy_bytes - entry label // L_copy_8_bytes - exit label // - void copy_32_bytes_forward(Register end_from, Register end_to, + void copy_bytes_forward(Register end_from, Register end_to, Register qword_count, Register to, - Label& L_copy_32_bytes, Label& L_copy_8_bytes) { + Label& L_copy_bytes, Label& L_copy_8_bytes) { DEBUG_ONLY(__ stop("enter at entry label, not here")); Label L_loop; __ align(OptoLoopAlignment); - __ BIND(L_loop); - if(UseUnalignedLoadStores) { - __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); - __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); - __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); - __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); - + if (UseUnalignedLoadStores) { + Label L_end; + // Copy 64-bytes per iteration + __ BIND(L_loop); + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); + } else { + __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); + __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); + __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); + __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); + } + __ BIND(L_copy_bytes); + __ addptr(qword_count, 8); + __ jcc(Assembler::lessEqual, L_loop); + __ subptr(qword_count, 4); // sub(8) and add(4) + __ jccb(Assembler::greater, L_end); + // Copy trailing 32 bytes + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); + } else { + __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); + __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); + __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); + } + __ addptr(qword_count, 4); + __ BIND(L_end); } else { + // Copy 32-bytes per iteration + __ BIND(L_loop); __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); __ movq(Address(end_to, qword_count, Address::times_8, -24), to); __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); @@ -1339,15 +1370,15 @@ __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); + + __ BIND(L_copy_bytes); + __ addptr(qword_count, 4); + __ jcc(Assembler::lessEqual, L_loop); } - __ BIND(L_copy_32_bytes); - __ addptr(qword_count, 4); - __ jcc(Assembler::lessEqual, L_loop); __ subptr(qword_count, 4); __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords } - // Copy big chunks backward // // Inputs: @@ -1355,23 +1386,55 @@ // dest - destination array address // qword_count - 64-bits element count // to - scratch - // L_copy_32_bytes - entry label + // L_copy_bytes - entry label // L_copy_8_bytes - exit label // - void copy_32_bytes_backward(Register from, Register dest, + void copy_bytes_backward(Register from, Register dest, Register qword_count, Register to, - Label& L_copy_32_bytes, Label& L_copy_8_bytes) { + Label& L_copy_bytes, Label& L_copy_8_bytes) { DEBUG_ONLY(__ stop("enter at entry label, not here")); Label L_loop; __ align(OptoLoopAlignment); - __ BIND(L_loop); - if(UseUnalignedLoadStores) { - __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); - __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); - __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); - __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); - + if (UseUnalignedLoadStores) { + Label L_end; + // Copy 64-bytes per iteration + __ BIND(L_loop); + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); + __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } else { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); + __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); + __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); + __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); + __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); + } + __ BIND(L_copy_bytes); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop); + + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); + // Copy trailing 32 bytes + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0); + } else { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } + __ subptr(qword_count, 4); + __ BIND(L_end); } else { + // Copy 32-bytes per iteration + __ BIND(L_loop); __ movq(to, Address(from, qword_count, Address::times_8, 24)); __ movq(Address(dest, qword_count, Address::times_8, 24), to); __ movq(to, Address(from, qword_count, Address::times_8, 16)); @@ -1380,10 +1443,11 @@ __ movq(Address(dest, qword_count, Address::times_8, 8), to); __ movq(to, Address(from, qword_count, Address::times_8, 0)); __ movq(Address(dest, qword_count, Address::times_8, 0), to); + + __ BIND(L_copy_bytes); + __ subptr(qword_count, 4); + __ jcc(Assembler::greaterEqual, L_loop); } - __ BIND(L_copy_32_bytes); - __ subptr(qword_count, 4); - __ jcc(Assembler::greaterEqual, L_loop); __ addptr(qword_count, 4); __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords } @@ -1413,7 +1477,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; + Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; Label L_copy_byte, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address @@ -1445,7 +1509,7 @@ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); // make the count negative - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1488,8 +1552,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in 32-bytes chunks - copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; @@ -1516,7 +1580,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; + Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count @@ -1559,10 +1623,10 @@ // Check for and copy trailing dword __ BIND(L_copy_4_bytes); __ testl(byte_count, 4); - __ jcc(Assembler::zero, L_copy_32_bytes); + __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, qword_count, Address::times_8)); __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1577,8 +1641,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in 32-bytes chunks - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free @@ -1613,7 +1677,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; + Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count @@ -1644,7 +1708,7 @@ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1680,8 +1744,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in 32-bytes chunks - copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; @@ -1728,7 +1792,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes; + Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count @@ -1763,10 +1827,10 @@ // Check for and copy trailing dword __ BIND(L_copy_4_bytes); __ testl(word_count, 2); - __ jcc(Assembler::zero, L_copy_32_bytes); + __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, qword_count, Address::times_8)); __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1781,8 +1845,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in 32-bytes chunks - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free @@ -1818,7 +1882,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; + Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count @@ -1854,7 +1918,7 @@ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1881,8 +1945,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy 32-bytes chunks - copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ jmp(L_copy_4_bytes); return start; @@ -1910,7 +1974,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; + Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register count = rdx; // elements count @@ -1944,10 +2008,10 @@ // Check for and copy trailing dword __ testl(dword_count, 1); - __ jcc(Assembler::zero, L_copy_32_bytes); + __ jcc(Assembler::zero, L_copy_bytes); __ movl(rax, Address(from, dword_count, Address::times_4, -4)); __ movl(Address(to, dword_count, Address::times_4, -4), rax); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -1965,8 +2029,8 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in 32-bytes chunks - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); __ bind(L_exit); if (is_oop) { @@ -2004,7 +2068,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_exit; + Label L_copy_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count @@ -2036,7 +2100,7 @@ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -2055,8 +2119,8 @@ __ ret(0); } - // Copy 64-byte chunks - copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); @@ -2093,7 +2157,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - Label L_copy_32_bytes, L_copy_8_bytes, L_exit; + Label L_copy_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count @@ -2119,7 +2183,7 @@ gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); } - __ jmp(L_copy_32_bytes); + __ jmp(L_copy_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); @@ -2138,8 +2202,8 @@ __ ret(0); } - // Copy in 32-bytes chunks - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); @@ -2981,21 +3045,6 @@ } } - // aesenc using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesenc(xmmdst, xmmtmp); - } - - // aesdec using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesdec(xmmdst, xmmtmp); - } - - // Arguments: // // Inputs: @@ -3004,7 +3053,7 @@ // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_encryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); Label L_doLast; @@ -3016,15 +3065,17 @@ const Register keylen = rax; const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input @@ -3032,25 +3083,53 @@ // For encryption, the java expanded key ordering is just what we need // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp); - for (int offset = 0x10; offset <= 0x90; offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp1); + + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - __ aesenclast(xmm_result, xmm_temp); + __ aesenc(xmm_result, xmm_temp1); + __ aesenclast(xmm_result, xmm_temp2); __ movdqu(Address(to, 0), xmm_result); // store the result __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -3068,7 +3147,7 @@ // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_decryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); Label L_doLast; @@ -3080,15 +3159,17 @@ const Register keylen = rax; const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); @@ -3096,29 +3177,55 @@ // for decryption java expanded key ordering is rotated one position from what we want // so we start from 0x10 here and hit 0x00 last // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); - __ pxor (xmm_result, xmm_temp); - for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { - aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - // only in 192 and 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - // only in 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ pxor (xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + // for decryption the aesdeclast operation is always on key+0x00 - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result, xmm_temp); - + __ aesdeclast(xmm_result, xmm_temp3); __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -3137,7 +3244,7 @@ // c_rarg4 - input length // address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); address start = __ pc(); @@ -3161,16 +3268,19 @@ const XMMRegister xmm_temp = xmm1; // keys 0-10 preloaded into xmm2-xmm12 const int XMM_REG_NUM_KEY_FIRST = 2; - const int XMM_REG_NUM_KEY_LAST = 12; + const int XMM_REG_NUM_KEY_LAST = 15; const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); - const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10); + const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11); + const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12); + const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13); __ enter(); // required for proper stackwalking of RuntimeStub frame #ifdef _WIN64 // on win64, fill len_reg from stack position __ movl(len_reg, len_mem); - // save the xmm registers which must be preserved 6-12 + // save the xmm registers which must be preserved 6-15 __ subptr(rsp, -rsp_after_call_off * wordSize); for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(xmm_save(i), as_XMMRegister(i)); @@ -3179,12 +3289,11 @@ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); - // load up xmm regs 2 thru 12 with key 0x00 - 0xa0 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) { load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); offset += 0x10; } - __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) @@ -3195,16 +3304,15 @@ // 128 bit code follows here __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_128); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } __ aesenclast(xmm_result, xmm_key10); - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3226,24 +3334,23 @@ __ BIND(L_key_192_256); // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask); __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be changed to use more xmm registers) __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } - aes_enc_key(xmm_result, xmm_temp, key, 0xb0); - load_key(xmm_temp, key, 0xc0); - __ aesenclast(xmm_result, xmm_temp); - + __ aesenclast(xmm_result, xmm_key12); __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3253,22 +3360,19 @@ __ BIND(L_key_256); // 256-bit code follows here (could be changed to use more xmm registers) + load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask); __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } - aes_enc_key(xmm_result, xmm_temp, key, 0xb0); - aes_enc_key(xmm_result, xmm_temp, key, 0xc0); - aes_enc_key(xmm_result, xmm_temp, key, 0xd0); load_key(xmm_temp, key, 0xe0); __ aesenclast(xmm_result, xmm_temp); - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3295,7 +3399,7 @@ // address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); address start = __ pc(); @@ -3316,12 +3420,10 @@ #endif const Register pos = rax; - // xmm register assignments for the loops below - const XMMRegister xmm_result = xmm0; // keys 0-10 preloaded into xmm2-xmm12 const int XMM_REG_NUM_KEY_FIRST = 5; const int XMM_REG_NUM_KEY_LAST = 15; - const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -3340,13 +3442,14 @@ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) { load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); offset += 0x10; } + load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask); const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block + // registers holding the four results in the parallelized loop const XMMRegister xmm_result0 = xmm0; const XMMRegister xmm_result1 = xmm2; @@ -3404,8 +3507,12 @@ __ jmp(L_multiBlock_loopTop_128); // registers used in the non-parallelized loops + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; const XMMRegister xmm_prev_block_cipher_save = xmm2; - const XMMRegister xmm_temp = xmm3; + const XMMRegister xmm_key11 = xmm3; + const XMMRegister xmm_key12 = xmm4; + const XMMRegister xmm_temp = xmm4; __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_128); @@ -3443,12 +3550,15 @@ __ BIND(L_key_192_256); // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + load_key(xmm_key11, key, 0xb0); __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be optimized to use parallelism) + load_key(xmm_key12, key, 0xc0); // 192-bit key goes up to c0 __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_192); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector @@ -3456,14 +3566,13 @@ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { __ aesdec(xmm_result, as_XMMRegister(rnum)); } - aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0 - aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + __ aesdec(xmm_result, xmm_key11); + __ aesdec(xmm_result, xmm_key12); __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block - + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block __ addptr(pos, AESBlockSize); __ subptr(len_reg, AESBlockSize); __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); @@ -3473,23 +3582,26 @@ // 256-bit code follows here (could be optimized to use parallelism) __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_256); - __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { __ aesdec(xmm_result, as_XMMRegister(rnum)); } - aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0 - aes_dec_key(xmm_result, xmm_temp, key, 0xc0); - aes_dec_key(xmm_result, xmm_temp, key, 0xd0); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0); - __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 + __ aesdec(xmm_result, xmm_key11); + load_key(xmm_temp, key, 0xc0); + __ aesdec(xmm_result, xmm_temp); + load_key(xmm_temp, key, 0xd0); + __ aesdec(xmm_result, xmm_temp); + load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0 + __ aesdec(xmm_result, xmm_temp); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block - + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block __ addptr(pos, AESBlockSize); __ subptr(len_reg, AESBlockSize); __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
--- a/src/cpu/x86/vm/vm_version_x86.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -428,7 +428,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -445,6 +445,7 @@ (supports_avx() ? ", avx" : ""), (supports_avx2() ? ", avx2" : ""), (supports_aes() ? ", aes" : ""), + (supports_erms() ? ", erms" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), @@ -488,8 +489,8 @@ } // The AES intrinsic stubs require AES instruction support (of course) - // but also require AVX and sse3 modes for instructions it use. - if (UseAES && (UseAVX > 0) && (UseSSE > 2)) { + // but also require sse3 mode for instructions it use. + if (UseAES && (UseSSE > 2)) { if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { UseAESIntrinsics = true; } @@ -670,6 +671,16 @@ FLAG_SET_DEFAULT(UsePopCountInstruction, false); } + // Use fast-string operations if available. + if (supports_erms()) { + if (FLAG_IS_DEFAULT(UseFastStosb)) { + UseFastStosb = true; + } + } else if (UseFastStosb) { + warning("fast-string operations are not available on this CPU"); + FLAG_SET_DEFAULT(UseFastStosb, false); + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(AlignVector)) { // Modern processors allow misaligned memory operations for vectors.
--- a/src/cpu/x86/vm/vm_version_x86.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -204,7 +204,8 @@ avx2 : 1, : 2, bmi2 : 1, - : 23; + erms : 1, + : 22; } bits; }; @@ -247,7 +248,8 @@ CPU_TSCINV = (1 << 16), CPU_AVX = (1 << 17), CPU_AVX2 = (1 << 18), - CPU_AES = (1 << 19) + CPU_AES = (1 << 19), + CPU_ERMS = (1 << 20) // enhanced 'rep movsb/stosb' instructions } cpuFeatureFlags; enum { @@ -425,6 +427,8 @@ result |= CPU_TSCINV; if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) result |= CPU_AES; + if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) + result |= CPU_ERMS; // AMD features. if (is_amd()) { @@ -489,7 +493,7 @@ return (_cpuid_info.std_max_function >= 0xB) && // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level. // Some cpus have max cpuid >= 0xB but do not support processor topology. - ((_cpuid_info.tpl_cpuidB0_eax & 0x1f | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); + (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); } static uint cores_per_cpu() { @@ -550,6 +554,7 @@ static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } + static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() &&
--- a/src/cpu/x86/vm/x86_32.ad Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/x86_32.ad Fri Jan 11 10:38:38 2013 -0800 @@ -11578,15 +11578,28 @@ // ======================================================================= // fast clearing of an array instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ + predicate(!UseFastStosb); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "SHL ECX,1\t# Convert doublewords to words\n\t" - "XOR EAX,EAX\n\t" + format %{ "XOR EAX,EAX\t# ClearArray:\n\t" + "SHL ECX,1\t# Convert doublewords to words\n\t" "REP STOS\t# store EAX into [EDI++] while ECX--" %} - opcode(0,0x4); - ins_encode( Opcode(0xD1), RegOpc(ECX), - OpcRegReg(0x33,EAX,EAX), - Opcode(0xF3), Opcode(0xAB) ); + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ + predicate(UseFastStosb); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); + format %{ "XOR EAX,EAX\t# ClearArray:\n\t" + "SHL ECX,3\t# Convert doublewords to bytes\n\t" + "REP STOSB\t# store EAX into [EDI++] while ECX--" %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + %} ins_pipe( pipe_slow ); %}
--- a/src/cpu/x86/vm/x86_64.ad Wed Jan 09 20:33:26 2013 -0800 +++ b/src/cpu/x86/vm/x86_64.ad Fri Jan 11 10:38:38 2013 -0800 @@ -10170,16 +10170,33 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ + predicate(!UseFastStosb); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); - format %{ "xorl rax, rax\t# ClearArray:\n\t" - "rep stosq\t# Store rax to *rdi++ while rcx--" %} - ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax - Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos + format %{ "xorq rax, rax\t# ClearArray:\n\t" + "rep stosq\t# Store rax to *rdi++ while rcx--" %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + %} ins_pipe(pipe_slow); %} +instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, + rFlagsReg cr) +%{ + predicate(UseFastStosb); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); + format %{ "xorq rax, rax\t# ClearArray:\n\t" + "shlq rcx,3\t# Convert doublewords to bytes\n\t" + "rep stosb\t# Store rax to *rdi++ while rcx--" %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); + %} + ins_pipe( pipe_slow ); +%} + instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, regD tmp1, rFlagsReg cr) %{
--- a/src/share/tools/LogCompilation/README Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/tools/LogCompilation/README Fri Jan 11 10:38:38 2013 -0800 @@ -13,6 +13,6 @@ More information about the LogCompilation output can be found at -http://wikis.sun.com/display/HotSpotInternals/LogCompilation+overview -http://wikis.sun.com/display/HotSpotInternals/PrintCompilation -http://wikis.sun.com/display/HotSpotInternals/LogCompilation+tool +https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+overview +https://wikis.oracle.com/display/HotSpotInternals/PrintCompilation +https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+tool
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java Fri Jan 11 10:38:38 2013 -0800 @@ -38,6 +38,7 @@ private String reason; private List<CallSite> calls; private int endNodes; + private int endLiveNodes; private double timeStamp; CallSite() { @@ -106,7 +107,7 @@ } } if (getEndNodes() > 0) { - stream.printf(" (end time: %6.4f nodes: %d)", getTimeStamp(), getEndNodes()); + stream.printf(" (end time: %6.4f nodes: %d live: %d)", getTimeStamp(), getEndNodes(), getEndLiveNodes()); } stream.println(""); if (getReceiver() != null) { @@ -195,6 +196,14 @@ return endNodes; } + void setEndLiveNodes(int n) { + endLiveNodes = n; + } + + public int getEndLiveNodes() { + return endLiveNodes; + } + void setTimeStamp(double time) { timeStamp = time; }
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java Fri Jan 11 10:38:38 2013 -0800 @@ -37,13 +37,13 @@ public class LogCompilation extends DefaultHandler implements ErrorHandler, Constants { public static void usage(int exitcode) { - System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -N ] file1 ..."); + System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -n ] file1 ..."); System.out.println(" -c: clean up malformed 1.5 xml"); System.out.println(" -i: print inlining decisions"); System.out.println(" -S: print compilation statistics"); System.out.println(" -s: sort events by start time"); System.out.println(" -e: sort events by elapsed time"); - System.out.println(" -N: sort events by name and start"); + System.out.println(" -n: sort events by name and start"); System.exit(exitcode); } @@ -137,7 +137,11 @@ v2 = Integer.valueOf(0); } phaseNodes.put(phase.getName(), Integer.valueOf(v2.intValue() + phase.getNodes())); - out.printf("\t%s %6.4f %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes()); + /* Print phase name, elapsed time, nodes at the start of the phase, + nodes created in the phase, live nodes at the start of the phase, + live nodes added in the phase. + */ + out.printf("\t%s %6.4f %d %d %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes(), phase.getStartLiveNodes(), phase.getLiveNodes()); } } else if (e instanceof MakeNotEntrantEvent) { MakeNotEntrantEvent mne = (MakeNotEntrantEvent) e;
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java Fri Jan 11 10:38:38 2013 -0800 @@ -224,7 +224,6 @@ throw new InternalError("can't find " + name); } int indent = 0; - String compile_id; String type(String id) { String result = types.get(id); @@ -268,12 +267,18 @@ if (qname.equals("phase")) { Phase p = new Phase(search(atts, "name"), Double.parseDouble(search(atts, "stamp")), - Integer.parseInt(search(atts, "nodes"))); + Integer.parseInt(search(atts, "nodes", "0")), + Integer.parseInt(search(atts, "live"))); phaseStack.push(p); } else if (qname.equals("phase_done")) { Phase p = phaseStack.pop(); - p.setEndNodes(Integer.parseInt(search(atts, "nodes"))); + if (! p.getId().equals(search(atts, "name"))) { + System.out.println("phase: " + p.getId()); + throw new InternalError("phase name mismatch"); + } p.setEnd(Double.parseDouble(search(atts, "stamp"))); + p.setEndNodes(Integer.parseInt(search(atts, "nodes", "0"))); + p.setEndLiveNodes(Integer.parseInt(search(atts, "live"))); compile.getPhases().add(p); } else if (qname.equals("task")) { compile = new Compilation(Integer.parseInt(search(atts, "compile_id", "-1"))); @@ -317,13 +322,16 @@ m.setName(search(atts, "name")); m.setReturnType(type(search(atts, "return"))); m.setArguments(search(atts, "arguments", "void")); - m.setBytes(search(atts, "bytes")); - m.setIICount(search(atts, "iicount")); - m.setFlags(search(atts, "flags")); + + if (search(atts, "unloaded", "0").equals("0")) { + m.setBytes(search(atts, "bytes")); + m.setIICount(search(atts, "iicount")); + m.setFlags(search(atts, "flags")); + } methods.put(id, m); } else if (qname.equals("call")) { site = new CallSite(bci, method(search(atts, "method"))); - site.setCount(Integer.parseInt(search(atts, "count"))); + site.setCount(Integer.parseInt(search(atts, "count", "0"))); String receiver = atts.getValue("receiver"); if (receiver != null) { site.setReceiver(type(receiver)); @@ -406,6 +414,7 @@ } else if (qname.equals("parse_done")) { CallSite call = scopes.pop(); call.setEndNodes(Integer.parseInt(search(atts, "nodes", "1"))); + call.setEndLiveNodes(Integer.parseInt(search(atts, "live", "1"))); call.setTimeStamp(Double.parseDouble(search(atts, "stamp"))); scopes.push(call); }
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java Fri Jan 11 10:38:38 2013 -0800 @@ -30,10 +30,13 @@ private final int startNodes; private int endNodes; + private final int startLiveNodes; + private int endLiveNodes; - Phase(String n, double s, int nodes) { + Phase(String n, double s, int nodes, int live) { super(s, n); startNodes = nodes; + startLiveNodes = live; } int getNodes() { @@ -55,6 +58,22 @@ public int getEndNodes() { return endNodes; } + /* Number of live nodes added by the phase */ + int getLiveNodes() { + return getEndLiveNodes() - getStartLiveNodes(); + } + + void setEndLiveNodes(int n) { + endLiveNodes = n; + } + + public int getStartLiveNodes() { + return startLiveNodes; + } + + public int getEndLiveNodes() { + return endLiveNodes; + } @Override public void print(PrintStream stream) {
--- a/src/share/vm/c1/c1_Compilation.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/c1/c1_Compilation.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -129,7 +129,15 @@ CHECK_BAILOUT(); // setup ir + CompileLog* log = this->log(); + if (log != NULL) { + log->begin_head("parse method='%d' ", + log->identify(_method)); + log->stamp(); + log->end_head(); + } _hir = new IR(this, method(), osr_bci()); + if (log) log->done("parse"); if (!_hir->is_valid()) { bailout("invalid parsing"); return;
--- a/src/share/vm/c1/c1_GraphBuilder.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1836,7 +1836,7 @@ // check if we could do inlining if (!PatchALot && Inline && klass->is_loaded() && (klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized()) - && target->will_link(klass, callee_holder, code)) { + && target->is_loaded()) { // callee is known => check if we have static binding assert(target->is_loaded(), "callee must be known"); if (code == Bytecodes::_invokestatic ||
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -282,7 +282,7 @@ ciMethod* inline_target = NULL; if (target->is_loaded() && klass->is_loaded() && (klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized()) - && target->will_link(klass, callee_holder, code)) { + && target->is_loaded()) { if (code == Bytecodes::_invokestatic || code == Bytecodes::_invokespecial || code == Bytecodes::_invokevirtual && target->is_final_method()) {
--- a/src/share/vm/ci/ciField.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/ci/ciField.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -366,10 +366,12 @@ // ------------------------------------------------------------------ // ciField::print void ciField::print() { - tty->print("<ciField "); + tty->print("<ciField name="); _holder->print_name(); tty->print("."); _name->print_symbol(); + tty->print(" signature="); + _signature->print_symbol(); tty->print(" offset=%d type=", _offset); if (_type != NULL) _type->print_name(); else tty->print("(reference)");
--- a/src/share/vm/ci/ciMethod.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/ci/ciMethod.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -735,6 +735,24 @@ } // ------------------------------------------------------------------ +// ciMethod::get_field_at_bci +ciField* ciMethod::get_field_at_bci(int bci, bool &will_link) { + ciBytecodeStream iter(this); + iter.reset_to_bci(bci); + iter.next(); + return iter.get_field(will_link); +} + +// ------------------------------------------------------------------ +// ciMethod::get_method_at_bci +ciMethod* ciMethod::get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature) { + ciBytecodeStream iter(this); + iter.reset_to_bci(bci); + iter.next(); + return iter.get_method(will_link, declared_signature); +} + +// ------------------------------------------------------------------ // Adjust a CounterData count to be commensurate with // interpreter_invocation_count. If the MDO exists for // only 25% of the time the method exists, then the @@ -869,25 +887,6 @@ } // ------------------------------------------------------------------ -// ciMethod::will_link -// -// Will this method link in a specific calling context? -bool ciMethod::will_link(ciKlass* accessing_klass, - ciKlass* declared_method_holder, - Bytecodes::Code bc) { - if (!is_loaded()) { - // Method lookup failed. - return false; - } - - // The link checks have been front-loaded into the get_method - // call. This method (ciMethod::will_link()) will be removed - // in the future. - - return true; -} - -// ------------------------------------------------------------------ // ciMethod::should_exclude // // Should this method be excluded from compilation?
--- a/src/share/vm/ci/ciMethod.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/ci/ciMethod.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -224,6 +224,9 @@ ciCallProfile call_profile_at_bci(int bci); int interpreter_call_site_count(int bci); + ciField* get_field_at_bci( int bci, bool &will_link); + ciMethod* get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature); + // Given a certain calling environment, find the monomorphic target // for the call. Return NULL if the call is not monomorphic in // its calling environment. @@ -239,9 +242,6 @@ int resolve_vtable_index(ciKlass* caller, ciKlass* receiver); // Compilation directives - bool will_link(ciKlass* accessing_klass, - ciKlass* declared_method_holder, - Bytecodes::Code bc); bool should_exclude(); bool should_inline(); bool should_not_inline();
--- a/src/share/vm/ci/ciSignature.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/ci/ciSignature.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -57,12 +57,14 @@ ciSymbol* as_symbol() const { return _symbol; } ciKlass* accessing_klass() const { return _accessing_klass; } - ciType* return_type() const; - ciType* type_at(int index) const; + ciType* return_type() const; + ciType* type_at(int index) const; int size() const { return _size; } int count() const { return _count; } + int arg_size_for_bc(Bytecodes::Code bc) { return size() + (Bytecodes::has_receiver(bc) ? 1 : 0); } + bool equals(ciSignature* that); void print_signature();
--- a/src/share/vm/compiler/compilerOracle.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/compiler/compilerOracle.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -538,6 +538,7 @@ if (match != NULL) { if (!_quiet) { + ResourceMark rm; tty->print("CompilerOracle: %s ", command_names[command]); match->print(); }
--- a/src/share/vm/interpreter/bytecodes.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/interpreter/bytecodes.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -423,7 +423,9 @@ static bool is_zero_const (Code code) { return (code == _aconst_null || code == _iconst_0 || code == _fconst_0 || code == _dconst_0); } static bool is_invoke (Code code) { return (_invokevirtual <= code && code <= _invokedynamic); } - + static bool has_receiver (Code code) { assert(is_invoke(code), ""); return code == _invokevirtual || + code == _invokespecial || + code == _invokeinterface; } static bool has_optional_appendix(Code code) { return code == _invokedynamic || code == _invokehandle; } static int compute_flags (const char* format, int more_flags = 0); // compute the flags
--- a/src/share/vm/interpreter/interpreterRuntime.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/interpreter/interpreterRuntime.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -427,7 +427,7 @@ // exception handler lookup KlassHandle h_klass(THREAD, h_exception->klass()); - handler_bci = h_method->fast_exception_handler_bci_for(h_klass, current_bci, THREAD); + handler_bci = methodOopDesc::fast_exception_handler_bci_for(h_method, h_klass, current_bci, THREAD); if (HAS_PENDING_EXCEPTION) { // We threw an exception while trying to find the exception handler. // Transfer the new exception to the exception handle which will
--- a/src/share/vm/oops/methodOop.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/oops/methodOop.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -109,16 +109,16 @@ return buf; } -int methodOopDesc::fast_exception_handler_bci_for(KlassHandle ex_klass, int throw_bci, TRAPS) { +int methodOopDesc::fast_exception_handler_bci_for(methodHandle mh, KlassHandle ex_klass, int throw_bci, TRAPS) { // exception table holds quadruple entries of the form (beg_bci, end_bci, handler_bci, klass_index) // access exception table - ExceptionTable table(this); + ExceptionTable table(mh()); int length = table.length(); // iterate through all entries sequentially - constantPoolHandle pool(THREAD, constants()); + constantPoolHandle pool(THREAD, mh->constants()); for (int i = 0; i < length; i ++) { //reacquire the table in case a GC happened - ExceptionTable table(this); + ExceptionTable table(mh()); int beg_bci = table.start_pc(i); int end_bci = table.end_pc(i); assert(beg_bci <= end_bci, "inconsistent exception table");
--- a/src/share/vm/oops/methodOop.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/oops/methodOop.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -306,7 +306,7 @@ // exception handler which caused the exception to be thrown, which // is needed for proper retries. See, for example, // InterpreterRuntime::exception_handler_for_exception. - int fast_exception_handler_bci_for(KlassHandle ex_klass, int throw_bci, TRAPS); + static int fast_exception_handler_bci_for(methodHandle mh, KlassHandle ex_klass, int throw_bci, TRAPS); // method data access methodDataOop method_data() const {
--- a/src/share/vm/opto/addnode.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/addnode.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -189,6 +189,11 @@ set_req(1, addx); set_req(2, a22); progress = this; + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (add2->outcnt() == 0 && igvn) { + // add disconnected. + igvn->_worklist.push(add2); + } } } @@ -624,6 +629,11 @@ if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant? set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1)))); set_req(Offset, add->in(2)); + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (add->outcnt() == 0 && igvn) { + // add disconnected. + igvn->_worklist.push((Node*)add); + } return this; // Made progress } }
--- a/src/share/vm/opto/block.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/block.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -292,7 +292,7 @@ void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs); bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call); // Cleanup if any code lands between a Call and his Catch - void call_catch_cleanup(Block_Array &bbs); + void call_catch_cleanup(Block_Array &bbs, Compile *C); // Detect implicit-null-check opportunities. Basically, find NULL checks // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby.
--- a/src/share/vm/opto/bytecodeInfo.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/bytecodeInfo.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -45,7 +45,8 @@ _method(callee), _site_invoke_ratio(site_invoke_ratio), _max_inline_level(max_inline_level), - _count_inline_bcs(method()->code_size_for_inlining()) + _count_inline_bcs(method()->code_size_for_inlining()), + _subtrees(c->comp_arena(), 2, 0, NULL) { NOT_PRODUCT(_count_inlines = 0;) if (_caller_jvms != NULL) { @@ -208,16 +209,18 @@ if ( callee_method->dont_inline()) return "don't inline by annotation"; if ( callee_method->has_unloaded_classes_in_signature()) return "unloaded signature classes"; - if (callee_method->force_inline() || callee_method->should_inline()) { + if (callee_method->should_inline()) { // ignore heuristic controls on inlining return NULL; } // Now perform checks which are heuristic - if (callee_method->has_compiled_code() && - callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) { + if (!callee_method->force_inline()) { + if (callee_method->has_compiled_code() && + callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) { return "already compiled into a big method"; + } } // don't inline exception code unless the top method belongs to an @@ -270,12 +273,15 @@ //-----------------------------try_to_inline----------------------------------- // return NULL if ok, reason for not inlining otherwise // Relocated from "InliningClosure::try_to_inline" -const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) { - +const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay) { // Old algorithm had funny accumulating BC-size counters if (UseOldInlining && ClipInlining && (int)count_inline_bcs() >= DesiredMethodLimit) { - return "size > DesiredMethodLimit"; + if (!callee_method->force_inline() || !IncrementalInline) { + return "size > DesiredMethodLimit"; + } else if (!C->inlining_incrementally()) { + should_delay = true; + } } const char *msg = NULL; @@ -296,8 +302,13 @@ if (callee_method->code_size() > MaxTrivialSize) { // don't inline into giant methods - if (C->unique() > (uint)NodeCountInliningCutoff) { - return "NodeCountInliningCutoff"; + if (C->over_inlining_cutoff()) { + if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form()) + || !IncrementalInline) { + return "NodeCountInliningCutoff"; + } else { + should_delay = true; + } } if ((!UseInterpreter || CompileTheWorld) && @@ -316,7 +327,11 @@ return "not an accessor"; } if (inline_level() > _max_inline_level) { - return "inlining too deep"; + if (!callee_method->force_inline() || !IncrementalInline) { + return "inlining too deep"; + } else if (!C->inlining_incrementally()) { + should_delay = true; + } } // detect direct and indirect recursive inlining @@ -341,7 +356,11 @@ if (UseOldInlining && ClipInlining && (int)count_inline_bcs() + size >= DesiredMethodLimit) { - return "size > DesiredMethodLimit"; + if (!callee_method->force_inline() || !IncrementalInline) { + return "size > DesiredMethodLimit"; + } else if (!C->inlining_incrementally()) { + should_delay = true; + } } // ok, inline this method @@ -396,7 +415,7 @@ //------------------------------print_inlining--------------------------------- // Really, the failure_msg can be a success message also. void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const { - CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline"); + C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline"); if (callee_method == NULL) tty->print(" callee not monotonic or profiled"); if (Verbose && callee_method) { const InlineTree *top = this; @@ -406,8 +425,9 @@ } //------------------------------ok_to_inline----------------------------------- -WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) { +WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci, bool& should_delay) { assert(callee_method != NULL, "caller checks for optimized virtual!"); + assert(!should_delay, "should be initialized to false"); #ifdef ASSERT // Make sure the incoming jvms has the same information content as me. // This means that we can eventually make this whole class AllStatic. @@ -437,7 +457,7 @@ // Check if inlining policy says no. WarmCallInfo wci = *(initial_wci); - failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci); + failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci, should_delay); if (failure_msg != NULL && C->log() != NULL) { C->log()->inline_fail(failure_msg); }
--- a/src/share/vm/opto/c2_globals.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/c2_globals.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -115,6 +115,12 @@ notproduct(bool, VerifyOpto, false, \ "Apply more time consuming verification during compilation") \ \ + notproduct(bool, VerifyIdealNodeCount, false, \ + "Verify that tracked dead ideal node count is accurate") \ + \ + notproduct(bool, PrintIdealNodeCount, false, \ + "Print liveness counts of ideal nodes") \ + \ notproduct(bool, VerifyOptoOopOffsets, false, \ "Check types of base addresses in field references") \ \ @@ -600,6 +606,16 @@ \ develop(bool, VerifyAliases, false, \ "perform extra checks on the results of alias analysis") \ + \ + product(bool, IncrementalInline, true, \ + "do post parse inlining") \ + \ + develop(bool, AlwaysIncrementalInline, false, \ + "do all inlining incrementally") \ + \ + product(intx, LiveNodeCountInliningCutoff, 20000, \ + "max number of live nodes in a method") \ + C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/src/share/vm/opto/callGenerator.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/callGenerator.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -139,7 +139,7 @@ if (!is_static) { // Make an explicit receiver null_check as part of this call. // Since we share a map with the caller, his JVMS gets adjusted. - kit.null_check_receiver(method()); + kit.null_check_receiver_before_call(method()); if (kit.stopped()) { // And dump it back to the caller, decorated with any exceptions: return kit.transfer_exceptions_into_jvms(); @@ -207,7 +207,7 @@ >= (uint)ImplicitNullCheckThreshold))) { // Make an explicit receiver null_check as part of this call. // Since we share a map with the caller, his JVMS gets adjusted. - receiver = kit.null_check_receiver(method()); + receiver = kit.null_check_receiver_before_call(method()); if (kit.stopped()) { // And dump it back to the caller, decorated with any exceptions: return kit.transfer_exceptions_into_jvms(); @@ -262,8 +262,11 @@ // Allow inlining decisions to be delayed class LateInlineCallGenerator : public DirectCallGenerator { + protected: CallGenerator* _inline_cg; + virtual bool do_late_inline_check(JVMState* jvms) { return true; } + public: LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) : DirectCallGenerator(method, true), _inline_cg(inline_cg) {} @@ -274,9 +277,14 @@ virtual void do_late_inline(); virtual JVMState* generate(JVMState* jvms) { + Compile *C = Compile::current(); + C->print_inlining_skip(this); + // Record that this call site should be revisited once the main // parse is finished. - Compile::current()->add_late_inline(this); + if (!is_mh_late_inline()) { + C->add_late_inline(this); + } // Emit the CallStaticJava and request separate projections so // that the late inlining logic can distinguish between fall @@ -285,15 +293,33 @@ return DirectCallGenerator::generate(jvms); } + virtual void print_inlining_late(const char* msg) { + CallNode* call = call_node(); + Compile* C = Compile::current(); + C->print_inlining_insert(this); + C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg); + } + }; - void LateInlineCallGenerator::do_late_inline() { // Can't inline it if (call_node() == NULL || call_node()->outcnt() == 0 || call_node()->in(0) == NULL || call_node()->in(0)->is_top()) return; + for (int i1 = 0; i1 < method()->arg_size(); i1++) { + if (call_node()->in(TypeFunc::Parms + i1)->is_top()) { + assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing"); + return; + } + } + + if (call_node()->in(TypeFunc::Memory)->is_top()) { + assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing"); + return; + } + CallStaticJavaNode* call = call_node(); // Make a clone of the JVMState that appropriate to use for driving a parse @@ -307,7 +333,9 @@ // Make sure the state is a MergeMem for parsing. if (!map->in(TypeFunc::Memory)->is_MergeMem()) { - map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory))); + Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory)); + C->initial_gvn()->set_type_bottom(mem); + map->set_req(TypeFunc::Memory, mem); } // Make enough space for the expression stack and transfer the incoming arguments @@ -320,6 +348,13 @@ } } + if (!do_late_inline_check(jvms)) { + map->disconnect_inputs(NULL, C); + return; + } + + C->print_inlining_insert(this); + CompileLog* log = C->log(); if (log != NULL) { log->head("late_inline method='%d'", log->identify(method())); @@ -354,6 +389,10 @@ result = (result_size == 1) ? kit.pop() : kit.pop_pair(); } + C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops()); + C->env()->notice_inlined_method(_inline_cg->method()); + C->set_inlining_progress(true); + kit.replace_call(call, result); } @@ -362,6 +401,83 @@ return new LateInlineCallGenerator(method, inline_cg); } +class LateInlineMHCallGenerator : public LateInlineCallGenerator { + ciMethod* _caller; + int _attempt; + bool _input_not_const; + + virtual bool do_late_inline_check(JVMState* jvms); + virtual bool already_attempted() const { return _attempt > 0; } + + public: + LateInlineMHCallGenerator(ciMethod* caller, ciMethod* callee, bool input_not_const) : + LateInlineCallGenerator(callee, NULL), _caller(caller), _attempt(0), _input_not_const(input_not_const) {} + + virtual bool is_mh_late_inline() const { return true; } + + virtual JVMState* generate(JVMState* jvms) { + JVMState* new_jvms = LateInlineCallGenerator::generate(jvms); + if (_input_not_const) { + // inlining won't be possible so no need to enqueue right now. + call_node()->set_generator(this); + } else { + Compile::current()->add_late_inline(this); + } + return new_jvms; + } + + virtual void print_inlining_late(const char* msg) { + if (!_input_not_const) return; + LateInlineCallGenerator::print_inlining_late(msg); + } +}; + +bool LateInlineMHCallGenerator::do_late_inline_check(JVMState* jvms) { + + CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const); + + if (!_input_not_const) { + _attempt++; + } + + if (cg != NULL) { + assert(!cg->is_late_inline() && cg->is_inline(), "we're doing late inlining"); + _inline_cg = cg; + Compile::current()->dec_number_of_mh_late_inlines(); + return true; + } + + call_node()->set_generator(this); + return false; +} + +CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const) { + Compile::current()->inc_number_of_mh_late_inlines(); + CallGenerator* cg = new LateInlineMHCallGenerator(caller, callee, input_not_const); + return cg; +} + +class LateInlineStringCallGenerator : public LateInlineCallGenerator { + + public: + LateInlineStringCallGenerator(ciMethod* method, CallGenerator* inline_cg) : + LateInlineCallGenerator(method, inline_cg) {} + + virtual JVMState* generate(JVMState* jvms) { + Compile *C = Compile::current(); + C->print_inlining_skip(this); + + C->add_string_late_inline(this); + + JVMState* new_jvms = DirectCallGenerator::generate(jvms); + return new_jvms; + } +}; + +CallGenerator* CallGenerator::for_string_late_inline(ciMethod* method, CallGenerator* inline_cg) { + return new LateInlineStringCallGenerator(method, inline_cg); +} + //---------------------------WarmCallGenerator-------------------------------- // Internal class which handles initial deferral of inlining decisions. @@ -491,7 +607,7 @@ jvms->bci(), log->identify(_predicted_receiver)); } - receiver = kit.null_check_receiver(method()); + receiver = kit.null_check_receiver_before_call(method()); if (kit.stopped()) { return kit.transfer_exceptions_into_jvms(); } @@ -580,35 +696,52 @@ } -CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee) { +CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) { assert(callee->is_method_handle_intrinsic() || callee->is_compiled_lambda_form(), "for_method_handle_call mismatch"); - CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee); - if (cg != NULL) - return cg; - return CallGenerator::for_direct_call(callee); + bool input_not_const; + CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const); + Compile* C = Compile::current(); + if (cg != NULL) { + if (!delayed_forbidden && AlwaysIncrementalInline) { + return CallGenerator::for_late_inline(callee, cg); + } else { + return cg; + } + } + int bci = jvms->bci(); + ciCallProfile profile = caller->call_profile_at_bci(bci); + int call_site_count = caller->scale_count(profile.count()); + + if (IncrementalInline && call_site_count > 0 && + (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())) { + return CallGenerator::for_mh_late_inline(caller, callee, input_not_const); + } else { + return CallGenerator::for_direct_call(callee); + } } -CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) { +CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) { GraphKit kit(jvms); PhaseGVN& gvn = kit.gvn(); Compile* C = kit.C; vmIntrinsics::ID iid = callee->intrinsic_id(); + input_not_const = true; switch (iid) { case vmIntrinsics::_invokeBasic: { - // get MethodHandle receiver + // Get MethodHandle receiver: Node* receiver = kit.argument(0); if (receiver->Opcode() == Op_ConP) { + input_not_const = false; const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr(); ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget(); guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove const int vtable_index = methodOopDesc::invalid_vtable_index; - CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS); + CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, true, true); + assert (!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here"); if (cg != NULL && cg->is_inline()) return cg; - } else { - if (PrintInlining) CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant"); } } break; @@ -618,9 +751,10 @@ case vmIntrinsics::_linkToSpecial: case vmIntrinsics::_linkToInterface: { - // pop MemberName argument + // Get MemberName argument: Node* member_name = kit.argument(callee->arg_size() - 1); if (member_name->Opcode() == Op_ConP) { + input_not_const = false; const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr(); ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget(); @@ -655,7 +789,8 @@ } const int vtable_index = methodOopDesc::invalid_vtable_index; const bool call_is_virtual = target->is_abstract(); // FIXME workaround - CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS); + CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS, true, true); + assert (!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here"); if (cg != NULL && cg->is_inline()) return cg; }
--- a/src/share/vm/opto/callGenerator.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/callGenerator.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -68,6 +68,12 @@ // is_late_inline: supports conversion of call into an inline virtual bool is_late_inline() const { return false; } + // same but for method handle calls + virtual bool is_mh_late_inline() const { return false; } + + // for method handle calls: have we tried inlinining the call already? + virtual bool already_attempted() const { ShouldNotReachHere(); return false; } + // Replace the call with an inline version of the code virtual void do_late_inline() { ShouldNotReachHere(); } @@ -112,11 +118,13 @@ static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface static CallGenerator* for_dynamic_call(ciMethod* m); // invokedynamic - static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee); - static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee); + static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden); + static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const); // How to generate a replace a direct call with an inline version static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg); + static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const); + static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg); // How to make a call but defer the decision whether to inline or not. static CallGenerator* for_warm_call(WarmCallInfo* ci, @@ -147,9 +155,11 @@ CallGenerator* cg); virtual Node* generate_predicate(JVMState* jvms) { return NULL; }; - static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) { + virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); } + + static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) { if (PrintInlining) - CompileTask::print_inlining(callee, inline_level, bci, msg); + C->print_inlining(callee, inline_level, bci, msg); } };
--- a/src/share/vm/opto/callnode.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/callnode.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "ci/bcEscapeAnalyzer.hpp" #include "compiler/oopMap.hpp" +#include "opto/callGenerator.hpp" #include "opto/callnode.hpp" #include "opto/escape.hpp" #include "opto/locknode.hpp" @@ -746,7 +747,7 @@ projs->fallthrough_ioproj = pn; for (DUIterator j = pn->outs(); pn->has_out(j); j++) { Node* e = pn->out(j); - if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) { + if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) { assert(projs->exobj == NULL, "only one"); projs->exobj = e; } @@ -770,16 +771,38 @@ // and the exception object may not exist if an exception handler // swallows the exception but all the other must exist and be found. assert(projs->fallthrough_proj != NULL, "must be found"); - assert(projs->fallthrough_catchproj != NULL, "must be found"); - assert(projs->fallthrough_memproj != NULL, "must be found"); - assert(projs->fallthrough_ioproj != NULL, "must be found"); - assert(projs->catchall_catchproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->fallthrough_catchproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->fallthrough_memproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->fallthrough_ioproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->catchall_catchproj != NULL, "must be found"); if (separate_io_proj) { - assert(projs->catchall_memproj != NULL, "must be found"); - assert(projs->catchall_ioproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->catchall_memproj != NULL, "must be found"); + assert(Compile::current()->inlining_incrementally() || projs->catchall_ioproj != NULL, "must be found"); } } +Node *CallNode::Ideal(PhaseGVN *phase, bool can_reshape) { + CallGenerator* cg = generator(); + if (can_reshape && cg != NULL && cg->is_mh_late_inline() && !cg->already_attempted()) { + // Check whether this MH handle call becomes a candidate for inlining + ciMethod* callee = cg->method(); + vmIntrinsics::ID iid = callee->intrinsic_id(); + if (iid == vmIntrinsics::_invokeBasic) { + if (in(TypeFunc::Parms)->Opcode() == Op_ConP) { + phase->C->prepend_late_inline(cg); + set_generator(NULL); + } + } else { + assert(callee->has_member_arg(), "wrong type of call?"); + if (in(TypeFunc::Parms + callee->arg_size() - 1)->Opcode() == Op_ConP) { + phase->C->prepend_late_inline(cg); + set_generator(NULL); + } + } + } + return SafePointNode::Ideal(phase, can_reshape); +} + //============================================================================= uint CallJavaNode::size_of() const { return sizeof(*this); }
--- a/src/share/vm/opto/callnode.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/callnode.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -344,17 +344,26 @@ OopMap *oop_map() const { return _oop_map; } void set_oop_map(OopMap *om) { _oop_map = om; } + private: + void verify_input(JVMState* jvms, uint idx) const { + assert(verify_jvms(jvms), "jvms must match"); + Node* n = in(idx); + assert((!n->bottom_type()->isa_long() && !n->bottom_type()->isa_double()) || + in(idx + 1)->is_top(), "2nd half of long/double"); + } + + public: // Functionality from old debug nodes which has changed Node *local(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->locoff() + idx); return in(jvms->locoff() + idx); } Node *stack(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->stkoff() + idx); return in(jvms->stkoff() + idx); } Node *argument(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->argoff() + idx); return in(jvms->argoff() + idx); } Node *monitor_box(JVMState* jvms, uint idx) const { @@ -498,6 +507,7 @@ Node* exobj; }; +class CallGenerator; //------------------------------CallNode--------------------------------------- // Call nodes now subsume the function of debug nodes at callsites, so they @@ -508,26 +518,31 @@ const TypeFunc *_tf; // Function type address _entry_point; // Address of method being called float _cnt; // Estimate of number of times called + CallGenerator* _generator; // corresponding CallGenerator for some late inline calls CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type) : SafePointNode(tf->domain()->cnt(), NULL, adr_type), _tf(tf), _entry_point(addr), - _cnt(COUNT_UNKNOWN) + _cnt(COUNT_UNKNOWN), + _generator(NULL) { init_class_id(Class_Call); } - const TypeFunc* tf() const { return _tf; } - const address entry_point() const { return _entry_point; } - const float cnt() const { return _cnt; } + const TypeFunc* tf() const { return _tf; } + const address entry_point() const { return _entry_point; } + const float cnt() const { return _cnt; } + CallGenerator* generator() const { return _generator; } - void set_tf(const TypeFunc* tf) { _tf = tf; } - void set_entry_point(address p) { _entry_point = p; } - void set_cnt(float c) { _cnt = c; } + void set_tf(const TypeFunc* tf) { _tf = tf; } + void set_entry_point(address p) { _entry_point = p; } + void set_cnt(float c) { _cnt = c; } + void set_generator(CallGenerator* cg) { _generator = cg; } virtual const Type *bottom_type() const; virtual const Type *Value( PhaseTransform *phase ) const; + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual Node *Identity( PhaseTransform *phase ) { return this; } virtual uint cmp( const Node &n ) const; virtual uint size_of() const = 0;
--- a/src/share/vm/opto/cfgnode.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/cfgnode.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -363,6 +363,49 @@ return true; // The Region node is unreachable - it is dead. } +bool RegionNode::try_clean_mem_phi(PhaseGVN *phase) { + // Incremental inlining + PhaseStringOpts sometimes produce: + // + // cmpP with 1 top input + // | + // If + // / \ + // IfFalse IfTrue /- Some Node + // \ / / / + // Region / /-MergeMem + // \---Phi + // + // + // It's expected by PhaseStringOpts that the Region goes away and is + // replaced by If's control input but because there's still a Phi, + // the Region stays in the graph. The top input from the cmpP is + // propagated forward and a subgraph that is useful goes away. The + // code below replaces the Phi with the MergeMem so that the Region + // is simplified. + + PhiNode* phi = has_unique_phi(); + if (phi && phi->type() == Type::MEMORY && req() == 3 && phi->is_diamond_phi(true)) { + MergeMemNode* m = NULL; + assert(phi->req() == 3, "same as region"); + for (uint i = 1; i < 3; ++i) { + Node *mem = phi->in(i); + if (mem && mem->is_MergeMem() && in(i)->outcnt() == 1) { + // Nothing is control-dependent on path #i except the region itself. + m = mem->as_MergeMem(); + uint j = 3 - i; + Node* other = phi->in(j); + if (other && other == m->base_memory()) { + // m is a successor memory to other, and is not pinned inside the diamond, so push it out. + // This will allow the diamond to collapse completely. + phase->is_IterGVN()->replace_node(phi, m); + return true; + } + } + } + } + return false; +} + //------------------------------Ideal------------------------------------------ // Return a node which is more "ideal" than the current node. Must preserve // the CFG, but we can still strip out dead paths. @@ -375,6 +418,10 @@ bool has_phis = false; if (can_reshape) { // Need DU info to check for Phi users has_phis = (has_phi() != NULL); // Cache result + if (has_phis && try_clean_mem_phi(phase)) { + has_phis = false; + } + if (!has_phis) { // No Phi users? Nothing merging? for (uint i = 1; i < req()-1; i++) { Node *if1 = in(i); @@ -1005,7 +1052,9 @@ //------------------------------is_diamond_phi--------------------------------- // Does this Phi represent a simple well-shaped diamond merge? Return the // index of the true path or 0 otherwise. -int PhiNode::is_diamond_phi() const { +// If check_control_only is true, do not inspect the If node at the +// top, and return -1 (not an edge number) on success. +int PhiNode::is_diamond_phi(bool check_control_only) const { // Check for a 2-path merge Node *region = in(0); if( !region ) return 0; @@ -1018,6 +1067,7 @@ Node *iff = ifp1->in(0); if( !iff || !iff->is_If() ) return 0; if( iff != ifp2->in(0) ) return 0; + if (check_control_only) return -1; // Check for a proper bool/cmp const Node *b = iff->in(1); if( !b->is_Bool() ) return 0; @@ -1566,6 +1616,10 @@ Node* n = in(j); // Get the input if (rc == NULL || phase->type(rc) == Type::TOP) { if (n != top) { // Not already top? + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (can_reshape && igvn != NULL) { + igvn->_worklist.push(r); + } set_req(j, top); // Nuke it down progress = this; // Record progress }
--- a/src/share/vm/opto/cfgnode.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/cfgnode.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -95,6 +95,7 @@ virtual Node *Identity( PhaseTransform *phase ); virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual const RegMask &out_RegMask() const; + bool try_clean_mem_phi(PhaseGVN *phase); }; //------------------------------JProjNode-------------------------------------- @@ -181,7 +182,7 @@ LoopSafety simple_data_loop_check(Node *in) const; // Is it unsafe data loop? It becomes a dead loop if this phi node removed. bool is_unsafe_data_reference(Node *in) const; - int is_diamond_phi() const; + int is_diamond_phi(bool check_control_only = false) const; virtual int Opcode() const; virtual bool pinned() const { return in(0) != 0; } virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
--- a/src/share/vm/opto/chaitin.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/chaitin.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1495,7 +1495,7 @@ cisc->ins_req(1,src); // Requires a memory edge } b->_nodes.map(j,cisc); // Insert into basic block - n->subsume_by(cisc); // Correct graph + n->subsume_by(cisc, C); // Correct graph // ++_used_cisc_instructions; #ifndef PRODUCT
--- a/src/share/vm/opto/compile.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/compile.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -134,7 +134,7 @@ void Compile::register_intrinsic(CallGenerator* cg) { if (_intrinsics == NULL) { - _intrinsics = new GrowableArray<CallGenerator*>(60); + _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL); } // This code is stolen from ciObjectFactory::insert. // Really, GrowableArray should have methods for @@ -316,7 +316,12 @@ } - +static inline bool not_a_node(const Node* n) { + if (n == NULL) return true; + if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc. + if (*(address*)n == badAddress) return true; // kill by Node::destruct + return false; +} // Identify all nodes that are reachable from below, useful. // Use breadth-first pass that records state in a Unique_Node_List, @@ -337,12 +342,42 @@ uint max = n->len(); for( uint i = 0; i < max; ++i ) { Node *m = n->in(i); - if( m == NULL ) continue; + if (not_a_node(m)) continue; useful.push(m); } } } +// Update dead_node_list with any missing dead nodes using useful +// list. Consider all non-useful nodes to be useless i.e., dead nodes. +void Compile::update_dead_node_list(Unique_Node_List &useful) { + uint max_idx = unique(); + VectorSet& useful_node_set = useful.member_set(); + + for (uint node_idx = 0; node_idx < max_idx; node_idx++) { + // If node with index node_idx is not in useful set, + // mark it as dead in dead node list. + if (! useful_node_set.test(node_idx) ) { + record_dead_node(node_idx); + } + } +} + +void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) { + int shift = 0; + for (int i = 0; i < inlines->length(); i++) { + CallGenerator* cg = inlines->at(i); + CallNode* call = cg->call_node(); + if (shift > 0) { + inlines->at_put(i-shift, cg); + } + if (!useful.member(call)) { + shift++; + } + } + inlines->trunc_to(inlines->length()-shift); +} + // Disconnect all useless nodes by disconnecting those at the boundary. void Compile::remove_useless_nodes(Unique_Node_List &useful) { uint next = 0; @@ -372,6 +407,9 @@ remove_macro_node(n); } } + // clean up the late inline lists + remove_useless_late_inlines(&_string_late_inlines, useful); + remove_useless_late_inlines(&_late_inlines, useful); debug_only(verify_graph_edges(true/*check for no_dead_code*/);) } @@ -582,11 +620,21 @@ _inner_loops(0), _scratch_const_size(-1), _in_scratch_emit_size(false), + _dead_node_list(comp_arena()), + _dead_node_count(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), _printer(IdealGraphPrinter::printer()), #endif - _congraph(NULL) { + _congraph(NULL), + _late_inlines(comp_arena(), 2, 0, NULL), + _string_late_inlines(comp_arena(), 2, 0, NULL), + _late_inlines_pos(0), + _number_of_mh_late_inlines(0), + _inlining_progress(false), + _inlining_incrementally(false), + _print_inlining_list(NULL), + _print_inlining(0) { C = this; CompileWrapper cw(this); @@ -642,6 +690,9 @@ PhaseGVN gvn(node_arena(), estimated_size); set_initial_gvn(&gvn); + if (PrintInlining) { + _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer()); + } { // Scope for timing the parser TracePhase t3("parse", &_t_parser, true); @@ -708,28 +759,13 @@ rethrow_exceptions(kit.transfer_exceptions_into_jvms()); } - if (!failing() && has_stringbuilder()) { - { - // remove useless nodes to make the usage analysis simpler - ResourceMark rm; - PhaseRemoveUseless pru(initial_gvn(), &for_igvn); - } - - { - ResourceMark rm; - print_method("Before StringOpts", 3); - PhaseStringOpts pso(initial_gvn(), &for_igvn); - print_method("After StringOpts", 3); - } - - // now inline anything that we skipped the first time around - while (_late_inlines.length() > 0) { - CallGenerator* cg = _late_inlines.pop(); - cg->do_late_inline(); - if (failing()) return; - } + assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off"); + + if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) { + inline_string_calls(true); } - assert(_late_inlines.length() == 0, "should have been processed"); + + if (failing()) return; print_method("Before RemoveUseless", 3); @@ -873,7 +909,14 @@ _trace_opto_output(TraceOptoOutput), _printer(NULL), #endif - _congraph(NULL) { + _dead_node_list(comp_arena()), + _dead_node_count(0), + _congraph(NULL), + _number_of_mh_late_inlines(0), + _inlining_progress(false), + _inlining_incrementally(false), + _print_inlining_list(NULL), + _print_inlining(0) { C = this; #ifndef PRODUCT @@ -1080,6 +1123,72 @@ assert(_top == NULL || top()->is_top(), ""); } +#ifdef ASSERT +uint Compile::count_live_nodes_by_graph_walk() { + Unique_Node_List useful(comp_arena()); + // Get useful node list by walking the graph. + identify_useful_nodes(useful); + return useful.size(); +} + +void Compile::print_missing_nodes() { + + // Return if CompileLog is NULL and PrintIdealNodeCount is false. + if ((_log == NULL) && (! PrintIdealNodeCount)) { + return; + } + + // This is an expensive function. It is executed only when the user + // specifies VerifyIdealNodeCount option or otherwise knows the + // additional work that needs to be done to identify reachable nodes + // by walking the flow graph and find the missing ones using + // _dead_node_list. + + Unique_Node_List useful(comp_arena()); + // Get useful node list by walking the graph. + identify_useful_nodes(useful); + + uint l_nodes = C->live_nodes(); + uint l_nodes_by_walk = useful.size(); + + if (l_nodes != l_nodes_by_walk) { + if (_log != NULL) { + _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk))); + _log->stamp(); + _log->end_head(); + } + VectorSet& useful_member_set = useful.member_set(); + int last_idx = l_nodes_by_walk; + for (int i = 0; i < last_idx; i++) { + if (useful_member_set.test(i)) { + if (_dead_node_list.test(i)) { + if (_log != NULL) { + _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i); + } + if (PrintIdealNodeCount) { + // Print the log message to tty + tty->print_cr("mismatched_node idx='%d' both live and dead'", i); + useful.at(i)->dump(); + } + } + } + else if (! _dead_node_list.test(i)) { + if (_log != NULL) { + _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i); + } + if (PrintIdealNodeCount) { + // Print the log message to tty + tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i); + } + } + } + if (_log != NULL) { + _log->tail("mismatched_nodes"); + } + } +} +#endif + #ifndef PRODUCT void Compile::verify_top(Node* tn) const { if (tn != NULL) { @@ -1671,6 +1780,124 @@ assert(predicate_count()==0, "should be clean!"); } +// StringOpts and late inlining of string methods +void Compile::inline_string_calls(bool parse_time) { + { + // remove useless nodes to make the usage analysis simpler + ResourceMark rm; + PhaseRemoveUseless pru(initial_gvn(), for_igvn()); + } + + { + ResourceMark rm; + print_method("Before StringOpts", 3); + PhaseStringOpts pso(initial_gvn(), for_igvn()); + print_method("After StringOpts", 3); + } + + // now inline anything that we skipped the first time around + if (!parse_time) { + _late_inlines_pos = _late_inlines.length(); + } + + while (_string_late_inlines.length() > 0) { + CallGenerator* cg = _string_late_inlines.pop(); + cg->do_late_inline(); + if (failing()) return; + } + _string_late_inlines.trunc_to(0); +} + +void Compile::inline_incrementally_one(PhaseIterGVN& igvn) { + assert(IncrementalInline, "incremental inlining should be on"); + PhaseGVN* gvn = initial_gvn(); + + set_inlining_progress(false); + for_igvn()->clear(); + gvn->replace_with(&igvn); + + int i = 0; + + for (; i <_late_inlines.length() && !inlining_progress(); i++) { + CallGenerator* cg = _late_inlines.at(i); + _late_inlines_pos = i+1; + cg->do_late_inline(); + if (failing()) return; + } + int j = 0; + for (; i < _late_inlines.length(); i++, j++) { + _late_inlines.at_put(j, _late_inlines.at(i)); + } + _late_inlines.trunc_to(j); + + { + ResourceMark rm; + PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn()); + } + + igvn = PhaseIterGVN(gvn); +} + +// Perform incremental inlining until bound on number of live nodes is reached +void Compile::inline_incrementally(PhaseIterGVN& igvn) { + PhaseGVN* gvn = initial_gvn(); + + set_inlining_incrementally(true); + set_inlining_progress(true); + uint low_live_nodes = 0; + + while(inlining_progress() && _late_inlines.length() > 0) { + + if (live_nodes() > (uint)LiveNodeCountInliningCutoff) { + if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) { + // PhaseIdealLoop is expensive so we only try it once we are + // out of loop and we only try it again if the previous helped + // got the number of nodes down significantly + PhaseIdealLoop ideal_loop( igvn, false, true ); + if (failing()) return; + low_live_nodes = live_nodes(); + _major_progress = true; + } + + if (live_nodes() > (uint)LiveNodeCountInliningCutoff) { + break; + } + } + + inline_incrementally_one(igvn); + + if (failing()) return; + + igvn.optimize(); + + if (failing()) return; + } + + assert( igvn._worklist.size() == 0, "should be done with igvn" ); + + if (_string_late_inlines.length() > 0) { + assert(has_stringbuilder(), "inconsistent"); + for_igvn()->clear(); + initial_gvn()->replace_with(&igvn); + + inline_string_calls(false); + + if (failing()) return; + + { + ResourceMark rm; + PhaseRemoveUseless pru(initial_gvn(), for_igvn()); + } + + igvn = PhaseIterGVN(gvn); + + igvn.optimize(); + } + + set_inlining_incrementally(false); +} + + //------------------------------Optimize--------------------------------------- // Given a graph, optimize it. void Compile::Optimize() { @@ -1703,6 +1930,12 @@ if (failing()) return; + inline_incrementally(igvn); + + print_method("Incremental Inline", 2); + + if (failing()) return; + // Perform escape analysis if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { if (has_loops()) { @@ -1825,6 +2058,7 @@ } // (End scope of igvn; run destructor if necessary for asserts.) + dump_inlining(); // A method with only infinite loops has no edges entering loops from root { NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); ) @@ -2098,7 +2332,7 @@ // Eliminate trivially redundant StoreCMs and accumulate their // precedence edges. -static void eliminate_redundant_card_marks(Node* n) { +void Compile::eliminate_redundant_card_marks(Node* n) { assert(n->Opcode() == Op_StoreCM, "expected StoreCM"); if (n->in(MemNode::Address)->outcnt() > 1) { // There are multiple users of the same address so it might be @@ -2133,7 +2367,7 @@ // Eliminate the previous StoreCM prev->set_req(MemNode::Memory, mem->in(MemNode::Memory)); assert(mem->outcnt() == 0, "should be dead"); - mem->disconnect_inputs(NULL); + mem->disconnect_inputs(NULL, this); } else { prev = mem; } @@ -2144,7 +2378,7 @@ //------------------------------final_graph_reshaping_impl---------------------- // Implement items 1-5 from final_graph_reshaping below. -static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { +void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { if ( n->outcnt() == 0 ) return; // dead node uint nop = n->Opcode(); @@ -2174,8 +2408,7 @@ #ifdef ASSERT if( n->is_Mem() ) { - Compile* C = Compile::current(); - int alias_idx = C->get_alias_index(n->as_Mem()->adr_type()); + int alias_idx = get_alias_index(n->as_Mem()->adr_type()); assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw || // oop will be recorded in oop map if load crosses safepoint n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() || @@ -2224,7 +2457,7 @@ break; case Op_Opaque1: // Remove Opaque Nodes before matching case Op_Opaque2: // Remove Opaque Nodes before matching - n->subsume_by(n->in(1)); + n->subsume_by(n->in(1), this); break; case Op_CallStaticJava: case Op_CallJava: @@ -2344,8 +2577,7 @@ Node* nn = NULL; // Look for existing ConN node of the same exact type. - Compile* C = Compile::current(); - Node* r = C->root(); + Node* r = root(); uint cnt = r->outcnt(); for (uint i = 0; i < cnt; i++) { Node* m = r->raw_out(i); @@ -2358,11 +2590,11 @@ if (nn != NULL) { // Decode a narrow oop to match address // [R12 + narrow_oop_reg<<3 + offset] - nn = new (C) DecodeNNode(nn, t); + nn = new (this) DecodeNNode(nn, t); n->set_req(AddPNode::Base, nn); n->set_req(AddPNode::Address, nn); if (addp->outcnt() == 0) { - addp->disconnect_inputs(NULL); + addp->disconnect_inputs(NULL, this); } } } @@ -2374,7 +2606,6 @@ #ifdef _LP64 case Op_CastPP: if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { - Compile* C = Compile::current(); Node* in1 = n->in(1); const Type* t = n->bottom_type(); Node* new_in1 = in1->clone(); @@ -2403,9 +2634,9 @@ new_in1->set_req(0, n->in(0)); } - n->subsume_by(new_in1); + n->subsume_by(new_in1, this); if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } } break; @@ -2422,7 +2653,6 @@ } assert(in1->is_DecodeN(), "sanity"); - Compile* C = Compile::current(); Node* new_in2 = NULL; if (in2->is_DecodeN()) { new_in2 = in2->in(1); @@ -2433,7 +2663,7 @@ // oops implicit null check is not generated. // This will allow to generate normal oop implicit null check. if (Matcher::gen_narrow_oop_implicit_null_checks()) - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); + new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR); // // This transformation together with CastPP transformation above // will generated code for implicit NULL checks for compressed oops. @@ -2472,17 +2702,17 @@ // NullCheck base_reg // } else if (t->isa_oopptr()) { - new_in2 = ConNode::make(C, t->make_narrowoop()); + new_in2 = ConNode::make(this, t->make_narrowoop()); } } if (new_in2 != NULL) { - Node* cmpN = new (C) CmpNNode(in1->in(1), new_in2); - n->subsume_by( cmpN ); + Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2); + n->subsume_by(cmpN, this); if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } if (in2->outcnt() == 0) { - in2->disconnect_inputs(NULL); + in2->disconnect_inputs(NULL, this); } } } @@ -2498,18 +2728,17 @@ case Op_EncodeP: { Node* in1 = n->in(1); if (in1->is_DecodeN()) { - n->subsume_by(in1->in(1)); + n->subsume_by(in1->in(1), this); } else if (in1->Opcode() == Op_ConP) { - Compile* C = Compile::current(); const Type* t = in1->bottom_type(); if (t == TypePtr::NULL_PTR) { - n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); + n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this); } else if (t->isa_oopptr()) { - n->subsume_by(ConNode::make(C, t->make_narrowoop())); + n->subsume_by(ConNode::make(this, t->make_narrowoop()), this); } } if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } break; } @@ -2532,7 +2761,7 @@ } } assert(proj != NULL, "must be found"); - p->subsume_by(proj); + p->subsume_by(proj, this); } } break; @@ -2552,7 +2781,7 @@ unique_in = NULL; } if (unique_in != NULL) { - n->subsume_by(unique_in); + n->subsume_by(unique_in, this); } } break; @@ -2565,16 +2794,15 @@ Node* d = n->find_similar(Op_DivI); if (d) { // Replace them with a fused divmod if supported - Compile* C = Compile::current(); if (Matcher::has_match_rule(Op_DivModI)) { - DivModINode* divmod = DivModINode::make(C, n); - d->subsume_by(divmod->div_proj()); - n->subsume_by(divmod->mod_proj()); + DivModINode* divmod = DivModINode::make(this, n); + d->subsume_by(divmod->div_proj(), this); + n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new (C) MulINode(d, d->in(2)); - Node* sub = new (C) SubINode(d->in(1), mult); - n->subsume_by( sub ); + Node* mult = new (this) MulINode(d, d->in(2)); + Node* sub = new (this) SubINode(d->in(1), mult); + n->subsume_by(sub, this); } } } @@ -2586,16 +2814,15 @@ Node* d = n->find_similar(Op_DivL); if (d) { // Replace them with a fused divmod if supported - Compile* C = Compile::current(); if (Matcher::has_match_rule(Op_DivModL)) { - DivModLNode* divmod = DivModLNode::make(C, n); - d->subsume_by(divmod->div_proj()); - n->subsume_by(divmod->mod_proj()); + DivModLNode* divmod = DivModLNode::make(this, n); + d->subsume_by(divmod->div_proj(), this); + n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new (C) MulLNode(d, d->in(2)); - Node* sub = new (C) SubLNode(d->in(1), mult); - n->subsume_by( sub ); + Node* mult = new (this) MulLNode(d, d->in(2)); + Node* sub = new (this) SubLNode(d->in(1), mult); + n->subsume_by(sub, this); } } } @@ -2614,8 +2841,8 @@ if (n->req()-1 > 2) { // Replace many operand PackNodes with a binary tree for matching PackNode* p = (PackNode*) n; - Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req()); - n->subsume_by(btp); + Node* btp = p->binary_tree_pack(this, 1, n->req()); + n->subsume_by(btp, this); } break; case Op_Loop: @@ -2639,18 +2866,16 @@ if (t != NULL && t->is_con()) { juint shift = t->get_con(); if (shift > mask) { // Unsigned cmp - Compile* C = Compile::current(); - n->set_req(2, ConNode::make(C, TypeInt::make(shift & mask))); + n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask))); } } else { if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { - Compile* C = Compile::current(); - Node* shift = new (C) AndINode(in2, ConNode::make(C, TypeInt::make(mask))); + Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask))); n->set_req(2, shift); } } if (in2->outcnt() == 0) { // Remove dead node - in2->disconnect_inputs(NULL); + in2->disconnect_inputs(NULL, this); } } break; @@ -2668,7 +2893,7 @@ //------------------------------final_graph_reshaping_walk--------------------- // Replacing Opaque nodes with their input in final_graph_reshaping_impl(), // requires that the walk visits a node's inputs before visiting the node. -static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { +void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { ResourceArea *area = Thread::current()->resource_area(); Unique_Node_List sfpt(area); @@ -2734,7 +2959,7 @@ n->set_req(j, in->in(1)); } if (in->outcnt() == 0) { - in->disconnect_inputs(NULL); + in->disconnect_inputs(NULL, this); } } } @@ -3007,7 +3232,8 @@ } Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog) - : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false) + : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false), + _phase_name(name), _dolog(dolog) { if (dolog) { C = Compile::current(); @@ -3017,15 +3243,34 @@ _log = NULL; } if (_log != NULL) { - _log->begin_head("phase name='%s' nodes='%d'", name, C->unique()); + _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes()); _log->stamp(); _log->end_head(); } } Compile::TracePhase::~TracePhase() { + + C = Compile::current(); + if (_dolog) { + _log = C->log(); + } else { + _log = NULL; + } + +#ifdef ASSERT + if (PrintIdealNodeCount) { + tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'", + _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk()); + } + + if (VerifyIdealNodeCount) { + Compile::current()->print_missing_nodes(); + } +#endif + if (_log != NULL) { - _log->done("phase nodes='%d'", C->unique()); + _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes()); } } @@ -3226,3 +3471,33 @@ cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type); } } + +void Compile::dump_inlining() { + if (PrintInlining) { + // Print inlining message for candidates that we couldn't inline + // for lack of space or non constant receiver + for (int i = 0; i < _late_inlines.length(); i++) { + CallGenerator* cg = _late_inlines.at(i); + cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff"); + } + Unique_Node_List useful; + useful.push(root()); + for (uint next = 0; next < useful.size(); ++next) { + Node* n = useful.at(next); + if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) { + CallNode* call = n->as_Call(); + CallGenerator* cg = call->generator(); + cg->print_inlining_late("receiver not constant"); + } + uint max = n->len(); + for ( uint i = 0; i < max; ++i ) { + Node *m = n->in(i); + if ( m == NULL ) continue; + useful.push(m); + } + } + for (int i = 0; i < _print_inlining_list->length(); i++) { + tty->print(_print_inlining_list->at(i).ss()->as_string()); + } + } +}
--- a/src/share/vm/opto/compile.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/compile.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -30,6 +30,7 @@ #include "code/debugInfoRec.hpp" #include "code/exceptionHandlerTable.hpp" #include "compiler/compilerOracle.hpp" +#include "compiler/compileBroker.hpp" #include "libadt/dict.hpp" #include "libadt/port.hpp" #include "libadt/vectset.hpp" @@ -75,6 +76,8 @@ class Unique_Node_List; class nmethod; class WarmCallInfo; +class Node_Stack; +struct Final_Reshape_Counts; //------------------------------Compile---------------------------------------- // This class defines a top-level Compiler invocation. @@ -98,6 +101,8 @@ private: Compile* C; CompileLog* _log; + const char* _phase_name; + bool _dolog; public: TracePhase(const char* name, elapsedTimer* accumulator, bool dolog); ~TracePhase(); @@ -259,6 +264,8 @@ int _orig_pc_slot_offset_in_bytes; int _major_progress; // Count of something big happening + bool _inlining_progress; // progress doing incremental inlining? + bool _inlining_incrementally;// Are we doing incremental inlining (post parse) bool _has_loops; // True if the method _may_ have some loops bool _has_split_ifs; // True if the method _may_ have some split-if bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores. @@ -297,6 +304,9 @@ // Node management uint _unique; // Counter for unique Node indices + VectorSet _dead_node_list; // Set of dead nodes + uint _dead_node_count; // Number of dead nodes; VectorSet::Size() is O(N). + // So use this to keep count and make the call O(1). debug_only(static int _debug_idx;) // Monotonic counter (not reset), use -XX:BreakAtNode=<idx> Arena _node_arena; // Arena for new-space Nodes Arena _old_arena; // Arena for old-space Nodes, lifetime during xform @@ -343,9 +353,69 @@ Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN WarmCallInfo* _warm_calls; // Sorted work-list for heat-based inlining. - GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after - // main parsing has finished. + GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after + // main parsing has finished. + GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations + + int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining) + uint _number_of_mh_late_inlines; // number of method handle late inlining still pending + + + // Inlining may not happen in parse order which would make + // PrintInlining output confusing. Keep track of PrintInlining + // pieces in order. + class PrintInliningBuffer : public ResourceObj { + private: + CallGenerator* _cg; + stringStream* _ss; + + public: + PrintInliningBuffer() + : _cg(NULL) { _ss = new stringStream(); } + + stringStream* ss() const { return _ss; } + CallGenerator* cg() const { return _cg; } + void set_cg(CallGenerator* cg) { _cg = cg; } + }; + + GrowableArray<PrintInliningBuffer>* _print_inlining_list; + int _print_inlining; + + public: + outputStream* print_inlining_stream() const { + return _print_inlining_list->at(_print_inlining).ss(); + } + + void print_inlining_skip(CallGenerator* cg) { + if (PrintInlining) { + _print_inlining_list->at(_print_inlining).set_cg(cg); + _print_inlining++; + _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer()); + } + } + + void print_inlining_insert(CallGenerator* cg) { + if (PrintInlining) { + for (int i = 0; i < _print_inlining_list->length(); i++) { + if (_print_inlining_list->at(i).cg() == cg) { + _print_inlining_list->insert_before(i+1, PrintInliningBuffer()); + _print_inlining = i+1; + _print_inlining_list->at(i).set_cg(NULL); + return; + } + } + ShouldNotReachHere(); + } + } + + void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) { + stringStream ss; + CompileTask::print_inlining(&ss, method, inline_level, bci, msg); + print_inlining_stream()->print(ss.as_string()); + } + + private: // Matching, CFG layout, allocation, code generation PhaseCFG* _cfg; // Results of CFG finding bool _select_24_bit_instr; // We selected an instruction with a 24-bit result @@ -412,6 +482,10 @@ int fixed_slots() const { assert(_fixed_slots >= 0, ""); return _fixed_slots; } void set_fixed_slots(int n) { _fixed_slots = n; } int major_progress() const { return _major_progress; } + void set_inlining_progress(bool z) { _inlining_progress = z; } + int inlining_progress() const { return _inlining_progress; } + void set_inlining_incrementally(bool z) { _inlining_incrementally = z; } + int inlining_incrementally() const { return _inlining_incrementally; } void set_major_progress() { _major_progress++; } void clear_major_progress() { _major_progress = 0; } int num_loop_opts() const { return _num_loop_opts; } @@ -518,7 +592,7 @@ ciEnv* env() const { return _env; } CompileLog* log() const { return _log; } bool failing() const { return _env->failing() || _failure_reason != NULL; } - const char* failure_reason() { return _failure_reason; } + const char* failure_reason() { return _failure_reason; } bool failure_reason_is(const char* r) { return (r==_failure_reason) || (r!=NULL && _failure_reason!=NULL && strcmp(r, _failure_reason)==0); } void record_failure(const char* reason); @@ -533,7 +607,7 @@ record_method_not_compilable(reason, true); } bool check_node_count(uint margin, const char* reason) { - if (unique() + margin > (uint)MaxNodeLimit) { + if (live_nodes() + margin > (uint)MaxNodeLimit) { record_method_not_compilable(reason); return true; } else { @@ -542,25 +616,41 @@ } // Node management - uint unique() const { return _unique; } - uint next_unique() { return _unique++; } - void set_unique(uint i) { _unique = i; } - static int debug_idx() { return debug_only(_debug_idx)+0; } - static void set_debug_idx(int i) { debug_only(_debug_idx = i); } - Arena* node_arena() { return &_node_arena; } - Arena* old_arena() { return &_old_arena; } - RootNode* root() const { return _root; } - void set_root(RootNode* r) { _root = r; } - StartNode* start() const; // (Derived from root.) + uint unique() const { return _unique; } + uint next_unique() { return _unique++; } + void set_unique(uint i) { _unique = i; } + static int debug_idx() { return debug_only(_debug_idx)+0; } + static void set_debug_idx(int i) { debug_only(_debug_idx = i); } + Arena* node_arena() { return &_node_arena; } + Arena* old_arena() { return &_old_arena; } + RootNode* root() const { return _root; } + void set_root(RootNode* r) { _root = r; } + StartNode* start() const; // (Derived from root.) void init_start(StartNode* s); - Node* immutable_memory(); + Node* immutable_memory(); - Node* recent_alloc_ctl() const { return _recent_alloc_ctl; } - Node* recent_alloc_obj() const { return _recent_alloc_obj; } - void set_recent_alloc(Node* ctl, Node* obj) { + Node* recent_alloc_ctl() const { return _recent_alloc_ctl; } + Node* recent_alloc_obj() const { return _recent_alloc_obj; } + void set_recent_alloc(Node* ctl, Node* obj) { _recent_alloc_ctl = ctl; _recent_alloc_obj = obj; - } + } + void record_dead_node(uint idx) { if (_dead_node_list.test_set(idx)) return; + _dead_node_count++; + } + uint dead_node_count() { return _dead_node_count; } + void reset_dead_node_list() { _dead_node_list.Reset(); + _dead_node_count = 0; + } + uint live_nodes() const { + int val = _unique - _dead_node_count; + assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count)); + return (uint) val; + } +#ifdef ASSERT + uint count_live_nodes_by_graph_walk(); + void print_missing_nodes(); +#endif // Constant table ConstantTable& constant_table() { return _constant_table; } @@ -634,7 +724,7 @@ // Decide how to build a call. // The profile factor is a discount to apply to this site's interp. profile. - CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true); + CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true, bool delayed_forbidden = false); bool should_delay_inlining(ciMethod* call_method, JVMState* jvms); // Report if there were too many traps at a current method and bci. @@ -662,14 +752,46 @@ void identify_useful_nodes(Unique_Node_List &useful); - void remove_useless_nodes (Unique_Node_List &useful); + void update_dead_node_list(Unique_Node_List &useful); + void remove_useless_nodes (Unique_Node_List &useful); WarmCallInfo* warm_calls() const { return _warm_calls; } void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; } WarmCallInfo* pop_warm_call(); // Record this CallGenerator for inlining at the end of parsing. - void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); } + void add_late_inline(CallGenerator* cg) { + _late_inlines.insert_before(_late_inlines_pos, cg); + _late_inlines_pos++; + } + + void prepend_late_inline(CallGenerator* cg) { + _late_inlines.insert_before(0, cg); + } + + void add_string_late_inline(CallGenerator* cg) { + _string_late_inlines.push(cg); + } + + void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful); + + void dump_inlining(); + + bool over_inlining_cutoff() const { + if (!inlining_incrementally()) { + return unique() > (uint)NodeCountInliningCutoff; + } else { + return live_nodes() > (uint)LiveNodeCountInliningCutoff; + } + } + + void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; } + void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !"); _number_of_mh_late_inlines--; } + bool has_mh_late_inlines() const { return _number_of_mh_late_inlines > 0; } + + void inline_incrementally_one(PhaseIterGVN& igvn); + void inline_incrementally(PhaseIterGVN& igvn); + void inline_string_calls(bool parse_time); // Matching, CFG layout, allocation, code generation PhaseCFG* cfg() { return _cfg; } @@ -876,6 +998,11 @@ static juint _intrinsic_hist_count[vmIntrinsics::ID_LIMIT]; static jubyte _intrinsic_hist_flags[vmIntrinsics::ID_LIMIT]; #endif + // Function calls made by the public function final_graph_reshaping. + // No need to be made public as they are not called elsewhere. + void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc); + void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ); + void eliminate_redundant_card_marks(Node* n); public:
--- a/src/share/vm/opto/doCall.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/doCall.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -41,25 +41,30 @@ #include "prims/nativeLookup.hpp" #include "runtime/sharedRuntime.hpp" -void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) { +void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) { if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) { + outputStream* out = tty; if (!PrintInlining) { if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) { method->print_short_name(); tty->cr(); } CompileTask::print_inlining(prof_method, depth, bci); + } else { + out = C->print_inlining_stream(); } - CompileTask::print_inline_indent(depth); - tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count); - prof_klass->name()->print_symbol(); - tty->cr(); + CompileTask::print_inline_indent(depth, out); + out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count); + stringStream ss; + prof_klass->name()->print_symbol_on(&ss); + out->print(ss.as_string()); + out->cr(); } } CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, - float prof_factor, bool allow_intrinsics) { + float prof_factor, bool allow_intrinsics, bool delayed_forbidden) { ciMethod* caller = jvms->method(); int bci = jvms->bci(); Bytecodes::Code bytecode = caller->java_code_at_bci(bci); @@ -126,7 +131,9 @@ // MethodHandle.invoke* are native methods which obviously don't // have bytecodes and so normal inlining fails. if (callee->is_method_handle_intrinsic()) { - return CallGenerator::for_method_handle_call(jvms, caller, callee); + CallGenerator* cg = CallGenerator::for_method_handle_call(jvms, caller, callee, delayed_forbidden); + assert (cg == NULL || !delayed_forbidden || !cg->is_late_inline() || cg->is_mh_late_inline(), "unexpected CallGenerator"); + return cg; } // Do not inline strict fp into non-strict code, or the reverse @@ -157,20 +164,27 @@ WarmCallInfo scratch_ci; if (!UseOldInlining) scratch_ci.init(jvms, callee, profile, prof_factor); - WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci); + bool should_delay = false; + WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci, should_delay); assert(ci != &scratch_ci, "do not let this pointer escape"); bool allow_inline = (ci != NULL && !ci->is_cold()); bool require_inline = (allow_inline && ci->is_hot()); if (allow_inline) { CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses); - if (require_inline && cg != NULL && should_delay_inlining(callee, jvms)) { + + if (require_inline && cg != NULL) { // Delay the inlining of this method to give us the // opportunity to perform some high level optimizations // first. - return CallGenerator::for_late_inline(callee, cg); + if (should_delay_inlining(callee, jvms)) { + assert(!delayed_forbidden, "strange"); + return CallGenerator::for_string_late_inline(callee, cg); + } else if ((should_delay || AlwaysIncrementalInline) && !delayed_forbidden) { + return CallGenerator::for_late_inline(callee, cg); + } } - if (cg == NULL) { + if (cg == NULL || should_delay) { // Fall through. } else if (require_inline || !InlineWarmCalls) { return cg; @@ -234,13 +248,13 @@ } if (miss_cg != NULL) { if (next_hit_cg != NULL) { - trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)); + trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)); // We don't need to record dependency on a receiver here and below. // Whenever we inline, the dependency is added by Parse::Parse(). miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX); } if (miss_cg != NULL) { - trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count); + trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count); CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0)); if (cg != NULL) return cg; } @@ -335,7 +349,7 @@ return true; } - assert(dest_method->will_link(method()->holder(), klass, bc()), "dest_method: typeflow responsibility"); + assert(dest_method->is_loaded(), "dest_method: typeflow responsibility"); return false; } @@ -351,7 +365,7 @@ // Set frequently used booleans const bool is_virtual = bc() == Bytecodes::_invokevirtual; const bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface; - const bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial; + const bool has_receiver = Bytecodes::has_receiver(bc()); // Find target being called bool will_link; @@ -381,6 +395,8 @@ // Note: In the absence of miranda methods, an abstract class K can perform // an invokevirtual directly on an interface method I.m if K implements I. + // orig_callee is the resolved callee which's signature includes the + // appendix argument. const int nargs = orig_callee->arg_size(); // Push appendix argument (MethodType, CallSite, etc.), if one. @@ -573,7 +589,7 @@ } // If there is going to be a trap, put it at the next bytecode: set_bci(iter().next_bci()); - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); set_bci(iter().cur_bci()); // put it back } }
--- a/src/share/vm/opto/escape.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/escape.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -2313,7 +2313,7 @@ } } } - if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) { + if ((int) (C->live_nodes() + 2*NodeLimitFudgeFactor) > MaxNodeLimit) { if (C->do_escape_analysis() == true && !C->failing()) { // Retry compilation without escape analysis. // If this is the first failure, the sentinel string will "stick"
--- a/src/share/vm/opto/gcm.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/gcm.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1359,7 +1359,7 @@ // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. for( i=0; i < _num_blocks; i++ ) - _blocks[i]->call_catch_cleanup(_bbs); + _blocks[i]->call_catch_cleanup(_bbs, C); #ifndef PRODUCT if (trace_opto_pipelining()) {
--- a/src/share/vm/opto/graphKit.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/graphKit.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -93,6 +93,16 @@ return jvms; } +//--------------------------------sync_jvms_for_reexecute--------------------- +// Make sure our current jvms agrees with our parse state. This version +// uses the reexecute_sp for reexecuting bytecodes. +JVMState* GraphKit::sync_jvms_for_reexecute() { + JVMState* jvms = this->jvms(); + jvms->set_bci(bci()); // Record the new bci in the JVMState + jvms->set_sp(reexecute_sp()); // Record the new sp in the JVMState + return jvms; +} + #ifdef ASSERT bool GraphKit::jvms_in_sync() const { Parse* parse = is_Parse(); @@ -143,7 +153,7 @@ void GraphKit::stop_and_kill_map() { SafePointNode* dead_map = stop(); if (dead_map != NULL) { - dead_map->disconnect_inputs(NULL); // Mark the map as killed. + dead_map->disconnect_inputs(NULL, C); // Mark the map as killed. assert(dead_map->is_killed(), "must be so marked"); } } @@ -826,7 +836,16 @@ // Walk the inline list to fill in the correct set of JVMState's // Also fill in the associated edges for each JVMState. - JVMState* youngest_jvms = sync_jvms(); + // If the bytecode needs to be reexecuted we need to put + // the arguments back on the stack. + const bool should_reexecute = jvms()->should_reexecute(); + JVMState* youngest_jvms = should_reexecute ? sync_jvms_for_reexecute() : sync_jvms(); + + // NOTE: set_bci (called from sync_jvms) might reset the reexecute bit to + // undefined if the bci is different. This is normal for Parse but it + // should not happen for LibraryCallKit because only one bci is processed. + assert(!is_LibraryCallKit() || (jvms()->should_reexecute() == should_reexecute), + "in LibraryCallKit the reexecute bit should not change"); // If we are guaranteed to throw, we can prune everything but the // input to the current bytecode. @@ -860,7 +879,7 @@ } // Presize the call: - debug_only(uint non_debug_edges = call->req()); + DEBUG_ONLY(uint non_debug_edges = call->req()); call->add_req_batch(top(), youngest_jvms->debug_depth()); assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), ""); @@ -965,7 +984,7 @@ assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, ""); } -bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) { +bool GraphKit::compute_stack_effects(int& inputs, int& depth) { Bytecodes::Code code = java_bc(); if (code == Bytecodes::_wide) { code = method()->java_code_at_bci(bci() + 1); @@ -1005,14 +1024,11 @@ case Bytecodes::_getfield: case Bytecodes::_putfield: { + bool ignored_will_link; + ciField* field = method()->get_field_at_bci(bci(), ignored_will_link); + int size = field->type()->size(); bool is_get = (depth >= 0), is_static = (depth & 1); - ciBytecodeStream iter(method()); - iter.reset_to_bci(bci()); - iter.next(); - bool ignored_will_link; - ciField* field = iter.get_field(ignored_will_link); - int size = field->type()->size(); - inputs = (is_static ? 0 : 1); + inputs = (is_static ? 0 : 1); if (is_get) { depth = size - inputs; } else { @@ -1028,26 +1044,11 @@ case Bytecodes::_invokedynamic: case Bytecodes::_invokeinterface: { - ciBytecodeStream iter(method()); - iter.reset_to_bci(bci()); - iter.next(); bool ignored_will_link; ciSignature* declared_signature = NULL; - ciMethod* callee = iter.get_method(ignored_will_link, &declared_signature); + ciMethod* ignored_callee = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); assert(declared_signature != NULL, "cannot be null"); - // (Do not use ciMethod::arg_size(), because - // it might be an unloaded method, which doesn't - // know whether it is static or not.) - if (for_parse) { - // Case 1: When called from parse we are *before* the invoke (in the - // caller) and need to to adjust the inputs by an appendix - // argument that will be pushed implicitly. - inputs = callee->invoke_arg_size(code) - (iter.has_appendix() ? 1 : 0); - } else { - // Case 2: Here we are *after* the invoke (in the callee) and need to - // remove any appendix arguments that were popped. - inputs = callee->invoke_arg_size(code) - (callee->has_member_arg() ? 1 : 0); - } + inputs = declared_signature->arg_size_for_bc(code); int size = declared_signature->return_type()->size(); depth = size - inputs; } @@ -1178,7 +1179,7 @@ Node *chk = NULL; switch(type) { case T_LONG : chk = new (C) CmpLNode(value, _gvn.zerocon(T_LONG)); break; - case T_INT : chk = new (C) CmpINode( value, _gvn.intcon(0)); break; + case T_INT : chk = new (C) CmpINode(value, _gvn.intcon(0)); break; case T_ARRAY : // fall through type = T_OBJECT; // simplify further tests case T_OBJECT : { @@ -1229,7 +1230,8 @@ break; } - default : ShouldNotReachHere(); + default: + fatal(err_msg_res("unexpected type: %s", type2name(type))); } assert(chk != NULL, "sanity check"); chk = _gvn.transform(chk); @@ -1769,11 +1771,21 @@ CallProjections callprojs; call->extract_projections(&callprojs, true); + Node* init_mem = call->in(TypeFunc::Memory); + Node* final_mem = final_state->in(TypeFunc::Memory); + Node* final_ctl = final_state->in(TypeFunc::Control); + Node* final_io = final_state->in(TypeFunc::I_O); + // Replace all the old call edges with the edges from the inlining result - C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control)); - C->gvn_replace_by(callprojs.fallthrough_memproj, final_state->in(TypeFunc::Memory)); - C->gvn_replace_by(callprojs.fallthrough_ioproj, final_state->in(TypeFunc::I_O)); - Node* final_mem = final_state->in(TypeFunc::Memory); + if (callprojs.fallthrough_catchproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl); + } + if (callprojs.fallthrough_memproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_memproj, final_mem); + } + if (callprojs.fallthrough_ioproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_ioproj, final_io); + } // Replace the result with the new result if it exists and is used if (callprojs.resproj != NULL && result != NULL) { @@ -1782,10 +1794,15 @@ if (ejvms == NULL) { // No exception edges to simply kill off those paths - C->gvn_replace_by(callprojs.catchall_catchproj, C->top()); - C->gvn_replace_by(callprojs.catchall_memproj, C->top()); - C->gvn_replace_by(callprojs.catchall_ioproj, C->top()); - + if (callprojs.catchall_catchproj != NULL) { + C->gvn_replace_by(callprojs.catchall_catchproj, C->top()); + } + if (callprojs.catchall_memproj != NULL) { + C->gvn_replace_by(callprojs.catchall_memproj, C->top()); + } + if (callprojs.catchall_ioproj != NULL) { + C->gvn_replace_by(callprojs.catchall_ioproj, C->top()); + } // Replace the old exception object with top if (callprojs.exobj != NULL) { C->gvn_replace_by(callprojs.exobj, C->top()); @@ -1797,10 +1814,15 @@ SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states(); Node* ex_oop = ekit.use_exception_state(ex_map); - - C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control()); - C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory()); - C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o()); + if (callprojs.catchall_catchproj != NULL) { + C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control()); + } + if (callprojs.catchall_memproj != NULL) { + C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory()); + } + if (callprojs.catchall_ioproj != NULL) { + C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o()); + } // Replace the old exception object with the newly created one if (callprojs.exobj != NULL) { @@ -1809,7 +1831,7 @@ } // Disconnect the call from the graph - call->disconnect_inputs(NULL); + call->disconnect_inputs(NULL, C); C->gvn_replace_by(call, C->top()); // Clean up any MergeMems that feed other MergeMems since the @@ -1861,15 +1883,17 @@ // occurs here, the runtime will make sure an MDO exists. There is // no need to call method()->ensure_method_data() at this point. + // Set the stack pointer to the right value for reexecution: + set_sp(reexecute_sp()); + #ifdef ASSERT if (!must_throw) { // Make sure the stack has at least enough depth to execute // the current bytecode. - int inputs, ignore; - if (compute_stack_effects(inputs, ignore)) { - assert(sp() >= inputs, "must have enough JVMS stack to execute"); - // It is a frequent error in library_call.cpp to issue an - // uncommon trap with the _sp value already popped. + int inputs, ignored_depth; + if (compute_stack_effects(inputs, ignored_depth)) { + assert(sp() >= inputs, err_msg_res("must have enough JVMS stack to execute %s: sp=%d, inputs=%d", + Bytecodes::name(java_bc()), sp(), inputs)); } } #endif @@ -1900,7 +1924,8 @@ case Deoptimization::Action_make_not_compilable: break; default: - assert(false, "bad action"); + fatal(err_msg_res("unknown action %d: %s", action, Deoptimization::trap_action_name(action))); + break; #endif } @@ -2667,7 +2692,7 @@ case SSC_always_false: // It needs a null check because a null will *pass* the cast check. // A non-null value will always produce an exception. - return do_null_assert(obj, T_OBJECT); + return null_assert(obj); } } } @@ -2786,7 +2811,7 @@ mb->init_req(TypeFunc::Control, control()); mb->init_req(TypeFunc::Memory, reset_memory()); Node* membar = _gvn.transform(mb); - set_control(_gvn.transform(new (C) ProjNode(membar,TypeFunc::Control) )); + set_control(_gvn.transform(new (C) ProjNode(membar, TypeFunc::Control))); set_all_memory_call(membar); return membar; } @@ -2975,7 +3000,7 @@ set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) ); // create memory projection for i_o set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx ); - make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true); + make_slow_call_ex(allocx, env()->Throwable_klass(), true); // create a memory projection as for the normal control path Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory)); @@ -3148,7 +3173,7 @@ Node* cmp_lh = _gvn.transform( new(C) CmpINode(layout_val, intcon(layout_con)) ); Node* bol_lh = _gvn.transform( new(C) BoolNode(cmp_lh, BoolTest::eq) ); { BuildCutout unless(this, bol_lh, PROB_MAX); - _sp += nargs; + inc_sp(nargs); uncommon_trap(Deoptimization::Reason_class_check, Deoptimization::Action_maybe_recompile); } @@ -3391,7 +3416,7 @@ { PreserveJVMState pjvms(this); set_control(iffalse); - _sp += nargs; + inc_sp(nargs); uncommon_trap(reason, Deoptimization::Action_maybe_recompile); } Node* iftrue = _gvn.transform(new (C) IfTrueNode(iff));
--- a/src/share/vm/opto/graphKit.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/graphKit.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -41,6 +41,7 @@ class FastLockNode; class FastUnlockNode; class IdealKit; +class LibraryCallKit; class Parse; class RootNode; @@ -60,11 +61,13 @@ PhaseGVN &_gvn; // Some optimizations while parsing SafePointNode* _map; // Parser map from JVM to Nodes SafePointNode* _exceptions;// Parser map(s) for exception state(s) - int _sp; // JVM Expression Stack Pointer int _bci; // JVM Bytecode Pointer ciMethod* _method; // JVM Current Method private: + int _sp; // JVM Expression Stack Pointer; don't modify directly! + + private: SafePointNode* map_not_null() const { assert(_map != NULL, "must call stopped() to test for reset compiler map"); return _map; @@ -80,7 +83,8 @@ } #endif - virtual Parse* is_Parse() const { return NULL; } + virtual Parse* is_Parse() const { return NULL; } + virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; } ciEnv* env() const { return _env; } PhaseGVN& gvn() const { return _gvn; } @@ -141,7 +145,7 @@ _bci = jvms->bci(); _method = jvms->has_method() ? jvms->method() : NULL; } void set_map(SafePointNode* m) { _map = m; debug_only(verify_map()); } - void set_sp(int i) { assert(i >= 0, "must be non-negative"); _sp = i; } + void set_sp(int sp) { assert(sp >= 0, err_msg_res("sp must be non-negative: %d", sp)); _sp = sp; } void clean_stack(int from_sp); // clear garbage beyond from_sp to top void inc_sp(int i) { set_sp(sp() + i); } @@ -149,7 +153,9 @@ void set_bci(int bci) { _bci = bci; } // Make sure jvms has current bci & sp. - JVMState* sync_jvms() const; + JVMState* sync_jvms() const; + JVMState* sync_jvms_for_reexecute(); + #ifdef ASSERT // Make sure JVMS has an updated copy of bci and sp. // Also sanity-check method, depth, and monitor depth. @@ -286,7 +292,7 @@ // How many stack inputs does the current BC consume? // And, how does the stack change after the bytecode? // Returns false if unknown. - bool compute_stack_effects(int& inputs, int& depth, bool for_parse = false); + bool compute_stack_effects(int& inputs, int& depth); // Add a fixed offset to a pointer Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) { @@ -337,20 +343,37 @@ Node* load_object_klass(Node* object); // Find out the length of an array. Node* load_array_length(Node* array); + + // Helper function to do a NULL pointer check or ZERO check based on type. - Node* null_check_common(Node* value, BasicType type, - bool assert_null, Node* *null_control); // Throw an exception if a given value is null. // Return the value cast to not-null. // Be clever about equivalent dominating null checks. - Node* do_null_check(Node* value, BasicType type) { - return null_check_common(value, type, false, NULL); + Node* null_check_common(Node* value, BasicType type, + bool assert_null = false, Node* *null_control = NULL); + Node* null_check(Node* value, BasicType type = T_OBJECT) { + return null_check_common(value, type); + } + Node* null_check_receiver() { + assert(argument(0)->bottom_type()->isa_ptr(), "must be"); + return null_check(argument(0)); + } + Node* zero_check_int(Node* value) { + assert(value->bottom_type()->basic_type() == T_INT, + err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type()))); + return null_check_common(value, T_INT); + } + Node* zero_check_long(Node* value) { + assert(value->bottom_type()->basic_type() == T_LONG, + err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type()))); + return null_check_common(value, T_LONG); } // Throw an uncommon trap if a given value is __not__ null. // Return the value cast to null, and be clever about dominating checks. - Node* do_null_assert(Node* value, BasicType type) { - return null_check_common(value, type, true, NULL); + Node* null_assert(Node* value, BasicType type = T_OBJECT) { + return null_check_common(value, type, true); } + // Null check oop. Return null-path control into (*null_control). // Return a cast-not-null node which depends on the not-null control. // If never_see_null, use an uncommon trap (*null_control sees a top). @@ -371,9 +394,9 @@ // Replace all occurrences of one node by another. void replace_in_map(Node* old, Node* neww); - void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms, _sp++, n); } - Node* pop() { map_not_null(); return _map->stack( _map->_jvms, --_sp); } - Node* peek(int off = 0) { map_not_null(); return _map->stack( _map->_jvms, _sp - off - 1); } + void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms, _sp++ , n); } + Node* pop() { map_not_null(); return _map->stack( _map->_jvms, --_sp ); } + Node* peek(int off = 0) { map_not_null(); return _map->stack( _map->_jvms, _sp - off - 1 ); } void push_pair(Node* ldval) { push(ldval); @@ -580,19 +603,15 @@ //---------- help for generating calls -------------- - // Do a null check on the receiver, which is in argument(0). - Node* null_check_receiver(ciMethod* callee) { + // Do a null check on the receiver as it would happen before the call to + // callee (with all arguments still on the stack). + Node* null_check_receiver_before_call(ciMethod* callee) { assert(!callee->is_static(), "must be a virtual method"); - int nargs = 1 + callee->signature()->size(); - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - Node* receiver = argument(0); - _sp += nargs; - receiver = do_null_check(receiver, T_OBJECT); - _sp -= nargs; - return receiver; + const int nargs = callee->arg_size(); + inc_sp(nargs); + Node* n = null_check_receiver(); + dec_sp(nargs); + return n; } // Fill in argument edges for the call from argument(0), argument(1), ... @@ -645,6 +664,9 @@ klass, reason_string, must_throw, keep_exact_action); } + // SP when bytecode needs to be reexecuted. + virtual int reexecute_sp() { return sp(); } + // Report if there were too many traps at the current method and bci. // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded. // If there is no MDO at all, report no trap unless told to assume it.
--- a/src/share/vm/opto/ifg.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/ifg.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -573,7 +573,7 @@ (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) { b->_nodes.remove(j - 1); if( lrgs(r)._def == n ) lrgs(r)._def = 0; - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); _cfg._bbs.map(n->_idx,NULL); n->replace_by(C->top()); // Since yanking a Node from block, high pressure moves up one
--- a/src/share/vm/opto/lcm.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/lcm.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1005,7 +1005,7 @@ //------------------------------call_catch_cleanup----------------------------- // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. -void Block::call_catch_cleanup(Block_Array &bbs) { +void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) { // End of region to clone uint end = end_idx(); @@ -1067,7 +1067,7 @@ // Remove the now-dead cloned ops for(uint i3 = beg; i3 < end; i3++ ) { - _nodes[beg]->disconnect_inputs(NULL); + _nodes[beg]->disconnect_inputs(NULL, C); _nodes.remove(beg); } @@ -1080,7 +1080,7 @@ Node *n = sb->_nodes[j]; if (n->outcnt() == 0 && (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); sb->_nodes.remove(j); new_cnt--; }
--- a/src/share/vm/opto/library_call.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/library_call.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -67,30 +67,64 @@ // Local helper class for LibraryIntrinsic: class LibraryCallKit : public GraphKit { private: - LibraryIntrinsic* _intrinsic; // the library intrinsic being called + LibraryIntrinsic* _intrinsic; // the library intrinsic being called + Node* _result; // the result node, if any + int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false); public: - LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic) - : GraphKit(caller), - _intrinsic(intrinsic) + LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic) + : GraphKit(jvms), + _intrinsic(intrinsic), + _result(NULL) { + // Check if this is a root compile. In that case we don't have a caller. + if (!jvms->has_method()) { + _reexecute_sp = sp(); + } else { + // Find out how many arguments the interpreter needs when deoptimizing + // and save the stack pointer value so it can used by uncommon_trap. + // We find the argument count by looking at the declared signature. + bool ignored_will_link; + ciSignature* declared_signature = NULL; + ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); + const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci())); + _reexecute_sp = sp() + nargs; // "push" arguments back on stack + } } + virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; } + ciMethod* caller() const { return jvms()->method(); } int bci() const { return jvms()->bci(); } LibraryIntrinsic* intrinsic() const { return _intrinsic; } vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); } ciMethod* callee() const { return _intrinsic->method(); } - ciSignature* signature() const { return callee()->signature(); } - int arg_size() const { return callee()->arg_size(); } bool try_to_inline(); Node* try_to_predicate(); + void push_result() { + // Push the result onto the stack. + if (!stopped() && result() != NULL) { + BasicType bt = result()->bottom_type()->basic_type(); + push_node(bt, result()); + } + } + + private: + void fatal_unexpected_iid(vmIntrinsics::ID iid) { + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + } + + void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; } + void set_result(RegionNode* region, PhiNode* value); + Node* result() { return _result; } + + virtual int reexecute_sp() { return _reexecute_sp; } + // Helper functions to inline natives - void push_result(RegionNode* region, PhiNode* value); Node* generate_guard(Node* test, RegionNode* region, float true_prob); Node* generate_slow_guard(Node* test, RegionNode* region); Node* generate_fair_guard(Node* test, RegionNode* region); @@ -108,21 +142,19 @@ bool disjoint_bases, const char* &name, bool dest_uninitialized); Node* load_mirror_from_klass(Node* klass); Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path, int offset); - Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs, + Node* load_klass_from_mirror(Node* mirror, bool never_see_null, RegionNode* region, int null_path) { int offset = java_lang_Class::klass_offset_in_bytes(); - return load_klass_from_mirror_common(mirror, never_see_null, nargs, + return load_klass_from_mirror_common(mirror, never_see_null, region, null_path, offset); } Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path) { int offset = java_lang_Class::array_klass_offset_in_bytes(); - return load_klass_from_mirror_common(mirror, never_see_null, nargs, + return load_klass_from_mirror_common(mirror, never_see_null, region, null_path, offset); } @@ -161,16 +193,14 @@ bool inline_string_indexOf(); Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i); bool inline_string_equals(); - Node* pop_math_arg(); + Node* round_double_node(Node* n); bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); bool inline_trig(vmIntrinsics::ID id); - bool inline_trans(vmIntrinsics::ID id); - bool inline_abs(vmIntrinsics::ID id); - bool inline_sqrt(vmIntrinsics::ID id); + bool inline_math(vmIntrinsics::ID id); + bool inline_exp(); + bool inline_pow(); void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); - bool inline_pow(vmIntrinsics::ID id); - bool inline_exp(vmIntrinsics::ID id); bool inline_min_max(vmIntrinsics::ID id); Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. @@ -179,7 +209,7 @@ // Helper for inline_unsafe_access. // Generates the guards that check whether the result of // Unsafe.getObject should be recorded in an SATB log buffer. - void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar); + void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile); bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static); bool inline_unsafe_allocate(); @@ -253,11 +283,7 @@ bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind); bool inline_unsafe_ordered_store(BasicType type); bool inline_fp_conversions(vmIntrinsics::ID id); - bool inline_numberOfLeadingZeros(vmIntrinsics::ID id); - bool inline_numberOfTrailingZeros(vmIntrinsics::ID id); - bool inline_bitCount(vmIntrinsics::ID id); - bool inline_reverseBytes(vmIntrinsics::ID id); - + bool inline_number_methods(vmIntrinsics::ID id); bool inline_reference_get(); bool inline_aescrypt_Block(vmIntrinsics::ID id); bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id); @@ -321,15 +347,18 @@ switch (id) { case vmIntrinsics::_compareTo: if (!SpecialStringCompareTo) return NULL; + if (!Matcher::match_rule_supported(Op_StrComp)) return NULL; break; case vmIntrinsics::_indexOf: if (!SpecialStringIndexOf) return NULL; break; case vmIntrinsics::_equals: if (!SpecialStringEquals) return NULL; + if (!Matcher::match_rule_supported(Op_StrEquals)) return NULL; break; case vmIntrinsics::_equalsC: if (!SpecialArraysEquals) return NULL; + if (!Matcher::match_rule_supported(Op_AryEq)) return NULL; break; case vmIntrinsics::_arraycopy: if (!InlineArrayCopy) return NULL; @@ -382,6 +411,19 @@ if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL; break; + case vmIntrinsics::_reverseBytes_c: + if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL; + break; + case vmIntrinsics::_reverseBytes_s: + if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return NULL; + break; + case vmIntrinsics::_reverseBytes_i: + if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return NULL; + break; + case vmIntrinsics::_reverseBytes_l: + if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return NULL; + break; + case vmIntrinsics::_Reference_get: // Use the intrinsic version of Reference.get() so that the value in // the referent field can be registered by the G1 pre-barrier code. @@ -488,10 +530,13 @@ tty->print_cr("Intrinsic %s", str); } #endif - + ciMethod* callee = kit.callee(); + const int bci = kit.bci(); + + // Try to inline the intrinsic. if (kit.try_to_inline()) { if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { @@ -500,6 +545,8 @@ (is_virtual() ? " virtual='1'" : ""), C->unique() - nodes); } + // Push the result from the inlined method onto the stack. + kit.push_result(); return kit.transfer_exceptions_into_jvms(); } @@ -508,12 +555,12 @@ if (jvms->has_method()) { // Not a root compile. const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)"; - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + C->print_inlining(callee, jvms->depth() - 1, bci, msg); } else { // Root compile tty->print("Did not generate intrinsic %s%s at bci:%d in", vmIntrinsics::name_at(intrinsic_id()), - (is_virtual() ? " (virtual)" : ""), kit.bci()); + (is_virtual() ? " (virtual)" : ""), bci); } } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); @@ -532,9 +579,15 @@ tty->print_cr("Predicate for intrinsic %s", str); } #endif + ciMethod* callee = kit.callee(); + const int bci = kit.bci(); Node* slow_ctl = kit.try_to_predicate(); if (!kit.failing()) { + if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { + C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + } + C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'", vmIntrinsics::name_at(intrinsic_id()), @@ -549,12 +602,12 @@ if (jvms->has_method()) { // Not a root compile. const char* msg = "failed to generate predicate for intrinsic"; - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg); } else { // Root compile - tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in", - vmIntrinsics::name_at(intrinsic_id()), - (is_virtual() ? " (virtual)" : ""), kit.bci()); + C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in", + vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " (virtual)" : ""), bci); } } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); @@ -566,6 +619,7 @@ const bool is_store = true; const bool is_native_ptr = true; const bool is_static = true; + const bool is_volatile = true; if (!jvms()->has_method()) { // Root JVMState has a null method. @@ -575,13 +629,11 @@ } assert(merged_memory(), ""); + switch (intrinsic_id()) { - case vmIntrinsics::_hashCode: - return inline_native_hashcode(intrinsic()->is_virtual(), !is_static); - case vmIntrinsics::_identityHashCode: - return inline_native_hashcode(/*!virtual*/ false, is_static); - case vmIntrinsics::_getClass: - return inline_native_getClass(); + case vmIntrinsics::_hashCode: return inline_native_hashcode(intrinsic()->is_virtual(), !is_static); + case vmIntrinsics::_identityHashCode: return inline_native_hashcode(/*!virtual*/ false, is_static); + case vmIntrinsics::_getClass: return inline_native_getClass(); case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: @@ -592,203 +644,114 @@ case vmIntrinsics::_dexp: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: - case vmIntrinsics::_dpow: - return inline_math_native(intrinsic_id()); + case vmIntrinsics::_dpow: return inline_math_native(intrinsic_id()); case vmIntrinsics::_min: - case vmIntrinsics::_max: - return inline_min_max(intrinsic_id()); - - case vmIntrinsics::_arraycopy: - return inline_arraycopy(); - - case vmIntrinsics::_compareTo: - return inline_string_compareTo(); - case vmIntrinsics::_indexOf: - return inline_string_indexOf(); - case vmIntrinsics::_equals: - return inline_string_equals(); - - case vmIntrinsics::_getObject: - return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false); - case vmIntrinsics::_getBoolean: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false); - case vmIntrinsics::_getByte: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false); - case vmIntrinsics::_getShort: - return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false); - case vmIntrinsics::_getChar: - return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false); - case vmIntrinsics::_getInt: - return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false); - case vmIntrinsics::_getLong: - return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false); - case vmIntrinsics::_getFloat: - return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false); - case vmIntrinsics::_getDouble: - return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false); - - case vmIntrinsics::_putObject: - return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false); - case vmIntrinsics::_putBoolean: - return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false); - case vmIntrinsics::_putByte: - return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false); - case vmIntrinsics::_putShort: - return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false); - case vmIntrinsics::_putChar: - return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false); - case vmIntrinsics::_putInt: - return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false); - case vmIntrinsics::_putLong: - return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false); - case vmIntrinsics::_putFloat: - return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false); - case vmIntrinsics::_putDouble: - return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false); - - case vmIntrinsics::_getByte_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false); - case vmIntrinsics::_getShort_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false); - case vmIntrinsics::_getChar_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false); - case vmIntrinsics::_getInt_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false); - case vmIntrinsics::_getLong_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false); - case vmIntrinsics::_getFloat_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false); - case vmIntrinsics::_getDouble_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false); - case vmIntrinsics::_getAddress_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false); - - case vmIntrinsics::_putByte_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false); - case vmIntrinsics::_putShort_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false); - case vmIntrinsics::_putChar_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false); - case vmIntrinsics::_putInt_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_INT, false); - case vmIntrinsics::_putLong_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false); - case vmIntrinsics::_putFloat_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false); - case vmIntrinsics::_putDouble_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false); - case vmIntrinsics::_putAddress_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false); - - case vmIntrinsics::_getObjectVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true); - case vmIntrinsics::_getBooleanVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true); - case vmIntrinsics::_getByteVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true); - case vmIntrinsics::_getShortVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true); - case vmIntrinsics::_getCharVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true); - case vmIntrinsics::_getIntVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true); - case vmIntrinsics::_getLongVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true); - case vmIntrinsics::_getFloatVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true); - case vmIntrinsics::_getDoubleVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true); - - case vmIntrinsics::_putObjectVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true); - case vmIntrinsics::_putBooleanVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true); - case vmIntrinsics::_putByteVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true); - case vmIntrinsics::_putShortVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true); - case vmIntrinsics::_putCharVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true); - case vmIntrinsics::_putIntVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true); - case vmIntrinsics::_putLongVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true); - case vmIntrinsics::_putFloatVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true); - case vmIntrinsics::_putDoubleVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true); - - case vmIntrinsics::_prefetchRead: - return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static); - case vmIntrinsics::_prefetchWrite: - return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static); - case vmIntrinsics::_prefetchReadStatic: - return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static); - case vmIntrinsics::_prefetchWriteStatic: - return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static); - - case vmIntrinsics::_compareAndSwapObject: - return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); - case vmIntrinsics::_compareAndSwapInt: - return inline_unsafe_load_store(T_INT, LS_cmpxchg); - case vmIntrinsics::_compareAndSwapLong: - return inline_unsafe_load_store(T_LONG, LS_cmpxchg); - - case vmIntrinsics::_putOrderedObject: - return inline_unsafe_ordered_store(T_OBJECT); - case vmIntrinsics::_putOrderedInt: - return inline_unsafe_ordered_store(T_INT); - case vmIntrinsics::_putOrderedLong: - return inline_unsafe_ordered_store(T_LONG); - - case vmIntrinsics::_getAndAddInt: - return inline_unsafe_load_store(T_INT, LS_xadd); - case vmIntrinsics::_getAndAddLong: - return inline_unsafe_load_store(T_LONG, LS_xadd); - case vmIntrinsics::_getAndSetInt: - return inline_unsafe_load_store(T_INT, LS_xchg); - case vmIntrinsics::_getAndSetLong: - return inline_unsafe_load_store(T_LONG, LS_xchg); - case vmIntrinsics::_getAndSetObject: - return inline_unsafe_load_store(T_OBJECT, LS_xchg); - - case vmIntrinsics::_currentThread: - return inline_native_currentThread(); - case vmIntrinsics::_isInterrupted: - return inline_native_isInterrupted(); + case vmIntrinsics::_max: return inline_min_max(intrinsic_id()); + + case vmIntrinsics::_arraycopy: return inline_arraycopy(); + + case vmIntrinsics::_compareTo: return inline_string_compareTo(); + case vmIntrinsics::_indexOf: return inline_string_indexOf(); + case vmIntrinsics::_equals: return inline_string_equals(); + + case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, !is_volatile); + case vmIntrinsics::_getBoolean: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile); + case vmIntrinsics::_getByte: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_getShort: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_getChar: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_getInt: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile); + case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile); + case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile); + + case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile); + case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile); + case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_putShort: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_putChar: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_putInt: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile); + case vmIntrinsics::_putLong: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile); + case vmIntrinsics::_putFloat: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_putDouble: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, !is_volatile); + + case vmIntrinsics::_getByte_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_getShort_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_getChar_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_getInt_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_INT, !is_volatile); + case vmIntrinsics::_getLong_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_LONG, !is_volatile); + case vmIntrinsics::_getFloat_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_getDouble_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE, !is_volatile); + case vmIntrinsics::_getAddress_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile); + + case vmIntrinsics::_putByte_raw: return inline_unsafe_access( is_native_ptr, is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_putShort_raw: return inline_unsafe_access( is_native_ptr, is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_putChar_raw: return inline_unsafe_access( is_native_ptr, is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_putInt_raw: return inline_unsafe_access( is_native_ptr, is_store, T_INT, !is_volatile); + case vmIntrinsics::_putLong_raw: return inline_unsafe_access( is_native_ptr, is_store, T_LONG, !is_volatile); + case vmIntrinsics::_putFloat_raw: return inline_unsafe_access( is_native_ptr, is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_putDouble_raw: return inline_unsafe_access( is_native_ptr, is_store, T_DOUBLE, !is_volatile); + case vmIntrinsics::_putAddress_raw: return inline_unsafe_access( is_native_ptr, is_store, T_ADDRESS, !is_volatile); + + case vmIntrinsics::_getObjectVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, is_volatile); + case vmIntrinsics::_getBooleanVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, is_volatile); + case vmIntrinsics::_getByteVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, is_volatile); + case vmIntrinsics::_getShortVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, is_volatile); + case vmIntrinsics::_getCharVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, is_volatile); + case vmIntrinsics::_getIntVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, is_volatile); + case vmIntrinsics::_getLongVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, is_volatile); + case vmIntrinsics::_getFloatVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, is_volatile); + case vmIntrinsics::_getDoubleVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, is_volatile); + + case vmIntrinsics::_putObjectVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, is_volatile); + case vmIntrinsics::_putBooleanVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, is_volatile); + case vmIntrinsics::_putByteVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, is_volatile); + case vmIntrinsics::_putShortVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, is_volatile); + case vmIntrinsics::_putCharVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, is_volatile); + case vmIntrinsics::_putIntVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, is_volatile); + case vmIntrinsics::_putLongVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, is_volatile); + case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile); + case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile); + + case vmIntrinsics::_prefetchRead: return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static); + case vmIntrinsics::_prefetchWrite: return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static); + case vmIntrinsics::_prefetchReadStatic: return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static); + case vmIntrinsics::_prefetchWriteStatic: return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static); + + case vmIntrinsics::_compareAndSwapObject: return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); + case vmIntrinsics::_compareAndSwapInt: return inline_unsafe_load_store(T_INT, LS_cmpxchg); + case vmIntrinsics::_compareAndSwapLong: return inline_unsafe_load_store(T_LONG, LS_cmpxchg); + + case vmIntrinsics::_putOrderedObject: return inline_unsafe_ordered_store(T_OBJECT); + case vmIntrinsics::_putOrderedInt: return inline_unsafe_ordered_store(T_INT); + case vmIntrinsics::_putOrderedLong: return inline_unsafe_ordered_store(T_LONG); + + case vmIntrinsics::_getAndAddInt: return inline_unsafe_load_store(T_INT, LS_xadd); + case vmIntrinsics::_getAndAddLong: return inline_unsafe_load_store(T_LONG, LS_xadd); + case vmIntrinsics::_getAndSetInt: return inline_unsafe_load_store(T_INT, LS_xchg); + case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg); + case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg); + + case vmIntrinsics::_currentThread: return inline_native_currentThread(); + case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted(); #ifdef TRACE_HAVE_INTRINSICS - case vmIntrinsics::_classID: - return inline_native_classID(); - case vmIntrinsics::_threadID: - return inline_native_threadID(); - case vmIntrinsics::_counterTime: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime"); + case vmIntrinsics::_classID: return inline_native_classID(); + case vmIntrinsics::_threadID: return inline_native_threadID(); + case vmIntrinsics::_counterTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime"); #endif - case vmIntrinsics::_currentTimeMillis: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis"); - case vmIntrinsics::_nanoTime: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); - case vmIntrinsics::_allocateInstance: - return inline_unsafe_allocate(); - case vmIntrinsics::_copyMemory: - return inline_unsafe_copyMemory(); - case vmIntrinsics::_newArray: - return inline_native_newArray(); - case vmIntrinsics::_getLength: - return inline_native_getLength(); - case vmIntrinsics::_copyOf: - return inline_array_copyOf(false); - case vmIntrinsics::_copyOfRange: - return inline_array_copyOf(true); - case vmIntrinsics::_equalsC: - return inline_array_equals(); - case vmIntrinsics::_clone: - return inline_native_clone(intrinsic()->is_virtual()); - - case vmIntrinsics::_isAssignableFrom: - return inline_native_subtype_check(); + case vmIntrinsics::_currentTimeMillis: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis"); + case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); + case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate(); + case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory(); + case vmIntrinsics::_newArray: return inline_native_newArray(); + case vmIntrinsics::_getLength: return inline_native_getLength(); + case vmIntrinsics::_copyOf: return inline_array_copyOf(false); + case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true); + case vmIntrinsics::_equalsC: return inline_array_equals(); + case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual()); + + case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check(); case vmIntrinsics::_isInstance: case vmIntrinsics::_getModifiers: @@ -797,44 +760,32 @@ case vmIntrinsics::_isPrimitive: case vmIntrinsics::_getSuperclass: case vmIntrinsics::_getComponentType: - case vmIntrinsics::_getClassAccessFlags: - return inline_native_Class_query(intrinsic_id()); + case vmIntrinsics::_getClassAccessFlags: return inline_native_Class_query(intrinsic_id()); case vmIntrinsics::_floatToRawIntBits: case vmIntrinsics::_floatToIntBits: case vmIntrinsics::_intBitsToFloat: case vmIntrinsics::_doubleToRawLongBits: case vmIntrinsics::_doubleToLongBits: - case vmIntrinsics::_longBitsToDouble: - return inline_fp_conversions(intrinsic_id()); + case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id()); case vmIntrinsics::_numberOfLeadingZeros_i: case vmIntrinsics::_numberOfLeadingZeros_l: - return inline_numberOfLeadingZeros(intrinsic_id()); - case vmIntrinsics::_numberOfTrailingZeros_i: case vmIntrinsics::_numberOfTrailingZeros_l: - return inline_numberOfTrailingZeros(intrinsic_id()); - case vmIntrinsics::_bitCount_i: case vmIntrinsics::_bitCount_l: - return inline_bitCount(intrinsic_id()); - case vmIntrinsics::_reverseBytes_i: case vmIntrinsics::_reverseBytes_l: case vmIntrinsics::_reverseBytes_s: - case vmIntrinsics::_reverseBytes_c: - return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id()); - - case vmIntrinsics::_getCallerClass: - return inline_native_Reflection_getCallerClass(); - - case vmIntrinsics::_Reference_get: - return inline_reference_get(); + case vmIntrinsics::_reverseBytes_c: return inline_number_methods(intrinsic_id()); + + case vmIntrinsics::_getCallerClass: return inline_native_Reflection_getCallerClass(); + + case vmIntrinsics::_Reference_get: return inline_reference_get(); case vmIntrinsics::_aescrypt_encryptBlock: - case vmIntrinsics::_aescrypt_decryptBlock: - return inline_aescrypt_Block(intrinsic_id()); + case vmIntrinsics::_aescrypt_decryptBlock: return inline_aescrypt_Block(intrinsic_id()); case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: @@ -883,13 +834,13 @@ } } -//------------------------------push_result------------------------------ +//------------------------------set_result------------------------------- // Helper function for finishing intrinsics. -void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) { +void LibraryCallKit::set_result(RegionNode* region, PhiNode* value) { record_for_igvn(region); set_control(_gvn.transform(region)); - BasicType value_type = value->type()->basic_type(); - push_node(value_type, _gvn.transform(value)); + set_result( _gvn.transform(value)); + assert(value->type()->basic_type() == result()->bottom_type()->basic_type(), "sanity"); } //------------------------------generate_guard--------------------------- @@ -1078,7 +1029,6 @@ // to Int nodes containing the lenghts of str1 and str2. // Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2) { - Node* result = NULL; switch (opcode) { case Op_StrIndexOf: @@ -1105,51 +1055,23 @@ } //------------------------------inline_string_compareTo------------------------ +// public int java.lang.String.compareTo(String anotherString); bool LibraryCallKit::inline_string_compareTo() { - - if (!Matcher::has_match_rule(Op_StrComp)) return false; - - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - argument = do_null_check(argument, T_OBJECT); - _sp -= 2; + Node* receiver = null_check(argument(0)); + Node* arg = null_check(argument(1)); if (stopped()) { return true; } - - Node* compare = make_string_method_node(Op_StrComp, receiver, argument); - push(compare); + set_result(make_string_method_node(Op_StrComp, receiver, arg)); return true; } //------------------------------inline_string_equals------------------------ bool LibraryCallKit::inline_string_equals() { - - if (!Matcher::has_match_rule(Op_StrEquals)) return false; - - int nargs = 2; - _sp += nargs; - Node* argument = pop(); // pop non-receiver first: it was pushed second - Node* receiver = pop(); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - receiver = do_null_check(receiver, T_OBJECT); - //should not do null check for argument for String.equals(), because spec - //allows to specify NULL as argument. - _sp -= nargs; - + Node* receiver = null_check_receiver(); + // NOTE: Do not null check argument for String.equals() because spec + // allows to specify NULL as argument. + Node* argument = this->argument(1); if (stopped()) { return true; } @@ -1173,9 +1095,7 @@ ciInstanceKlass* klass = env()->String_klass(); if (!stopped()) { - _sp += nargs; // gen_instanceof might do an uncommon trap Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); - _sp -= nargs; Node* cmp = _gvn.transform(new (C) CmpINode(inst, intcon(1))); Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne)); @@ -1207,7 +1127,7 @@ Node* receiver_cnt = load_String_length(no_ctrl, receiver); // Get start addr of argument - Node* argument_val = load_String_value(no_ctrl, argument); + Node* argument_val = load_String_value(no_ctrl, argument); Node* argument_offset = load_String_offset(no_ctrl, argument); Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR); @@ -1236,24 +1156,15 @@ set_control(_gvn.transform(region)); record_for_igvn(region); - push(_gvn.transform(phi)); - + set_result(_gvn.transform(phi)); return true; } //------------------------------inline_array_equals---------------------------- bool LibraryCallKit::inline_array_equals() { - - if (!Matcher::has_match_rule(Op_AryEq)) return false; - - _sp += 2; - Node *argument2 = pop(); - Node *argument1 = pop(); - - Node* equals = - _gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), - argument1, argument2) ); - push(equals); + Node* arg1 = argument(0); + Node* arg2 = argument(1); + set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2))); return true; } @@ -1325,7 +1236,7 @@ float likely = PROB_LIKELY(0.9); float unlikely = PROB_UNLIKELY(0.9); - const int nargs = 2; // number of arguments to push back for uncommon trap in predicate + const int nargs = 0; // no arguments to push back for uncommon trap in predicate Node* source = load_String_value(no_ctrl, string_object); Node* sourceOffset = load_String_offset(no_ctrl, string_object); @@ -1396,10 +1307,8 @@ //------------------------------inline_string_indexOf------------------------ bool LibraryCallKit::inline_string_indexOf() { - - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); + Node* receiver = argument(0); + Node* arg = argument(1); Node* result; // Disable the use of pcmpestri until it can be guaranteed that @@ -1409,15 +1318,8 @@ // Generate SSE4.2 version of indexOf // We currently only have match rules that use SSE4.2 - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - argument = do_null_check(argument, T_OBJECT); - _sp -= 2; - + receiver = null_check(receiver); + arg = null_check(arg); if (stopped()) { return true; } @@ -1439,12 +1341,12 @@ Node* source_cnt = load_String_length(no_ctrl, receiver); // Get start addr of substring - Node* substr = load_String_value(no_ctrl, argument); - Node* substr_offset = load_String_offset(no_ctrl, argument); + Node* substr = load_String_value(no_ctrl, arg); + Node* substr_offset = load_String_offset(no_ctrl, arg); Node* substr_start = array_element_address(substr, substr_offset, T_CHAR); // Get length of source string - Node* substr_cnt = load_String_length(no_ctrl, argument); + Node* substr_cnt = load_String_length(no_ctrl, arg); // Check for substr count > string count Node* cmp = _gvn.transform( new(C) CmpINode(substr_cnt, source_cnt) ); @@ -1477,10 +1379,10 @@ } else { // Use LibraryCallKit::string_indexOf // don't intrinsify if argument isn't a constant string. - if (!argument->is_Con()) { + if (!arg->is_Con()) { return false; } - const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); + const TypeOopPtr* str_type = _gvn.type(arg)->isa_oopptr(); if (str_type == NULL) { return false; } @@ -1511,21 +1413,15 @@ return false; } - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - // No null check on the argument is needed since it's a constant String oop. - _sp -= 2; + receiver = null_check(receiver, T_OBJECT); + // NOTE: No null check on the argument is needed since it's a constant String oop. if (stopped()) { return true; } // The null string as a pattern always returns 0 (match at beginning of string) if (c == 0) { - push(intcon(0)); + set_result(intcon(0)); return true; } @@ -1548,47 +1444,54 @@ result = string_indexOf(receiver, pat, o, cache, md2); } - - push(result); + set_result(result); return true; } -//--------------------------pop_math_arg-------------------------------- -// Pop a double argument to a math function from the stack -// rounding it if necessary. -Node * LibraryCallKit::pop_math_arg() { - Node *arg = pop_pair(); - if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 ) - arg = _gvn.transform( new (C) RoundDoubleNode(0, arg) ); - return arg; +//--------------------------round_double_node-------------------------------- +// Round a double node if necessary. +Node* LibraryCallKit::round_double_node(Node* n) { + if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1) + n = _gvn.transform(new (C) RoundDoubleNode(0, n)); + return n; +} + +//------------------------------inline_math----------------------------------- +// public static double Math.abs(double) +// public static double Math.sqrt(double) +// public static double Math.log(double) +// public static double Math.log10(double) +bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { + Node* arg = round_double_node(argument(0)); + Node* n; + switch (id) { + case vmIntrinsics::_dabs: n = new (C) AbsDNode( arg); break; + case vmIntrinsics::_dsqrt: n = new (C) SqrtDNode(0, arg); break; + case vmIntrinsics::_dlog: n = new (C) LogDNode( arg); break; + case vmIntrinsics::_dlog10: n = new (C) Log10DNode( arg); break; + default: fatal_unexpected_iid(id); break; + } + set_result(_gvn.transform(n)); + return true; } //------------------------------inline_trig---------------------------------- // Inline sin/cos/tan instructions, if possible. If rounding is required, do // argument reduction which will turn into a fast/slow diamond. bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) { - _sp += arg_size(); // restore stack pointer - Node* arg = pop_math_arg(); - Node* trig = NULL; + Node* arg = round_double_node(argument(0)); + Node* n = NULL; switch (id) { - case vmIntrinsics::_dsin: - trig = _gvn.transform((Node*)new (C) SinDNode(arg)); - break; - case vmIntrinsics::_dcos: - trig = _gvn.transform((Node*)new (C) CosDNode(arg)); - break; - case vmIntrinsics::_dtan: - trig = _gvn.transform((Node*)new (C) TanDNode(arg)); - break; - default: - assert(false, "bad intrinsic was passed in"); - return false; + case vmIntrinsics::_dsin: n = new (C) SinDNode(arg); break; + case vmIntrinsics::_dcos: n = new (C) CosDNode(arg); break; + case vmIntrinsics::_dtan: n = new (C) TanDNode(arg); break; + default: fatal_unexpected_iid(id); break; } + n = _gvn.transform(n); // Rounding required? Check for argument reduction! - if( Matcher::strict_fp_requires_explicit_rounding ) { - + if (Matcher::strict_fp_requires_explicit_rounding) { static const double pi_4 = 0.7853981633974483; static const double neg_pi_4 = -0.7853981633974483; // pi/2 in 80-bit extended precision @@ -1623,8 +1526,8 @@ // probably do the math inside the SIN encoding. // Make the merge point - RegionNode *r = new (C) RegionNode(3); - Node *phi = new (C) PhiNode(r,Type::DOUBLE); + RegionNode* r = new (C) RegionNode(3); + Node* phi = new (C) PhiNode(r, Type::DOUBLE); // Flatten arg so we need only 1 test Node *abs = _gvn.transform(new (C) AbsDNode(arg)); @@ -1639,7 +1542,7 @@ set_control(opt_iff(r,iff)); // Set fast path result - phi->init_req(2,trig); + phi->init_req(2, n); // Slow path - non-blocking leaf call Node* call = NULL; @@ -1661,37 +1564,18 @@ break; } assert(control()->in(0) == call, ""); - Node* slow_result = _gvn.transform(new (C) ProjNode(call,TypeFunc::Parms)); - r->init_req(1,control()); - phi->init_req(1,slow_result); + Node* slow_result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); + r->init_req(1, control()); + phi->init_req(1, slow_result); // Post-merge set_control(_gvn.transform(r)); record_for_igvn(r); - trig = _gvn.transform(phi); + n = _gvn.transform(phi); C->set_has_split_ifs(true); // Has chance for split-if optimization } - // Push result back on JVM stack - push_pair(trig); - return true; -} - -//------------------------------inline_sqrt------------------------------------- -// Inline square root instruction, if possible. -bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dsqrt, "Not square root"); - _sp += arg_size(); // restore stack pointer - push_pair(_gvn.transform(new (C) SqrtDNode(0, pop_math_arg()))); - return true; -} - -//------------------------------inline_abs------------------------------------- -// Inline absolute value instruction, if possible. -bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dabs, "Not absolute value"); - _sp += arg_size(); // restore stack pointer - push_pair(_gvn.transform(new (C) AbsDNode(pop_math_arg()))); + set_result(n); return true; } @@ -1700,24 +1584,18 @@ //result=(result.isNaN())? funcAddr():result; // Check: If isNaN() by checking result!=result? then either trap // or go to runtime - Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result,result)); + Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result, result)); // Build the boolean node - Node* bolisnum = _gvn.transform( new (C) BoolNode(cmpisnan, BoolTest::eq) ); + Node* bolisnum = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::eq)); if (!too_many_traps(Deoptimization::Reason_intrinsic)) { - { - BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT); - // End the current control-flow path - push_pair(x); - if (y != NULL) { - push_pair(y); - } + { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT); // The pow or exp intrinsic returned a NaN, which requires a call // to the runtime. Recompile with the runtime call. uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_make_not_entrant); } - push_pair(result); + set_result(result); } else { // If this inlining ever returned NaN in the past, we compile a call // to the runtime to properly handle corner cases @@ -1727,7 +1605,7 @@ Node* if_fast = _gvn.transform( new (C) IfTrueNode(iff) ); if (!if_slow->is_top()) { - RegionNode* result_region = new(C) RegionNode(3); + RegionNode* result_region = new (C) RegionNode(3); PhiNode* result_val = new (C) PhiNode(result_region, Type::DOUBLE); result_region->init_req(1, if_fast); @@ -1747,9 +1625,9 @@ result_region->init_req(2, control()); result_val->init_req(2, value); - push_result(result_region, result_val); + set_result(result_region, result_val); } else { - push_pair(result); + set_result(result); } } } @@ -1757,25 +1635,19 @@ //------------------------------inline_exp------------------------------------- // Inline exp instructions, if possible. The Intel hardware only misses // really odd corner cases (+/- Infinity). Just uncommon-trap them. -bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dexp, "Not exp"); - - _sp += arg_size(); // restore stack pointer - Node *x = pop_math_arg(); - Node *result = _gvn.transform(new (C) ExpDNode(0,x)); - - finish_pow_exp(result, x, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); +bool LibraryCallKit::inline_exp() { + Node* arg = round_double_node(argument(0)); + Node* n = _gvn.transform(new (C) ExpDNode(0, arg)); + + finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); C->set_has_split_ifs(true); // Has chance for split-if optimization - return true; } //------------------------------inline_pow------------------------------------- // Inline power instructions, if possible. -bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dpow, "Not pow"); - +bool LibraryCallKit::inline_pow() { // Pseudocode for pow // if (x <= 0.0) { // long longy = (long)y; @@ -1793,15 +1665,14 @@ // } // return result; - _sp += arg_size(); // restore stack pointer - Node* y = pop_math_arg(); - Node* x = pop_math_arg(); + Node* x = round_double_node(argument(0)); + Node* y = round_double_node(argument(2)); Node* result = NULL; if (!too_many_traps(Deoptimization::Reason_intrinsic)) { // Short form: skip the fancy tests and just check for NaN result. - result = _gvn.transform( new (C) PowDNode(0, x, y) ); + result = _gvn.transform(new (C) PowDNode(0, x, y)); } else { // If this inlining ever returned NaN in the past, include all // checks + call to the runtime. @@ -1919,55 +1790,23 @@ // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - result=_gvn.transform(phi); + result = _gvn.transform(phi); } finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); C->set_has_split_ifs(true); // Has chance for split-if optimization - - return true; -} - -//------------------------------inline_trans------------------------------------- -// Inline transcendental instructions, if possible. The Intel hardware gets -// these right, no funny corner cases missed. -bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) { - _sp += arg_size(); // restore stack pointer - Node* arg = pop_math_arg(); - Node* trans = NULL; - - switch (id) { - case vmIntrinsics::_dlog: - trans = _gvn.transform((Node*)new (C) LogDNode(arg)); - break; - case vmIntrinsics::_dlog10: - trans = _gvn.transform((Node*)new (C) Log10DNode(arg)); - break; - default: - assert(false, "bad intrinsic was passed in"); - return false; - } - - // Push result back on JVM stack - push_pair(trans); return true; } //------------------------------runtime_math----------------------------- bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) { - Node* a = NULL; - Node* b = NULL; - assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(), "must be (DD)D or (D)D type"); // Inputs - _sp += arg_size(); // restore stack pointer - if (call_type == OptoRuntime::Math_DD_D_Type()) { - b = pop_math_arg(); - } - a = pop_math_arg(); + Node* a = round_double_node(argument(0)); + Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : NULL; const TypePtr* no_memory_effects = NULL; Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName, @@ -1979,43 +1818,43 @@ assert(value_top == top(), "second value must be top"); #endif - push_pair(value); + set_result(value); return true; } //------------------------------inline_math_native----------------------------- bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { +#define FN_PTR(f) CAST_FROM_FN_PTR(address, f) switch (id) { // These intrinsics are not properly supported on all hardware - case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS"); - case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN"); - case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN"); - - case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG"); - case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10"); + case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS"); + case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin), "SIN"); + case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN"); + + case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_math(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG"); + case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); // These intrinsics are supported on all hardware - case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false; - case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false; - - case vmIntrinsics::_dexp: return - Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); - case vmIntrinsics::_dpow: return - Matcher::has_match_rule(Op_PowD) ? inline_pow(id) : - runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_math(id) : false; + case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false; + + case vmIntrinsics::_dexp: return Matcher::has_match_rule(Op_ExpD) ? inline_exp() : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP"); + case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() : + runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW"); +#undef FN_PTR // These intrinsics are not yet correctly implemented case vmIntrinsics::_datan2: return false; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); return false; } } @@ -2030,8 +1869,7 @@ //----------------------------inline_min_max----------------------------------- bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) { - push(generate_min_max(id, argument(0), argument(1))); - + set_result(generate_min_max(id, argument(0), argument(1))); return true; } @@ -2254,99 +2092,37 @@ } } -//-------------------inline_numberOfLeadingZeros_int/long----------------------- -// inline int Integer.numberOfLeadingZeros(int) -// inline int Long.numberOfLeadingZeros(long) -bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros"); - if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false; - if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false; - _sp += arg_size(); // restore stack pointer +//--------------------------inline_number_methods----------------------------- +// inline int Integer.numberOfLeadingZeros(int) +// inline int Long.numberOfLeadingZeros(long) +// +// inline int Integer.numberOfTrailingZeros(int) +// inline int Long.numberOfTrailingZeros(long) +// +// inline int Integer.bitCount(int) +// inline int Long.bitCount(long) +// +// inline char Character.reverseBytes(char) +// inline short Short.reverseBytes(short) +// inline int Integer.reverseBytes(int) +// inline long Long.reverseBytes(long) +bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) { + Node* arg = argument(0); + Node* n; switch (id) { - case vmIntrinsics::_numberOfLeadingZeros_i: - push(_gvn.transform(new (C) CountLeadingZerosINode(pop()))); - break; - case vmIntrinsics::_numberOfLeadingZeros_l: - push(_gvn.transform(new (C) CountLeadingZerosLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); - } - return true; -} - -//-------------------inline_numberOfTrailingZeros_int/long---------------------- -// inline int Integer.numberOfTrailingZeros(int) -// inline int Long.numberOfTrailingZeros(long) -bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros"); - if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false; - if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_numberOfTrailingZeros_i: - push(_gvn.transform(new (C) CountTrailingZerosINode(pop()))); - break; - case vmIntrinsics::_numberOfTrailingZeros_l: - push(_gvn.transform(new (C) CountTrailingZerosLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); + case vmIntrinsics::_numberOfLeadingZeros_i: n = new (C) CountLeadingZerosINode( arg); break; + case vmIntrinsics::_numberOfLeadingZeros_l: n = new (C) CountLeadingZerosLNode( arg); break; + case vmIntrinsics::_numberOfTrailingZeros_i: n = new (C) CountTrailingZerosINode(arg); break; + case vmIntrinsics::_numberOfTrailingZeros_l: n = new (C) CountTrailingZerosLNode(arg); break; + case vmIntrinsics::_bitCount_i: n = new (C) PopCountINode( arg); break; + case vmIntrinsics::_bitCount_l: n = new (C) PopCountLNode( arg); break; + case vmIntrinsics::_reverseBytes_c: n = new (C) ReverseBytesUSNode(0, arg); break; + case vmIntrinsics::_reverseBytes_s: n = new (C) ReverseBytesSNode( 0, arg); break; + case vmIntrinsics::_reverseBytes_i: n = new (C) ReverseBytesINode( 0, arg); break; + case vmIntrinsics::_reverseBytes_l: n = new (C) ReverseBytesLNode( 0, arg); break; + default: fatal_unexpected_iid(id); break; } - return true; -} - -//----------------------------inline_bitCount_int/long----------------------- -// inline int Integer.bitCount(int) -// inline int Long.bitCount(long) -bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount"); - if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false; - if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_bitCount_i: - push(_gvn.transform(new (C) PopCountINode(pop()))); - break; - case vmIntrinsics::_bitCount_l: - push(_gvn.transform(new (C) PopCountLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); - } - return true; -} - -//----------------------------inline_reverseBytes_int/long/char/short------------------- -// inline Integer.reverseBytes(int) -// inline Long.reverseBytes(long) -// inline Character.reverseBytes(char) -// inline Short.reverseBytes(short) -bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l || - id == vmIntrinsics::_reverseBytes_c || id == vmIntrinsics::_reverseBytes_s, - "not reverse Bytes"); - if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false; - if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL)) return false; - if (id == vmIntrinsics::_reverseBytes_c && !Matcher::has_match_rule(Op_ReverseBytesUS)) return false; - if (id == vmIntrinsics::_reverseBytes_s && !Matcher::has_match_rule(Op_ReverseBytesS)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_reverseBytes_i: - push(_gvn.transform(new (C) ReverseBytesINode(0, pop()))); - break; - case vmIntrinsics::_reverseBytes_l: - push_pair(_gvn.transform(new (C) ReverseBytesLNode(0, pop_pair()))); - break; - case vmIntrinsics::_reverseBytes_c: - push(_gvn.transform(new (C) ReverseBytesUSNode(0, pop()))); - break; - case vmIntrinsics::_reverseBytes_s: - push(_gvn.transform(new (C) ReverseBytesSNode(0, pop()))); - break; - default: - ; - } + set_result(_gvn.transform(n)); return true; } @@ -2356,7 +2132,7 @@ // Helper that guards and inserts a pre-barrier. void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, - Node* pre_val, int nargs, bool need_mem_bar) { + Node* pre_val, bool need_mem_bar) { // We could be accessing the referent field of a reference object. If so, when G1 // is enabled, we need to log the value in the referent field in an SATB buffer. // This routine performs some compile time filters and generates suitable @@ -2406,8 +2182,8 @@ // } // } - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); + float likely = PROB_LIKELY( 0.999); + float unlikely = PROB_UNLIKELY(0.999); IdealKit ideal(this); #define __ ideal. @@ -2419,9 +2195,7 @@ sync_kit(ideal); Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass())); - _sp += nargs; // gen_instanceof might do an uncommon trap Node* is_instof = gen_instanceof(base_oop, ref_klass_con); - _sp -= nargs; // Update IdealKit memory and control from graphKit. __ sync_kit(this); @@ -2505,7 +2279,7 @@ { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT if (!is_store) { // Object getObject(Object base, int/long offset), etc. @@ -2543,42 +2317,19 @@ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ]; - - // Argument words: "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words - int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0); - assert(callee()->arg_size() == nargs, "must be"); - - debug_only(int saved_sp = _sp); - _sp += nargs; - - Node* val; - debug_only(val = (Node*)(uintptr_t)-1); - - - if (is_store) { - // Get the value being stored. (Pop it first; it was pushed last.) - switch (type) { - case T_DOUBLE: - case T_LONG: - case T_ADDRESS: - val = pop_pair(); - break; - default: - val = pop(); - } - } + Node* receiver = argument(0); // type: oop // Build address expression. See the code in inline_unsafe_prefetch. - Node *adr; - Node *heap_base_oop = top(); + Node* adr; + Node* heap_base_oop = top(); Node* offset = top(); + Node* val; if (!is_native_ptr) { + // The base is either a Java object or a value produced by Unsafe.staticFieldBase + Node* base = argument(1); // type: oop // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset - offset = pop_pair(); - // The base is either a Java object or a value produced by Unsafe.staticFieldBase - Node* base = pop(); + offset = argument(2); // type: long // We currently rely on the cookies produced by Unsafe.xxxFieldOffset // to be plain byte offsets, which are also the same as those accepted // by oopDesc::field_base. @@ -2588,18 +2339,14 @@ offset = ConvL2X(offset); adr = make_unsafe_address(base, offset); heap_base_oop = base; + val = is_store ? argument(4) : NULL; } else { - Node* ptr = pop_pair(); - // Adjust Java long to machine word: - ptr = ConvL2X(ptr); + Node* ptr = argument(1); // type: long + ptr = ConvL2X(ptr); // adjust Java long to machine word adr = make_unsafe_address(NULL, ptr); + val = is_store ? argument(3) : NULL; } - // Pop receiver last: it was pushed first. - Node *receiver = pop(); - - assert(saved_sp == _sp, "must have correct argument count"); - const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); // First guess at the value type. @@ -2633,13 +2380,7 @@ } } - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + receiver = null_check(receiver); if (stopped()) { return true; } @@ -2671,34 +2412,36 @@ if (!is_store) { Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile); - // load value and push onto stack + // load value switch (type) { case T_BOOLEAN: case T_CHAR: case T_BYTE: case T_SHORT: case T_INT: + case T_LONG: case T_FLOAT: - push(p); + case T_DOUBLE: break; case T_OBJECT: if (need_read_barrier) { - insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar)); + insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar)); } - push(p); break; case T_ADDRESS: // Cast to an int type. - p = _gvn.transform( new (C) CastP2XNode(NULL,p) ); + p = _gvn.transform(new (C) CastP2XNode(NULL, p)); p = ConvX2L(p); - push_pair(p); + break; + default: + fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); break; - case T_DOUBLE: - case T_LONG: - push_pair( p ); - break; - default: ShouldNotReachHere(); } + // The load node has the control of the preceding MemBarCPUOrder. All + // following nodes will have the control of the MemBarCPUOrder inserted at + // the end of this method. So, pushing the load onto the stack at a later + // point is fine. + set_result(p); } else { // place effect of store into memory switch (type) { @@ -2762,7 +2505,7 @@ { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT // Object getObject(Object base, int/long offset), etc. BasicType rtype = sig->return_type()->basic_type(); @@ -2780,19 +2523,21 @@ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" if not static, plus (oop/offset) or (lo/hi) args - int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3); - - debug_only(int saved_sp = _sp); - _sp += nargs; + const int idx = is_static ? 0 : 1; + if (!is_static) { + null_check_receiver(); + if (stopped()) { + return true; + } + } // Build address expression. See the code in inline_unsafe_access. Node *adr; if (!is_native_ptr) { + // The base is either a Java object or a value produced by Unsafe.staticFieldBase + Node* base = argument(idx + 0); // type: oop // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset - Node* offset = pop_pair(); - // The base is either a Java object or a value produced by Unsafe.staticFieldBase - Node* base = pop(); + Node* offset = argument(idx + 1); // type: long // We currently rely on the cookies produced by Unsafe.xxxFieldOffset // to be plain byte offsets, which are also the same as those accepted // by oopDesc::field_base. @@ -2802,31 +2547,11 @@ offset = ConvL2X(offset); adr = make_unsafe_address(base, offset); } else { - Node* ptr = pop_pair(); - // Adjust Java long to machine word: - ptr = ConvL2X(ptr); + Node* ptr = argument(idx + 0); // type: long + ptr = ConvL2X(ptr); // adjust Java long to machine word adr = make_unsafe_address(NULL, ptr); } - if (is_static) { - assert(saved_sp == _sp, "must have correct argument count"); - } else { - // Pop receiver last: it was pushed first. - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; - if (stopped()) { - return true; - } - } - // Generate the read or write prefetch Node *prefetch; if (is_store) { @@ -2841,7 +2566,22 @@ } //----------------------------inline_unsafe_load_store---------------------------- - +// This method serves a couple of different customers (depending on LoadStoreKind): +// +// LS_cmpxchg: +// public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x); +// public final native boolean compareAndSwapInt( Object o, long offset, int expected, int x); +// public final native boolean compareAndSwapLong( Object o, long offset, long expected, long x); +// +// LS_xadd: +// public int getAndAddInt( Object o, long offset, int delta) +// public long getAndAddLong(Object o, long offset, long delta) +// +// LS_xchg: +// int getAndSet(Object o, long offset, int newValue) +// long getAndSet(Object o, long offset, long newValue) +// Object getAndSet(Object o, long offset, Object newValue) +// bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) { // This basic scheme here is the same as inline_unsafe_access, but // differs in enough details that combining them would make the code @@ -2856,7 +2596,8 @@ BasicType rtype; { ResourceMark rm; - ciSignature* sig = signature(); + // Check the signatures. + ciSignature* sig = callee()->signature(); rtype = sig->return_type()->basic_type(); if (kind == LS_xadd || kind == LS_xchg) { // Check the signatures. @@ -2881,28 +2622,31 @@ } #endif //PRODUCT - // number of stack slots per value argument (1 or 2) - int type_words = type2size[type]; - C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" plus oop plus offset (plus oldvalue) plus newvalue/delta; - int nargs = 1 + 1 + 2 + ((kind == LS_cmpxchg) ? type_words : 0) + type_words; - - // pop arguments: newval, offset, base, and receiver - debug_only(int saved_sp = _sp); - _sp += nargs; - Node* newval = (type_words == 1) ? pop() : pop_pair(); - Node* oldval = (kind == LS_cmpxchg) ? ((type_words == 1) ? pop() : pop_pair()) : NULL; - Node *offset = pop_pair(); - Node *base = pop(); - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check receiver. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + // Get arguments: + Node* receiver = NULL; + Node* base = NULL; + Node* offset = NULL; + Node* oldval = NULL; + Node* newval = NULL; + if (kind == LS_cmpxchg) { + const bool two_slot_type = type2size[type] == 2; + receiver = argument(0); // type: oop + base = argument(1); // type: oop + offset = argument(2); // type: long + oldval = argument(4); // type: oop, int, or long + newval = argument(two_slot_type ? 6 : 5); // type: oop, int, or long + } else if (kind == LS_xadd || kind == LS_xchg){ + receiver = argument(0); // type: oop + base = argument(1); // type: oop + offset = argument(2); // type: long + oldval = NULL; + newval = argument(4); // type: oop, int, or long + } + + // Null check receiver. + receiver = null_check(receiver); if (stopped()) { return true; } @@ -3008,7 +2752,7 @@ post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); break; default: - ShouldNotReachHere(); + fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); break; } @@ -3029,10 +2773,14 @@ #endif assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); - push_node(load_store->bottom_type()->basic_type(), load_store); + set_result(load_store); return true; } +//----------------------------inline_unsafe_ordered_store---------------------- +// public native void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x); +// public native void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x); +// public native void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x); bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { // This is another variant of inline_unsafe_access, differing in // that it always issues store-store ("release") barrier and ensures @@ -3044,7 +2792,7 @@ { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT BasicType rtype = sig->return_type()->basic_type(); assert(rtype == T_VOID, "must return void"); @@ -3055,27 +2803,16 @@ } #endif //PRODUCT - // number of stack slots per value argument (1 or 2) - int type_words = type2size[type]; - C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" plus oop plus offset plus value; - int nargs = 1 + 1 + 2 + type_words; - - // pop arguments: val, offset, base, and receiver - debug_only(int saved_sp = _sp); - _sp += nargs; - Node* val = (type_words == 1) ? pop() : pop_pair(); - Node *offset = pop_pair(); - Node *base = pop(); - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check receiver. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + // Get arguments: + Node* receiver = argument(0); // type: oop + Node* base = argument(1); // type: oop + Node* offset = argument(2); // type: long + Node* val = argument(4); // type: oop, int, or long + + // Null check receiver. + receiver = null_check(receiver); if (stopped()) { return true; } @@ -3092,7 +2829,7 @@ insert_mem_bar(Op_MemBarRelease); insert_mem_bar(Op_MemBarCPUOrder); // Ensure that the store is atomic for longs: - bool require_atomic_access = true; + const bool require_atomic_access = true; Node* store; if (type == T_OBJECT) // reference stores need a store barrier. store = store_oop_to_unknown(control(), base, adr, adr_type, val, type); @@ -3103,20 +2840,17 @@ return true; } +//----------------------------inline_unsafe_allocate--------------------------- +// public native Object sun.mics.Unsafe.allocateInstance(Class<?> cls); bool LibraryCallKit::inline_unsafe_allocate() { if (callee()->is_static()) return false; // caller must have the capability! - int nargs = 1 + 1; - assert(signature()->size() == nargs-1, "alloc has 1 argument"); - null_check_receiver(callee()); // check then ignore argument(0) - _sp += nargs; // set original stack for use by uncommon_trap - Node* cls = do_null_check(argument(1), T_OBJECT); - _sp -= nargs; + + null_check_receiver(); // null-check, then ignore + Node* cls = null_check(argument(1)); if (stopped()) return true; - Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0); - _sp += nargs; // set original stack for use by uncommon_trap - kls = do_null_check(kls, T_OBJECT); - _sp -= nargs; + Node* kls = load_klass_from_mirror(cls, false, NULL, 0); + kls = null_check(kls); if (stopped()) return true; // argument was like int.class // Note: The argument might still be an illegal value like @@ -3127,12 +2861,11 @@ // can generate code to load it as unsigned byte. Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN); Node* bits = intcon(instanceKlass::fully_initialized); - Node* test = _gvn.transform( new (C) SubINode(inst, bits) ); + Node* test = _gvn.transform(new (C) SubINode(inst, bits)); // The 'test' is non-zero if we need to take a slow path. Node* obj = new_instance(kls, test); - push(obj); - + set_result(obj); return true; } @@ -3143,15 +2876,10 @@ * return myklass->trace_id & ~0x3 */ bool LibraryCallKit::inline_native_classID() { - int nargs = 1 + 1; - null_check_receiver(callee()); // check then ignore argument(0) - _sp += nargs; - Node* cls = do_null_check(argument(1), T_OBJECT); - _sp -= nargs; - Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0); - _sp += nargs; - kls = do_null_check(kls, T_OBJECT); - _sp -= nargs; + null_check_receiver(); // null-check, then ignore + Node* cls = null_check(argument(1), T_OBJECT); + Node* kls = load_klass_from_mirror(cls, false, NULL, 0); + kls = null_check(kls, T_OBJECT); ByteSize offset = TRACE_ID_OFFSET; Node* insp = basic_plus_adr(kls, in_bytes(offset)); Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG); @@ -3162,7 +2890,7 @@ const TypePtr *adr_type = _gvn.type(insp)->isa_ptr(); store_to_memory(control(), insp, orl, T_LONG, adr_type); - push_pair(andl); + set_result(andl); return true; } @@ -3177,13 +2905,12 @@ size_t thread_id_size = OSThread::thread_id_size(); if (thread_id_size == (size_t) BytesPerLong) { threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG)); - push(threadid); } else if (thread_id_size == (size_t) BytesPerInt) { threadid = make_load(control(), p, TypeInt::INT, T_INT); - push(threadid); } else { ShouldNotReachHere(); } + set_result(threadid); return true; } #endif @@ -3192,29 +2919,28 @@ // inline code for System.currentTimeMillis() and System.nanoTime() // these have the same type and signature bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) { - const TypeFunc *tf = OptoRuntime::void_long_Type(); + const TypeFunc* tf = OptoRuntime::void_long_Type(); const TypePtr* no_memory_effects = NULL; Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects); Node* value = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+0)); #ifdef ASSERT - Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms + 1)); + Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+1)); assert(value_top == top(), "second value must be top"); #endif - push_pair(value); + set_result(value); return true; } //------------------------inline_native_currentThread------------------ bool LibraryCallKit::inline_native_currentThread() { Node* junk = NULL; - push(generate_current_thread(junk)); + set_result(generate_current_thread(junk)); return true; } //------------------------inline_native_isInterrupted------------------ +// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted); bool LibraryCallKit::inline_native_isInterrupted() { - const int nargs = 1+1; // receiver + boolean - assert(nargs == arg_size(), "sanity"); // Add a fast path to t.isInterrupted(clear_int): // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int)) // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int) @@ -3226,14 +2952,23 @@ // We only go to the fast case code if we pass two guards. // Paths which do not pass are accumulated in the slow_region. + + enum { + no_int_result_path = 1, // t == Thread.current() && !TLS._osthread._interrupted + no_clear_result_path = 2, // t == Thread.current() && TLS._osthread._interrupted && !clear_int + slow_result_path = 3, // slow path: t.isInterrupted(clear_int) + PATH_LIMIT + }; + + // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag + // out of the function. + insert_mem_bar(Op_MemBarCPUOrder); + + RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT); + PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL); + RegionNode* slow_region = new (C) RegionNode(1); record_for_igvn(slow_region); - RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow - PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL); - enum { no_int_result_path = 1, - no_clear_result_path = 2, - slow_result_path = 3 - }; // (a) Receiving thread must be the current thread. Node* rec_thr = argument(0); @@ -3242,14 +2977,13 @@ Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) ); Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) ); - bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO); - if (!known_current_thread) - generate_slow_guard(bol_thr, slow_region); + generate_slow_guard(bol_thr, slow_region); // (b) Interrupt bit on TLS must be false. Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset())); Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS); p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset())); + // Set the control input on the field _interrupted read to prevent it floating up. Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT); Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) ); @@ -3294,27 +3028,24 @@ Node* slow_val = set_results_for_java_call(slow_call); // this->control() comes from set_results_for_java_call - // If we know that the result of the slow call will be true, tell the optimizer! - if (known_current_thread) slow_val = intcon(1); - Node* fast_io = slow_call->in(TypeFunc::I_O); Node* fast_mem = slow_call->in(TypeFunc::Memory); + // These two phis are pre-filled with copies of of the fast IO and Memory - Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO); - Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM); + PhiNode* result_mem = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM); + PhiNode* result_io = PhiNode::make(result_rgn, fast_io, Type::ABIO); result_rgn->init_req(slow_result_path, control()); - io_phi ->init_req(slow_result_path, i_o()); - mem_phi ->init_req(slow_result_path, reset_memory()); + result_io ->init_req(slow_result_path, i_o()); + result_mem->init_req(slow_result_path, reset_memory()); result_val->init_req(slow_result_path, slow_val); - set_all_memory( _gvn.transform(mem_phi) ); - set_i_o( _gvn.transform(io_phi) ); + set_all_memory(_gvn.transform(result_mem)); + set_i_o( _gvn.transform(result_io)); } - push_result(result_rgn, result_val); C->set_has_split_ifs(true); // Has chance for split-if optimization - + set_result(result_rgn, result_val); return true; } @@ -3334,7 +3065,6 @@ // If the region is NULL, force never_see_null = true. Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path, int offset) { @@ -3342,7 +3072,6 @@ Node* p = basic_plus_adr(mirror, offset); const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL; Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) ); - _sp += nargs; // any deopt will start just before call to enclosing method Node* null_ctl = top(); kls = null_check_oop(kls, &null_ctl, never_see_null); if (region != NULL) { @@ -3351,7 +3080,6 @@ } else { assert(null_ctl == top(), "no loose ends"); } - _sp -= nargs; return kls; } @@ -3376,7 +3104,6 @@ //-------------------------inline_native_Class_query------------------- bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { - int nargs = 1+0; // just the Class mirror, in most cases const Type* return_type = TypeInt::BOOL; Node* prim_return_value = top(); // what happens if it's a primitive class? bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check); @@ -3384,11 +3111,14 @@ enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT }; + Node* mirror = argument(0); + Node* obj = top(); + switch (id) { case vmIntrinsics::_isInstance: - nargs = 1+1; // the Class mirror, plus the object getting queried about // nothing is an instance of a primitive type prim_return_value = intcon(0); + obj = argument(1); break; case vmIntrinsics::_getModifiers: prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC); @@ -3419,12 +3149,10 @@ return_type = TypeInt::INT; // not bool! 6297094 break; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } - Node* mirror = argument(0); - Node* obj = (nargs <= 1)? top(): argument(1); - const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr(); if (mirror_con == NULL) return false; // cannot happen? @@ -3451,9 +3179,7 @@ // For Reflection.getClassAccessFlags(), the null check occurs in // the wrong place; see inline_unsafe_access(), above, for a similar // situation. - _sp += nargs; // set original stack for use by uncommon_trap - mirror = do_null_check(mirror, T_OBJECT); - _sp -= nargs; + mirror = null_check(mirror); // If mirror or obj is dead, only null-path is taken. if (stopped()) return true; @@ -3461,11 +3187,10 @@ // Now load the mirror's klass metaobject, and null-check it. // Side-effects region with the control path if the klass is null. - Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs, - region, _prim_path); + Node* kls = load_klass_from_mirror(mirror, never_see_null, region, _prim_path); // If kls is null, we have a primitive mirror. phi->init_req(_prim_path, prim_return_value); - if (stopped()) { push_result(region, phi); return true; } + if (stopped()) { set_result(region, phi); return true; } Node* p; // handy temp Node* null_ctl; @@ -3476,9 +3201,7 @@ switch (id) { case vmIntrinsics::_isInstance: // nothing is an instance of a primitive type - _sp += nargs; // gen_instanceof might do an uncommon trap query_value = gen_instanceof(obj, kls); - _sp -= nargs; break; case vmIntrinsics::_getModifiers: @@ -3553,16 +3276,16 @@ break; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } // Fall-through is the normal case of a query to a real class. phi->init_req(1, query_value); region->init_req(1, control()); - push_result(region, phi); C->set_has_split_ifs(true); // Has chance for split-if optimization - + set_result(region, phi); return true; } @@ -3570,8 +3293,6 @@ // This intrinsic takes the JNI calls out of the heart of // UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc. bool LibraryCallKit::inline_native_subtype_check() { - int nargs = 1+1; // the Class mirror, plus the other class getting examined - // Pull both arguments off the stack. Node* args[2]; // two java.lang.Class mirrors: superc, subc args[0] = argument(0); @@ -3602,11 +3323,9 @@ int which_arg; for (which_arg = 0; which_arg <= 1; which_arg++) { Node* arg = args[which_arg]; - _sp += nargs; // set original stack for use by uncommon_trap - arg = do_null_check(arg, T_OBJECT); - _sp -= nargs; + arg = null_check(arg); if (stopped()) break; - args[which_arg] = _gvn.transform(arg); + args[which_arg] = arg; Node* p = basic_plus_adr(arg, class_klass_offset); Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type); @@ -3618,9 +3337,7 @@ for (which_arg = 0; which_arg <= 1; which_arg++) { Node* kls = klasses[which_arg]; Node* null_ctl = top(); - _sp += nargs; // set original stack for use by uncommon_trap kls = null_check_oop(kls, &null_ctl, never_see_null); - _sp -= nargs; int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path); region->init_req(prim_path, null_ctl); if (stopped()) break; @@ -3670,8 +3387,7 @@ } set_control(_gvn.transform(region)); - push(_gvn.transform(phi)); - + set_result(_gvn.transform(phi)); return true; } @@ -3719,14 +3435,12 @@ //-----------------------inline_native_newArray-------------------------- +// private static native Object java.lang.reflect.newArray(Class<?> componentType, int length); bool LibraryCallKit::inline_native_newArray() { - int nargs = 2; Node* mirror = argument(0); Node* count_val = argument(1); - _sp += nargs; // set original stack for use by uncommon_trap - mirror = do_null_check(mirror, T_OBJECT); - _sp -= nargs; + mirror = null_check(mirror); // If mirror or obj is dead, only null-path is taken. if (stopped()) return true; @@ -3740,7 +3454,6 @@ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check); Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null, - nargs, result_reg, _slow_path); Node* normal_ctl = control(); Node* no_array_ctl = result_reg->in(_slow_path); @@ -3767,7 +3480,7 @@ // Normal case: The array type has been cached in the java.lang.Class. // The following call works fine even if the array type is polymorphic. // It could be a dynamic mix of int[], boolean[], Object[], etc. - Node* obj = new_array(klass_node, count_val, nargs); + Node* obj = new_array(klass_node, count_val, 0); // no arguments to push result_reg->init_req(_normal_path, control()); result_val->init_req(_normal_path, obj); result_io ->init_req(_normal_path, i_o()); @@ -3777,23 +3490,18 @@ // Return the combined state. set_i_o( _gvn.transform(result_io) ); set_all_memory( _gvn.transform(result_mem) ); - push_result(result_reg, result_val); + C->set_has_split_ifs(true); // Has chance for split-if optimization - + set_result(result_reg, result_val); return true; } //----------------------inline_native_getLength-------------------------- +// public static native int java.lang.reflect.Array.getLength(Object array); bool LibraryCallKit::inline_native_getLength() { if (too_many_traps(Deoptimization::Reason_intrinsic)) return false; - int nargs = 1; - Node* array = argument(0); - - _sp += nargs; // set original stack for use by uncommon_trap - array = do_null_check(array, T_OBJECT); - _sp -= nargs; - + Node* array = null_check(argument(0)); // If array is dead, only null-path is taken. if (stopped()) return true; @@ -3803,7 +3511,6 @@ if (non_array != NULL) { PreserveJVMState pjvms(this); set_control(non_array); - _sp += nargs; // push the arguments back on the stack uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_maybe_recompile); } @@ -3813,19 +3520,21 @@ // The works fine even if the array type is polymorphic. // It could be a dynamic mix of int[], boolean[], Object[], etc. - push( load_array_length(array) ); - - C->set_has_split_ifs(true); // Has chance for split-if optimization - + Node* result = load_array_length(array); + + C->set_has_split_ifs(true); // Has chance for split-if optimization + set_result(result); return true; } //------------------------inline_array_copyOf---------------------------- +// public static <T,U> T[] java.util.Arrays.copyOf( U[] original, int newLength, Class<? extends T[]> newType); +// public static <T,U> T[] java.util.Arrays.copyOfRange(U[] original, int from, int to, Class<? extends T[]> newType); bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { + return false; if (too_many_traps(Deoptimization::Reason_intrinsic)) return false; - // Restore the stack and pop off the arguments. - int nargs = 3 + (is_copyOfRange? 1: 0); + // Get the arguments. Node* original = argument(0); Node* start = is_copyOfRange? argument(1): intcon(0); Node* end = is_copyOfRange? argument(2): argument(1); @@ -3833,23 +3542,21 @@ Node* newcopy; - //set the original stack and the reexecute bit for the interpreter to reexecute - //the bytecode that invokes Arrays.copyOf if deoptimization happens + // Set the original stack and the reexecute bit for the interpreter to reexecute + // the bytecode that invokes Arrays.copyOf if deoptimization happens. { PreserveReexecuteState preexecs(this); - _sp += nargs; jvms()->set_should_reexecute(true); - array_type_mirror = do_null_check(array_type_mirror, T_OBJECT); - original = do_null_check(original, T_OBJECT); + array_type_mirror = null_check(array_type_mirror); + original = null_check(original); // Check if a null path was taken unconditionally. if (stopped()) return true; Node* orig_length = load_array_length(original); - Node* klass_node = load_klass_from_mirror(array_type_mirror, false, 0, - NULL, 0); - klass_node = do_null_check(klass_node, T_OBJECT); + Node* klass_node = load_klass_from_mirror(array_type_mirror, false, NULL, 0); + klass_node = null_check(klass_node); RegionNode* bailout = new (C) RegionNode(1); record_for_igvn(bailout); @@ -3872,7 +3579,7 @@ Node* length = end; if (_gvn.type(start) != TypeInt::ZERO) { - length = _gvn.transform( new (C) SubINode(end, start) ); + length = _gvn.transform(new (C) SubINode(end, start)); } // Bail out if length is negative. @@ -3883,19 +3590,18 @@ if (bailout->req() > 1) { PreserveJVMState pjvms(this); - set_control( _gvn.transform(bailout) ); + set_control(_gvn.transform(bailout)); uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_maybe_recompile); } if (!stopped()) { - // How many elements will we copy from the original? // The answer is MinI(orig_length - start, length). - Node* orig_tail = _gvn.transform( new(C) SubINode(orig_length, start) ); + Node* orig_tail = _gvn.transform(new (C) SubINode(orig_length, start)); Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); - newcopy = new_array(klass_node, length, 0); + newcopy = new_array(klass_node, length, 0); // no argments to push // Generate a direct call to the right arraycopy function(s). // We know the copy is disjoint but we might not know if the @@ -3910,14 +3616,12 @@ original, start, newcopy, intcon(0), moved, disjoint_bases, length_never_negative); } - } //original reexecute and sp are set back here - - if(!stopped()) { - push(newcopy); - } + } // original reexecute is set back here C->set_has_split_ifs(true); // Has chance for split-if optimization - + if (!stopped()) { + set_result(newcopy); + } return true; } @@ -3969,7 +3673,7 @@ SharedRuntime::get_resolve_static_call_stub(), method, bci()); } else if (is_virtual) { - null_check_receiver(method); + null_check_receiver(); int vtable_index = methodOopDesc::invalid_vtable_index; if (UseInlineCaches) { // Suppress the vtable call @@ -3983,7 +3687,7 @@ SharedRuntime::get_resolve_virtual_call_stub(), method, vtable_index, bci()); } else { // neither virtual nor static: opt_virtual - null_check_receiver(method); + null_check_receiver(); slow_call = new(C) CallStaticJavaNode(tf, SharedRuntime::get_resolve_opt_virtual_call_stub(), method, bci()); @@ -4012,7 +3716,7 @@ Node* obj = NULL; if (!is_static) { // Check for hashing null object - obj = null_check_receiver(callee()); + obj = null_check_receiver(); if (stopped()) return true; // unconditionally null result_reg->init_req(_null_path, top()); result_val->init_req(_null_path, top()); @@ -4028,9 +3732,9 @@ // Unconditionally null? Then return right away. if (stopped()) { - set_control( result_reg->in(_null_path) ); + set_control( result_reg->in(_null_path)); if (!stopped()) - push( result_val ->in(_null_path) ); + set_result(result_val->in(_null_path)); return true; } @@ -4103,8 +3807,7 @@ if (!stopped()) { // No need for PreserveJVMState, because we're using up the present state. set_all_memory(init_mem); - vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode; - if (is_static) hashCode_id = vmIntrinsics::_identityHashCode; + vmIntrinsics::ID hashCode_id = is_static ? vmIntrinsics::_identityHashCode : vmIntrinsics::_hashCode; CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static); Node* slow_result = set_results_for_java_call(slow_call); // this->control() comes from set_results_for_java_call @@ -4117,48 +3820,38 @@ // Return the combined state. set_i_o( _gvn.transform(result_io) ); set_all_memory( _gvn.transform(result_mem) ); - push_result(result_reg, result_val); - + + set_result(result_reg, result_val); return true; } //---------------------------inline_native_getClass---------------------------- +// public final native Class<?> java.lang.Object.getClass(); +// // Build special case code for calls to getClass on an object. bool LibraryCallKit::inline_native_getClass() { - Node* obj = null_check_receiver(callee()); + Node* obj = null_check_receiver(); if (stopped()) return true; - push( load_mirror_from_klass(load_object_klass(obj)) ); + set_result(load_mirror_from_klass(load_object_klass(obj))); return true; } //-----------------inline_native_Reflection_getCallerClass--------------------- +// public static native Class<?> sun.reflect.Reflection.getCallerClass(int realFramesToSkip); +// // In the presence of deep enough inlining, getCallerClass() becomes a no-op. // // NOTE that this code must perform the same logic as // vframeStream::security_get_caller_frame in that it must skip // Method.invoke() and auxiliary frames. - - - - bool LibraryCallKit::inline_native_Reflection_getCallerClass() { - ciMethod* method = callee(); - #ifndef PRODUCT if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) { tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass"); } #endif - debug_only(int saved_sp = _sp); - - // Argument words: (int depth) - int nargs = 1; - - _sp += nargs; - Node* caller_depth_node = pop(); - - assert(saved_sp == _sp, "must have correct argument count"); + Node* caller_depth_node = argument(0); // The depth value must be a constant in order for the runtime call // to be eliminated. @@ -4230,7 +3923,8 @@ tty->print_cr(" Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth); tty->print_cr(" JVM state at this point:"); for (int i = _depth; i >= 1; i--) { - tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8()); + ciMethod* m = jvms()->of_depth(i)->method(); + tty->print_cr(" %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8()); } } #endif @@ -4240,14 +3934,17 @@ // Acquire method holder as java.lang.Class ciInstanceKlass* caller_klass = caller_jvms->method()->holder(); ciInstance* caller_mirror = caller_klass->java_mirror(); + // Push this as a constant - push(makecon(TypeInstPtr::make(caller_mirror))); + set_result(makecon(TypeInstPtr::make(caller_mirror))); + #ifndef PRODUCT if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) { tty->print_cr(" Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth); tty->print_cr(" JVM state at this point:"); for (int i = _depth; i >= 1; i--) { - tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8()); + ciMethod* m = jvms()->of_depth(i)->method(); + tty->print_cr(" %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8()); } } #endif @@ -4283,36 +3980,23 @@ } bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) { - // restore the arguments - _sp += arg_size(); + Node* arg = argument(0); + Node* result; switch (id) { - case vmIntrinsics::_floatToRawIntBits: - push(_gvn.transform( new (C) MoveF2INode(pop()))); - break; - - case vmIntrinsics::_intBitsToFloat: - push(_gvn.transform( new (C) MoveI2FNode(pop()))); - break; - - case vmIntrinsics::_doubleToRawLongBits: - push_pair(_gvn.transform( new (C) MoveD2LNode(pop_pair()))); - break; - - case vmIntrinsics::_longBitsToDouble: - push_pair(_gvn.transform( new (C) MoveL2DNode(pop_pair()))); - break; + case vmIntrinsics::_floatToRawIntBits: result = new (C) MoveF2INode(arg); break; + case vmIntrinsics::_intBitsToFloat: result = new (C) MoveI2FNode(arg); break; + case vmIntrinsics::_doubleToRawLongBits: result = new (C) MoveD2LNode(arg); break; + case vmIntrinsics::_longBitsToDouble: result = new (C) MoveL2DNode(arg); break; case vmIntrinsics::_doubleToLongBits: { - Node* value = pop_pair(); - // two paths (plus control) merge in a wood RegionNode *r = new (C) RegionNode(3); Node *phi = new (C) PhiNode(r, TypeLong::LONG); - Node *cmpisnan = _gvn.transform( new (C) CmpDNode(value, value)); + Node *cmpisnan = _gvn.transform(new (C) CmpDNode(arg, arg)); // Build the boolean node - Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) ); + Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne)); // Branch either way. // NaN case is less traveled, which makes all the difference. @@ -4330,35 +4014,30 @@ r->init_req(1, iftrue); // Else fall through - Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) ); + Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan)); set_control(iffalse); - phi->init_req(2, _gvn.transform( new (C) MoveD2LNode(value))); + phi->init_req(2, _gvn.transform(new (C) MoveD2LNode(arg))); r->init_req(2, iffalse); // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - Node* result = _gvn.transform(phi); + C->set_has_split_ifs(true); // Has chance for split-if optimization + result = phi; assert(result->bottom_type()->isa_long(), "must be"); - push_pair(result); - - C->set_has_split_ifs(true); // Has chance for split-if optimization - break; } case vmIntrinsics::_floatToIntBits: { - Node* value = pop(); - // two paths (plus control) merge in a wood RegionNode *r = new (C) RegionNode(3); Node *phi = new (C) PhiNode(r, TypeInt::INT); - Node *cmpisnan = _gvn.transform( new (C) CmpFNode(value, value)); + Node *cmpisnan = _gvn.transform(new (C) CmpFNode(arg, arg)); // Build the boolean node - Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) ); + Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne)); // Branch either way. // NaN case is less traveled, which makes all the difference. @@ -4376,29 +4055,27 @@ r->init_req(1, iftrue); // Else fall through - Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) ); + Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan)); set_control(iffalse); - phi->init_req(2, _gvn.transform( new (C) MoveF2INode(value))); + phi->init_req(2, _gvn.transform(new (C) MoveF2INode(arg))); r->init_req(2, iffalse); // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - Node* result = _gvn.transform(phi); + C->set_has_split_ifs(true); // Has chance for split-if optimization + result = phi; assert(result->bottom_type()->isa_int(), "must be"); - push(result); - - C->set_has_split_ifs(true); // Has chance for split-if optimization - break; } default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } - + set_result(_gvn.transform(result)); return true; } @@ -4409,23 +4086,19 @@ #endif //_LP64 //----------------------inline_unsafe_copyMemory------------------------- +// public native void sun.misc.Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes); bool LibraryCallKit::inline_unsafe_copyMemory() { if (callee()->is_static()) return false; // caller must have the capability! - int nargs = 1 + 5 + 3; // 5 args: (src: ptr,off, dst: ptr,off, size) - assert(signature()->size() == nargs-1, "copy has 5 arguments"); - null_check_receiver(callee()); // check then ignore argument(0) + null_check_receiver(); // null-check receiver if (stopped()) return true; C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - Node* src_ptr = argument(1); - Node* src_off = ConvL2X(argument(2)); - assert(argument(3)->is_top(), "2nd half of long"); - Node* dst_ptr = argument(4); - Node* dst_off = ConvL2X(argument(5)); - assert(argument(6)->is_top(), "2nd half of long"); - Node* size = ConvL2X(argument(7)); - assert(argument(8)->is_top(), "2nd half of long"); + Node* src_ptr = argument(1); // type: oop + Node* src_off = ConvL2X(argument(2)); // type: long + Node* dst_ptr = argument(4); // type: oop + Node* dst_off = ConvL2X(argument(5)); // type: long + Node* size = ConvL2X(argument(7)); // type: long assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled"); @@ -4545,6 +4218,8 @@ } //------------------------inline_native_clone---------------------------- +// protected native Object java.lang.Object.clone(); +// // Here are the simple edge cases: // null receiver => normal trap // virtual and clone was overridden => slow path to out-of-line clone @@ -4561,20 +4236,16 @@ // can be sharply typed as an object array, a type array, or an instance. // bool LibraryCallKit::inline_native_clone(bool is_virtual) { - int nargs = 1; PhiNode* result_val; - //set the original stack and the reexecute bit for the interpreter to reexecute - //the bytecode that invokes Object.clone if deoptimization happens + // Set the reexecute bit for the interpreter to reexecute + // the bytecode that invokes Object.clone if deoptimization happens. { PreserveReexecuteState preexecs(this); jvms()->set_should_reexecute(true); - //null_check_receiver will adjust _sp (push and pop) - Node* obj = null_check_receiver(callee()); + Node* obj = null_check_receiver(); if (stopped()) return true; - _sp += nargs; - Node* obj_klass = load_object_klass(obj); const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr(); const TypeOopPtr* toop = ((tklass != NULL) @@ -4611,7 +4282,7 @@ set_control(array_ctl); Node* obj_length = load_array_length(obj); Node* obj_size = NULL; - Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); + Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push if (!use_ReduceInitialCardMarks()) { // If it is an oop array, it requires very special treatment, @@ -4711,10 +4382,9 @@ set_control( _gvn.transform(result_reg) ); set_i_o( _gvn.transform(result_i_o) ); set_all_memory( _gvn.transform(result_mem) ); - } //original reexecute and sp are set back here - - push(_gvn.transform(result_val)); - + } // original reexecute is set back here + + set_result(_gvn.transform(result_val)); return true; } @@ -4755,25 +4425,25 @@ //------------------------------inline_arraycopy----------------------- +// public static native void java.lang.System.arraycopy(Object src, int srcPos, +// Object dest, int destPos, +// int length); bool LibraryCallKit::inline_arraycopy() { - // Restore the stack and pop off the arguments. - int nargs = 5; // 2 oops, 3 ints, no size_t or long - assert(callee()->signature()->size() == nargs, "copy has 5 arguments"); - - Node *src = argument(0); - Node *src_offset = argument(1); - Node *dest = argument(2); - Node *dest_offset = argument(3); - Node *length = argument(4); + // Get the arguments. + Node* src = argument(0); // type: oop + Node* src_offset = argument(1); // type: int + Node* dest = argument(2); // type: oop + Node* dest_offset = argument(3); // type: int + Node* length = argument(4); // type: int // Compile time checks. If any of these checks cannot be verified at compile time, // we do not make a fast path for this call. Instead, we let the call remain as it // is. The checks we choose to mandate at compile time are: // // (1) src and dest are arrays. - const Type* src_type = src->Value(&_gvn); + const Type* src_type = src->Value(&_gvn); const Type* dest_type = dest->Value(&_gvn); - const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_src = src_type->isa_aryptr(); const TypeAryPtr* top_dest = dest_type->isa_aryptr(); if (top_src == NULL || top_src->klass() == NULL || top_dest == NULL || top_dest->klass() == NULL) { @@ -4828,15 +4498,13 @@ record_for_igvn(slow_region); // (3) operands must not be null - // We currently perform our null checks with the do_null_check routine. + // We currently perform our null checks with the null_check routine. // This means that the null exceptions will be reported in the caller // rather than (correctly) reported inside of the native arraycopy call. // This should be corrected, given time. We do our null check with the // stack pointer restored. - _sp += nargs; - src = do_null_check(src, T_ARRAY); - dest = do_null_check(dest, T_ARRAY); - _sp -= nargs; + src = null_check(src, T_ARRAY); + dest = null_check(dest, T_ARRAY); // (4) src_offset must not be negative. generate_negative_guard(src_offset, slow_region); @@ -5179,7 +4847,7 @@ slow_control = top(); if (slow_region != NULL) slow_control = _gvn.transform(slow_region); - debug_only(slow_region = (RegionNode*)badAddress); + DEBUG_ONLY(slow_region = (RegionNode*)badAddress); set_control(checked_control); if (!stopped()) { @@ -5674,33 +5342,22 @@ } //----------------------------inline_reference_get---------------------------- - +// public T java.lang.ref.Reference.get(); bool LibraryCallKit::inline_reference_get() { - const int nargs = 1; // self - - guarantee(java_lang_ref_Reference::referent_offset > 0, - "should have already been set"); - - int referent_offset = java_lang_ref_Reference::referent_offset; - - // Restore the stack and pop off the argument - _sp += nargs; - Node *reference_obj = pop(); - - // Null check on self without removing any arguments. - _sp += nargs; - reference_obj = do_null_check(reference_obj, T_OBJECT); - _sp -= nargs;; - + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "should have already been set"); + + // Get the argument: + Node* reference_obj = null_check_receiver(); if (stopped()) return true; - Node *adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); + Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); ciInstanceKlass* klass = env()->Object_klass(); const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass); Node* no_ctrl = NULL; - Node *result = make_load(no_ctrl, adr, object_type, T_OBJECT); + Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT); // Use the pre-barrier to record the value in the referent field pre_barrier(false /* do_load */, @@ -5713,7 +5370,7 @@ // across safepoint since GC can change its value. insert_mem_bar(Op_MemBarCPUOrder); - push(result); + set_result(result); return true; } @@ -5770,15 +5427,11 @@ } if (stubAddr == NULL) return false; - // Restore the stack and pop off the arguments. - int nargs = 5; // this + 2 oop/offset combos - assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments"); - - Node *aescrypt_object = argument(0); - Node *src = argument(1); - Node *src_offset = argument(2); - Node *dest = argument(3); - Node *dest_offset = argument(4); + Node* aescrypt_object = argument(0); + Node* src = argument(1); + Node* src_offset = argument(2); + Node* dest = argument(3); + Node* dest_offset = argument(4); // (1) src and dest are arrays. const Type* src_type = src->Value(&_gvn); @@ -5829,16 +5482,12 @@ } if (stubAddr == NULL) return false; - - // Restore the stack and pop off the arguments. - int nargs = 6; // this + oop/offset + len + oop/offset - assert(callee()->signature()->size() == nargs-1, "wrong number of arguments"); - Node *cipherBlockChaining_object = argument(0); - Node *src = argument(1); - Node *src_offset = argument(2); - Node *len = argument(3); - Node *dest = argument(4); - Node *dest_offset = argument(5); + Node* cipherBlockChaining_object = argument(0); + Node* src = argument(1); + Node* src_offset = argument(2); + Node* len = argument(3); + Node* dest = argument(4); + Node* dest_offset = argument(5); // (1) src and dest are arrays. const Type* src_type = src->Value(&_gvn); @@ -5920,11 +5569,8 @@ // Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) { // First, check receiver for NULL since it is virtual method. - int nargs = arg_size(); Node* objCBC = argument(0); - _sp += nargs; - objCBC = do_null_check(objCBC, T_OBJECT); - _sp -= nargs; + objCBC = null_check(objCBC); if (stopped()) return NULL; // Always NULL @@ -5948,9 +5594,7 @@ } ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); - _sp += nargs; // gen_instanceof might do an uncommon trap Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt))); - _sp -= nargs; Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1))); Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); @@ -5966,7 +5610,7 @@ RegionNode* region = new(C) RegionNode(3); region->init_req(1, instof_false); Node* src = argument(1); - Node *dest = argument(4); + Node* dest = argument(4); Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest)); Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN); @@ -5974,7 +5618,4 @@ record_for_igvn(region); return _gvn.transform(region); - } - -
--- a/src/share/vm/opto/locknode.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/locknode.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -165,7 +165,7 @@ kill_dead_locals(); // Null check; get casted pointer. - Node *obj = do_null_check(peek(), T_OBJECT); + Node* obj = null_check(peek()); // Check for locking null object if (stopped()) return;
--- a/src/share/vm/opto/loopTransform.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/loopTransform.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -269,10 +269,10 @@ bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const { Node *test = ((IdealLoopTree*)this)->tail(); int body_size = ((IdealLoopTree*)this)->_body.size(); - int uniq = phase->C->unique(); + int live_node_count = phase->C->live_nodes(); // Peeling does loop cloning which can result in O(N^2) node construction if( body_size > 255 /* Prevent overflow for large body_size */ - || (body_size * body_size + uniq > MaxNodeLimit) ) { + || (body_size * body_size + live_node_count > MaxNodeLimit) ) { return false; // too large to safely clone } while( test != _head ) { // Scan till run off top of loop @@ -601,7 +601,7 @@ return false; if (new_body_size > unroll_limit || // Unrolling can result in a large amount of node construction - new_body_size >= MaxNodeLimit - phase->C->unique()) { + new_body_size >= MaxNodeLimit - (uint) phase->C->live_nodes()) { return false; } @@ -2268,7 +2268,7 @@ // Skip next optimizations if running low on nodes. Note that // policy_unswitching and policy_maximally_unroll have this check. - uint nodes_left = MaxNodeLimit - phase->C->unique(); + uint nodes_left = MaxNodeLimit - (uint) phase->C->live_nodes(); if ((2 * _body.size()) > nodes_left) { return true; }
--- a/src/share/vm/opto/loopUnswitch.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/loopUnswitch.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -59,7 +59,7 @@ if (!_head->is_Loop()) { return false; } - uint nodes_left = MaxNodeLimit - phase->C->unique(); + uint nodes_left = MaxNodeLimit - phase->C->live_nodes(); if (2 * _body.size() > nodes_left) { return false; // Too speculative if running low on nodes. }
--- a/src/share/vm/opto/loopopts.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/loopopts.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -729,7 +729,7 @@ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { weight += region->fast_out(i)->outcnt(); } - int nodes_left = MaxNodeLimit - C->unique(); + int nodes_left = MaxNodeLimit - C->live_nodes(); if (weight * 8 > nodes_left) { #ifndef PRODUCT if (PrintOpto)
--- a/src/share/vm/opto/macro.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/macro.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -2262,7 +2262,7 @@ Node *slow_ctrl = _fallthroughproj->clone(); transform_later(slow_ctrl); _igvn.hash_delete(_fallthroughproj); - _fallthroughproj->disconnect_inputs(NULL); + _fallthroughproj->disconnect_inputs(NULL, C); region->init_req(1, slow_ctrl); // region inputs are now complete transform_later(region); @@ -2327,7 +2327,7 @@ Node *slow_ctrl = _fallthroughproj->clone(); transform_later(slow_ctrl); _igvn.hash_delete(_fallthroughproj); - _fallthroughproj->disconnect_inputs(NULL); + _fallthroughproj->disconnect_inputs(NULL, C); region->init_req(1, slow_ctrl); // region inputs are now complete transform_later(region);
--- a/src/share/vm/opto/matcher.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/matcher.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -342,6 +342,7 @@ // Reset node counter so MachNodes start with _idx at 0 int nodes = C->unique(); // save value C->set_unique(0); + C->reset_dead_node_list(); // Recursively match trees from old space into new space. // Correct leaves of new-space Nodes; they point to old-space.
--- a/src/share/vm/opto/memnode.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/memnode.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -2716,10 +2716,8 @@ zend = phase->transform( new(C) URShiftXNode(zend, shift) ); } + // Bulk clear double-words Node* zsize = phase->transform( new(C) SubXNode(zend, zbase) ); - Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT); - - // Bulk clear double-words Node* adr = phase->transform( new(C) AddPNode(dest, dest, start_offset) ); mem = new (C) ClearArrayNode(ctl, mem, zsize, adr); return phase->transform(mem);
--- a/src/share/vm/opto/node.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/node.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -57,7 +57,7 @@ int new_debug_idx = old_debug_idx+1; if (new_debug_idx > 0) { // Arrange that the lowest five decimal digits of _debug_idx - // will repeat thos of _idx. In case this is somehow pathological, + // will repeat those of _idx. In case this is somehow pathological, // we continue to assign negative numbers (!) consecutively. const int mod = 100000; int bump = (int)(_idx - new_debug_idx) % mod; @@ -67,7 +67,7 @@ } Compile::set_debug_idx(new_debug_idx); set_debug_idx( new_debug_idx ); - assert(Compile::current()->unique() < (uint)MaxNodeLimit, "Node limit exceeded"); + assert(Compile::current()->unique() < (UINT_MAX - 1), "Node limit exceeded UINT_MAX"); if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) { tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx); BREAKPOINT; @@ -802,7 +802,7 @@ //-------------------------disconnect_inputs----------------------------------- // NULL out all inputs to eliminate incoming Def-Use edges. // Return the number of edges between 'n' and 'this' -int Node::disconnect_inputs(Node *n) { +int Node::disconnect_inputs(Node *n, Compile* C) { int edges_to_n = 0; uint cnt = req(); @@ -824,6 +824,9 @@ // Node::destruct requires all out edges be deleted first // debug_only(destruct();) // no reuse benefit expected + if (edges_to_n == 0) { + C->record_dead_node(_idx); + } return edges_to_n; }
--- a/src/share/vm/opto/node.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/node.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -406,7 +406,7 @@ int replace_edge(Node* old, Node* neww); // NULL out all inputs to eliminate incoming Def-Use edges. // Return the number of edges between 'n' and 'this' - int disconnect_inputs(Node *n); + int disconnect_inputs(Node *n, Compile *c); // Quickly, return true if and only if I am Compile::current()->top(). bool is_top() const { @@ -454,9 +454,9 @@ void replace_by(Node* new_node); // Globally replace this node by a given new node, updating all uses // and cutting input edges of old node. - void subsume_by(Node* new_node) { + void subsume_by(Node* new_node, Compile* c) { replace_by(new_node); - disconnect_inputs(NULL); + disconnect_inputs(NULL, c); } void set_req_X( uint i, Node *n, PhaseIterGVN *igvn ); // Find the one non-null required input. RegionNode only
--- a/src/share/vm/opto/output.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/output.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -513,7 +513,7 @@ } adjust_block_start += diff; b->_nodes.map(idx, replacement); - mach->subsume_by(replacement); + mach->subsume_by(replacement, C); mach = replacement; progress = true; @@ -1426,7 +1426,7 @@ jmp_rule[i] = mach->rule(); #endif b->_nodes.map(j, replacement); - mach->subsume_by(replacement); + mach->subsume_by(replacement, C); n = replacement; mach = replacement; }
--- a/src/share/vm/opto/parse.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/parse.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -70,7 +70,7 @@ InlineTree *build_inline_tree_for_callee(ciMethod* callee_method, JVMState* caller_jvms, int caller_bci); - const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result); + const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay); const char* should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const; const char* should_not_inline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const; void print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const; @@ -107,7 +107,7 @@ // and may be accessed by find_subtree_from_root. // The call_method is the dest_method for a special or static invocation. // The call_method is an optimized virtual method candidate otherwise. - WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci); + WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci, bool& should_delay); // Information about inlined method JVMState* caller_jvms() const { return _caller_jvms; }
--- a/src/share/vm/opto/parse1.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/parse1.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -601,8 +601,8 @@ set_map(entry_map); do_exits(); - if (log) log->done("parse nodes='%d' memory='%d'", - C->unique(), C->node_arena()->used()); + if (log) log->done("parse nodes='%d' live='%d' memory='%d'", + C->unique(), C->live_nodes(), C->node_arena()->used()); } //---------------------------do_all_blocks------------------------------------- @@ -1008,7 +1008,7 @@ // If this is an inlined method, we may have to do a receiver null check. if (_caller->has_method() && is_normal_parse() && !method()->is_static()) { GraphKit kit(_caller); - kit.null_check_receiver(method()); + kit.null_check_receiver_before_call(method()); _caller = kit.transfer_exceptions_into_jvms(); if (kit.stopped()) { _exits.add_exception_states_from(_caller); @@ -1398,7 +1398,7 @@ #ifdef ASSERT int pre_bc_sp = sp(); int inputs, depth; - bool have_se = !stopped() && compute_stack_effects(inputs, depth, /*for_parse*/ true); + bool have_se = !stopped() && compute_stack_effects(inputs, depth); assert(!have_se || pre_bc_sp >= inputs, err_msg_res("have enough stack to execute this BC: pre_bc_sp=%d, inputs=%d", pre_bc_sp, inputs)); #endif //ASSERT
--- a/src/share/vm/opto/parse2.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/parse2.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -48,7 +48,7 @@ const Type* elem = Type::TOP; Node* adr = array_addressing(elem_type, 0, &elem); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); Node* ld = make_load(control(), adr, elem, elem_type, adr_type); push(ld); @@ -60,7 +60,7 @@ Node* adr = array_addressing(elem_type, 1); if (stopped()) return; // guaranteed null or range check Node* val = pop(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); store_to_memory(control(), adr, val, elem_type, adr_type); } @@ -73,7 +73,7 @@ Node *ary = peek(1+vals); // in case of exception // Null check the array base, with correct stack contents - ary = do_null_check(ary, T_ARRAY); + ary = null_check(ary, T_ARRAY); // Compile-time detect of null-exception? if (stopped()) return top(); @@ -681,7 +681,7 @@ void Parse::do_irem() { // Must keep both values on the expression-stack during null-check - do_null_check(peek(), T_INT); + zero_check_int(peek()); // Compile-time detect of null-exception? if (stopped()) return; @@ -958,7 +958,7 @@ DEBUG_ONLY(sync_jvms()); // argument(n) requires a synced jvms assert(argument(0) != NULL, "must exist"); assert(bc_depth == 1 || argument(1) != NULL, "two must exist"); - _sp += bc_depth; + inc_sp(bc_depth); return bc_depth; } @@ -1581,8 +1581,8 @@ set_pair_local( iter().get_index(), dstore_rounding(pop_pair()) ); break; - case Bytecodes::_pop: _sp -= 1; break; - case Bytecodes::_pop2: _sp -= 2; break; + case Bytecodes::_pop: dec_sp(1); break; + case Bytecodes::_pop2: dec_sp(2); break; case Bytecodes::_swap: a = pop(); b = pop(); @@ -1650,7 +1650,7 @@ case Bytecodes::_arraylength: { // Must do null-check with value on expression stack - Node *ary = do_null_check(peek(), T_ARRAY); + Node *ary = null_check(peek(), T_ARRAY); // Compile-time detect of null-exception? if (stopped()) return; a = pop(); @@ -1667,15 +1667,15 @@ case Bytecodes::_laload: { a = array_addressing(T_LONG, 0); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index - push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS)); + dec_sp(2); // Pop array and index + push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS)); break; } case Bytecodes::_daload: { a = array_addressing(T_DOUBLE, 0); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index - push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES)); + dec_sp(2); // Pop array and index + push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES)); break; } case Bytecodes::_bastore: array_store(T_BYTE); break; @@ -1699,7 +1699,7 @@ a = array_addressing(T_LONG, 2); if (stopped()) return; // guaranteed null or range check c = pop_pair(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS); break; } @@ -1707,7 +1707,7 @@ a = array_addressing(T_DOUBLE, 2); if (stopped()) return; // guaranteed null or range check c = pop_pair(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index c = dstore_rounding(c); store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES); break; @@ -1733,7 +1733,7 @@ break; case Bytecodes::_idiv: // Must keep both values on the expression-stack during null-check - do_null_check(peek(), T_INT); + zero_check_int(peek()); // Compile-time detect of null-exception? if (stopped()) return; b = pop(); @@ -2041,7 +2041,7 @@ case Bytecodes::_lrem: // Must keep both values on the expression-stack during null-check assert(peek(0) == top(), "long word order"); - do_null_check(peek(1), T_LONG); + zero_check_long(peek(1)); // Compile-time detect of null-exception? if (stopped()) return; b = pop_pair(); @@ -2053,7 +2053,7 @@ case Bytecodes::_ldiv: // Must keep both values on the expression-stack during null-check assert(peek(0) == top(), "long word order"); - do_null_check(peek(1), T_LONG); + zero_check_long(peek(1)); // Compile-time detect of null-exception? if (stopped()) return; b = pop_pair(); @@ -2175,7 +2175,7 @@ case Bytecodes::_athrow: // null exception oop throws NULL pointer exception - do_null_check(peek(), T_OBJECT); + null_check(peek()); if (stopped()) return; // Hook the thrown exception directly to subsequent handlers. if (BailoutToInterpreterForThrows) {
--- a/src/share/vm/opto/parse3.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/parse3.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -116,7 +116,7 @@ Node* obj; if (is_field) { int obj_depth = is_get ? 0 : field->type()->size(); - obj = do_null_check(peek(obj_depth), T_OBJECT); + obj = null_check(peek(obj_depth)); // Compile-time detect of null-exception? if (stopped()) return; @@ -126,11 +126,11 @@ #endif if (is_get) { - --_sp; // pop receiver before getting + (void) pop(); // pop receiver before getting do_get_xxx(obj, field, is_field); } else { do_put_xxx(obj, field, is_field); - --_sp; // pop receiver after putting + (void) pop(); // pop receiver after putting } } else { const TypeInstPtr* tip = TypeInstPtr::make(field_holder->java_mirror()); @@ -230,7 +230,7 @@ } // If there is going to be a trap, put it at the next bytecode: set_bci(iter().next_bci()); - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); set_bci(iter().cur_bci()); // put it back } @@ -463,7 +463,7 @@ // Note: the reexecute bit will be set in GraphKit::add_safepoint_edges() // when AllocateArray node for newarray is created. { PreserveReexecuteState preexecs(this); - _sp += ndimensions; + inc_sp(ndimensions); // Pass 0 as nargs since uncommon trap code does not need to restore stack. obj = expand_multianewarray(array_klass, &length[0], ndimensions, 0); } //original reexecute and sp are set back here @@ -492,7 +492,7 @@ // Create a java array for dimension sizes Node* dims = NULL; { PreserveReexecuteState preexecs(this); - _sp += ndimensions; + inc_sp(ndimensions); Node* dims_array_klass = makecon(TypeKlassPtr::make(ciArrayKlass::make(ciType::make(T_INT)))); dims = new_array(dims_array_klass, intcon(ndimensions), 0); @@ -509,6 +509,7 @@ makecon(TypeKlassPtr::make(array_klass)), dims); } + make_slow_call_ex(c, env()->Throwable_klass(), false); Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));
--- a/src/share/vm/opto/parseHelper.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/parseHelper.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -84,7 +84,7 @@ C->log()->identify(tp->klass())); } } - do_null_assert(obj, T_OBJECT); + null_assert(obj); assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" ); if (!stopped()) { profile_null_checkcast(); @@ -116,7 +116,7 @@ C->log()->elem("assert_null reason='instanceof' klass='%d'", C->log()->identify(klass)); } - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" ); if (!stopped()) { // The object is now known to be null. @@ -139,10 +139,10 @@ // pull array from stack and check that the store is valid void Parse::array_store_check() { - // Shorthand access to array store elements - Node *obj = stack(_sp-1); - Node *idx = stack(_sp-2); - Node *ary = stack(_sp-3); + // Shorthand access to array store elements without popping them. + Node *obj = peek(0); + Node *idx = peek(1); + Node *ary = peek(2); if (_gvn.type(obj) == TypePtr::NULL_PTR) { // There's never a type check on null values.
--- a/src/share/vm/opto/phaseX.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/phaseX.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -75,6 +75,13 @@ // nh->_sentinel must be in the current node space } +void NodeHash::replace_with(NodeHash *nh) { + debug_only(_table = (Node**)badAddress); // interact correctly w/ operator= + // just copy in all the fields + *this = *nh; + // nh->_sentinel must be in the current node space +} + //------------------------------hash_find-------------------------------------- // Find in hash table Node *NodeHash::hash_find( const Node *n ) { @@ -383,6 +390,8 @@ // Identify nodes that are reachable from below, useful. C->identify_useful_nodes(_useful); + // Update dead node list + C->update_dead_node_list(_useful); // Remove all useless nodes from PhaseValues' recorded types // Must be done before disconnecting nodes to preserve hash-table-invariant @@ -1190,7 +1199,7 @@ } } } - + C->record_dead_node(dead->_idx); if (dead->is_macro()) { C->remove_macro_node(dead); } @@ -1199,6 +1208,11 @@ continue; } } + // Constant node that has no out-edges and has only one in-edge from + // root is usually dead. However, sometimes reshaping walk makes + // it reachable by adding use edges. So, we will NOT count Con nodes + // as dead to be conservative about the dead node count at any + // given time. } // Aggressively kill globally dead uses
--- a/src/share/vm/opto/phaseX.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/phaseX.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -92,6 +92,7 @@ } void remove_useless_nodes(VectorSet &useful); // replace with sentinel + void replace_with(NodeHash* nh); Node *sentinel() { return _sentinel; } @@ -386,6 +387,11 @@ Node *transform( Node *n ); Node *transform_no_reclaim( Node *n ); + void replace_with(PhaseGVN* gvn) { + _table.replace_with(&gvn->_table); + _types = gvn->_types; + } + // Check for a simple dead loop when a data node references itself. DEBUG_ONLY(void dead_loop_check(Node *n);) };
--- a/src/share/vm/opto/postaloc.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/postaloc.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -146,7 +146,7 @@ } } // Disconnect control and remove precedence edges if any exist - old->disconnect_inputs(NULL); + old->disconnect_inputs(NULL, C); } return blk_adjust; } @@ -513,7 +513,7 @@ b->_nodes.remove(j--); phi_dex--; _cfg._bbs.map(phi->_idx,NULL); phi->replace_by(u); - phi->disconnect_inputs(NULL); + phi->disconnect_inputs(NULL, C); continue; } // Note that if value[pidx] exists, then we merged no new values here
--- a/src/share/vm/opto/reg_split.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/reg_split.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -747,7 +747,7 @@ if( i >= cnt ) { // Found one unique input assert(Find_id(n) == Find_id(u), "should be the same lrg"); n->replace_by(u); // Then replace with unique input - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); b->_nodes.remove(insidx); insidx--; b->_ihrp_index--;
--- a/src/share/vm/opto/runtime.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/runtime.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -989,7 +989,7 @@ // since we're notifying the VM on every catch. // Force deoptimization and the rest of the lookup // will be fine. - deoptimize_caller_frame(thread, true); + deoptimize_caller_frame(thread); } // Check the stack guard pages. If enabled, look for handler in this frame; @@ -1143,17 +1143,22 @@ void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) { - // Deoptimize frame - if (doit) { - // Called from within the owner thread, so no need for safepoint - RegisterMap reg_map(thread); - frame stub_frame = thread->last_frame(); - assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); - frame caller_frame = stub_frame.sender(®_map); + // Deoptimize the caller before continuing, as the compiled + // exception handler table may not be valid. + if (!StressCompiledExceptionHandlers && doit) { + deoptimize_caller_frame(thread); + } +} - // Deoptimize the caller frame. - Deoptimization::deoptimize_frame(thread, caller_frame.id()); - } +void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) { + // Called from within the owner thread, so no need for safepoint + RegisterMap reg_map(thread); + frame stub_frame = thread->last_frame(); + assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); + frame caller_frame = stub_frame.sender(®_map); + + // Deoptimize the caller frame. + Deoptimization::deoptimize_frame(thread, caller_frame.id()); }
--- a/src/share/vm/opto/runtime.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/runtime.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -174,6 +174,7 @@ static address handle_exception_C (JavaThread* thread); static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm); static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc ); + static void deoptimize_caller_frame (JavaThread *thread); static void deoptimize_caller_frame (JavaThread *thread, bool doit); static bool is_deoptimized_caller_frame (JavaThread *thread);
--- a/src/share/vm/opto/stringopts.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/stringopts.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -241,13 +241,13 @@ _stringopts->gvn()->transform(call); C->gvn_replace_by(uct, call); - uct->disconnect_inputs(NULL); + uct->disconnect_inputs(NULL, C); } } void cleanup() { // disconnect the hook node - _arguments->disconnect_inputs(NULL); + _arguments->disconnect_inputs(NULL, _stringopts->C); } }; @@ -265,7 +265,8 @@ } else if (n->is_IfTrue()) { Compile* C = _stringopts->C; C->gvn_replace_by(n, n->in(0)->in(0)); - C->gvn_replace_by(n->in(0), C->top()); + // get rid of the other projection + C->gvn_replace_by(n->in(0)->as_If()->proj_out(false), C->top()); } } } @@ -358,7 +359,7 @@ C->gvn_replace_by(mem_proj, mem); } C->gvn_replace_by(init, C->top()); - init->disconnect_inputs(NULL); + init->disconnect_inputs(NULL, C); } Node_List PhaseStringOpts::collect_toString_calls() { @@ -439,7 +440,7 @@ } // Find the constructor call Node* result = alloc->result_cast(); - if (result == NULL || !result->is_CheckCastPP()) { + if (result == NULL || !result->is_CheckCastPP() || alloc->in(TypeFunc::Memory)->is_top()) { // strange looking allocation #ifndef PRODUCT if (PrintOptimizeStringConcat) { @@ -744,7 +745,9 @@ ctrl_path.push(cn); ctrl_path.push(cn->proj_out(0)); ctrl_path.push(cn->proj_out(0)->unique_out()); - ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0)); + if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) { + ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0)); + } } else { ShouldNotReachHere(); } @@ -762,6 +765,12 @@ } else if (ptr->is_IfTrue()) { IfNode* iff = ptr->in(0)->as_If(); BoolNode* b = iff->in(1)->isa_Bool(); + + if (b == NULL) { + fail = true; + break; + } + Node* cmp = b->in(1); Node* v1 = cmp->in(1); Node* v2 = cmp->in(2); @@ -826,6 +835,9 @@ ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) { // Simple diamond. // XXX should check for possibly merging stores. simple data merges are ok. + // The IGVN will make this simple diamond go away when it + // transforms the Region. Make sure it sees it. + Compile::current()->record_for_igvn(ptr); ptr = ptr->in(1)->in(0)->in(0); continue; } @@ -1408,75 +1420,80 @@ Deoptimization::Action_make_not_entrant); } - // length now contains the number of characters needed for the - // char[] so create a new AllocateArray for the char[] - Node* char_array = NULL; - { - PreserveReexecuteState preexecs(&kit); - // The original jvms is for an allocation of either a String or - // StringBuffer so no stack adjustment is necessary for proper - // reexecution. If we deoptimize in the slow path the bytecode - // will be reexecuted and the char[] allocation will be thrown away. - kit.jvms()->set_should_reexecute(true); - char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))), - length, 1); - } + Node* result; + if (!kit.stopped()) { + + // length now contains the number of characters needed for the + // char[] so create a new AllocateArray for the char[] + Node* char_array = NULL; + { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. If we deoptimize in the slow path the bytecode + // will be reexecuted and the char[] allocation will be thrown away. + kit.jvms()->set_should_reexecute(true); + char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))), + length, 1); + } + + // Mark the allocation so that zeroing is skipped since the code + // below will overwrite the entire array + AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn); + char_alloc->maybe_set_complete(_gvn); - // Mark the allocation so that zeroing is skipped since the code - // below will overwrite the entire array - AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn); - char_alloc->maybe_set_complete(_gvn); - - // Now copy the string representations into the final char[] - Node* start = __ intcon(0); - for (int argi = 0; argi < sc->num_arguments(); argi++) { - Node* arg = sc->argument(argi); - switch (sc->mode(argi)) { - case StringConcat::IntMode: { - Node* end = __ AddI(start, string_sizes->in(argi)); - // getChars words backwards so pass the ending point as well as the start - int_getChars(kit, arg, char_array, start, end); - start = end; - break; + // Now copy the string representations into the final char[] + Node* start = __ intcon(0); + for (int argi = 0; argi < sc->num_arguments(); argi++) { + Node* arg = sc->argument(argi); + switch (sc->mode(argi)) { + case StringConcat::IntMode: { + Node* end = __ AddI(start, string_sizes->in(argi)); + // getChars words backwards so pass the ending point as well as the start + int_getChars(kit, arg, char_array, start, end); + start = end; + break; + } + case StringConcat::StringNullCheckMode: + case StringConcat::StringMode: { + start = copy_string(kit, arg, char_array, start); + break; + } + case StringConcat::CharMode: { + __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), + arg, T_CHAR, char_adr_idx); + start = __ AddI(start, __ intcon(1)); + break; + } + default: + ShouldNotReachHere(); } - case StringConcat::StringNullCheckMode: - case StringConcat::StringMode: { - start = copy_string(kit, arg, char_array, start); - break; - } - case StringConcat::CharMode: { - __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), - arg, T_CHAR, char_adr_idx); - start = __ AddI(start, __ intcon(1)); - break; - } - default: - ShouldNotReachHere(); } - } - // If we're not reusing an existing String allocation then allocate one here. - Node* result = sc->string_alloc(); - if (result == NULL) { - PreserveReexecuteState preexecs(&kit); - // The original jvms is for an allocation of either a String or - // StringBuffer so no stack adjustment is necessary for proper - // reexecution. - kit.jvms()->set_should_reexecute(true); - result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass()))); + // If we're not reusing an existing String allocation then allocate one here. + result = sc->string_alloc(); + if (result == NULL) { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. + kit.jvms()->set_should_reexecute(true); + result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass()))); + } + + // Intialize the string + if (java_lang_String::has_offset_field()) { + kit.store_String_offset(kit.control(), result, __ intcon(0)); + kit.store_String_length(kit.control(), result, length); + } + kit.store_String_value(kit.control(), result, char_array); + } else { + result = C->top(); } - - // Intialize the string - if (java_lang_String::has_offset_field()) { - kit.store_String_offset(kit.control(), result, __ intcon(0)); - kit.store_String_length(kit.control(), result, length); - } - kit.store_String_value(kit.control(), result, char_array); - // hook up the outgoing control and result kit.replace_call(sc->end(), result); // Unhook any hook nodes - string_sizes->disconnect_inputs(NULL); + string_sizes->disconnect_inputs(NULL, C); sc->cleanup(); }
--- a/src/share/vm/opto/type.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/opto/type.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -225,8 +225,10 @@ const TypeInt *isa_int() const; // Returns NULL if not an Int const TypeLong *is_long() const; const TypeLong *isa_long() const; // Returns NULL if not a Long + const TypeD *isa_double() const; // Returns NULL if not a Double{Top,Con,Bot} const TypeD *is_double_constant() const; // Asserts it is a DoubleCon const TypeD *isa_double_constant() const; // Returns NULL if not a DoubleCon + const TypeF *isa_float() const; // Returns NULL if not a Float{Top,Con,Bot} const TypeF *is_float_constant() const; // Asserts it is a FloatCon const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer @@ -1141,24 +1143,6 @@ return ((TypeD*)this)->_d; } -inline const TypeF *Type::is_float_constant() const { - assert( _base == FloatCon, "Not a Float" ); - return (TypeF*)this; -} - -inline const TypeF *Type::isa_float_constant() const { - return ( _base == FloatCon ? (TypeF*)this : NULL); -} - -inline const TypeD *Type::is_double_constant() const { - assert( _base == DoubleCon, "Not a Double" ); - return (TypeD*)this; -} - -inline const TypeD *Type::isa_double_constant() const { - return ( _base == DoubleCon ? (TypeD*)this : NULL); -} - inline const TypeInt *Type::is_int() const { assert( _base == Int, "Not an Int" ); return (TypeInt*)this; @@ -1177,6 +1161,36 @@ return ( _base == Long ? (TypeLong*)this : NULL); } +inline const TypeF *Type::isa_float() const { + return ((_base == FloatTop || + _base == FloatCon || + _base == FloatBot) ? (TypeF*)this : NULL); +} + +inline const TypeF *Type::is_float_constant() const { + assert( _base == FloatCon, "Not a Float" ); + return (TypeF*)this; +} + +inline const TypeF *Type::isa_float_constant() const { + return ( _base == FloatCon ? (TypeF*)this : NULL); +} + +inline const TypeD *Type::isa_double() const { + return ((_base == DoubleTop || + _base == DoubleCon || + _base == DoubleBot) ? (TypeD*)this : NULL); +} + +inline const TypeD *Type::is_double_constant() const { + assert( _base == DoubleCon, "Not a Double" ); + return (TypeD*)this; +} + +inline const TypeD *Type::isa_double_constant() const { + return ( _base == DoubleCon ? (TypeD*)this : NULL); +} + inline const TypeTuple *Type::is_tuple() const { assert( _base == Tuple, "Not a Tuple" ); return (TypeTuple*)this;
--- a/src/share/vm/prims/jvmtiExport.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/prims/jvmtiExport.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1305,15 +1305,17 @@ vframeStream st(thread); assert(!st.at_end(), "cannot be at end"); methodOop current_method = NULL; + methodHandle current_mh = methodHandle(thread, current_method); int current_bci = -1; do { current_method = st.method(); + current_mh = methodHandle(thread, current_method); current_bci = st.bci(); do { should_repeat = false; KlassHandle eh_klass(thread, exception_handle()->klass()); - current_bci = current_method->fast_exception_handler_bci_for( - eh_klass, current_bci, THREAD); + current_bci = methodOopDesc::fast_exception_handler_bci_for( + current_mh, eh_klass, current_bci, THREAD); if (HAS_PENDING_EXCEPTION) { exception_handle = KlassHandle(thread, PENDING_EXCEPTION); CLEAR_PENDING_EXCEPTION; @@ -1328,8 +1330,7 @@ catch_jmethodID = 0; current_bci = 0; } else { - catch_jmethodID = jem.to_jmethodID( - methodHandle(thread, current_method)); + catch_jmethodID = jem.to_jmethodID(current_mh); } JvmtiJavaThreadEventTransition jet(thread);
--- a/src/share/vm/prims/methodHandles.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/prims/methodHandles.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1170,8 +1170,8 @@ // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); + java_lang_invoke_CallSite::set_target(call_site(), target()); } - java_lang_invoke_CallSite::set_target(call_site(), target()); } JVM_END @@ -1182,8 +1182,8 @@ // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); + java_lang_invoke_CallSite::set_target_volatile(call_site(), target()); } - java_lang_invoke_CallSite::set_target_volatile(call_site(), target()); } JVM_END
--- a/src/share/vm/runtime/arguments.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/runtime/arguments.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -3228,6 +3228,18 @@ if (!EliminateLocks) { EliminateNestedLocks = false; } + if (!Inline) { + IncrementalInline = false; + } +#ifndef PRODUCT + if (!IncrementalInline) { + AlwaysIncrementalInline = false; + } +#endif + if (IncrementalInline && FLAG_IS_DEFAULT(MaxNodeLimit)) { + // incremental inlining: bump MaxNodeLimit + FLAG_SET_DEFAULT(MaxNodeLimit, (intx)75000); + } #endif if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
--- a/src/share/vm/runtime/deoptimization.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/runtime/deoptimization.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -1242,8 +1242,8 @@ nmethodLocker nl(fr.pc()); // Log a message - Events::log_deopt_message(thread, "Uncommon trap %d fr.pc " INTPTR_FORMAT, - trap_request, fr.pc()); + Events::log(thread, "Uncommon trap: trap_request=" PTR32_FORMAT " fr.pc=" INTPTR_FORMAT, + trap_request, fr.pc()); { ResourceMark rm; @@ -1274,6 +1274,11 @@ methodDataHandle trap_mdo (THREAD, get_method_data(thread, trap_method, create_if_missing)); + // Log a message + Events::log_deopt_message(thread, "Uncommon trap: reason=%s action=%s pc=" INTPTR_FORMAT " method=%s @ %d", + trap_reason_name(reason), trap_action_name(action), fr.pc(), + trap_method->name_and_sig_as_C_string(), trap_bci); + // Print a bunch of diagnostics, if requested. if (TraceDeoptimization || LogCompilation) { ResourceMark rm;
--- a/src/share/vm/runtime/globals.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/runtime/globals.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -922,6 +922,9 @@ develop(bool, PrintExceptionHandlers, false, \ "Print exception handler tables for all nmethods when generated") \ \ + develop(bool, StressCompiledExceptionHandlers, false, \ + "Exercise compiled exception handlers") \ + \ develop(bool, InterceptOSException, false, \ "Starts debugger when an implicit OS (e.g., NULL) " \ "exception happens") \
--- a/src/share/vm/runtime/sharedRuntime.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/runtime/sharedRuntime.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -642,7 +642,7 @@ bool skip_scope_increment = false; // exception handler lookup KlassHandle ek (THREAD, exception->klass()); - handler_bci = sd->method()->fast_exception_handler_bci_for(ek, bci, THREAD); + handler_bci = methodOopDesc::fast_exception_handler_bci_for(sd->method(), ek, bci, THREAD); if (HAS_PENDING_EXCEPTION) { recursive_exception = true; // We threw an exception while trying to find the exception handler.
--- a/src/share/vm/runtime/thread.cpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/runtime/thread.cpp Fri Jan 11 10:38:38 2013 -0800 @@ -2171,7 +2171,7 @@ // BiasedLocking needs an updated RegisterMap for the revoke monitors pass RegisterMap reg_map(this, UseBiasedLocking); frame compiled_frame = f.sender(®_map); - if (compiled_frame.can_be_deoptimized()) { + if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) { Deoptimization::deoptimize(this, compiled_frame, ®_map); } }
--- a/src/share/vm/utilities/events.hpp Wed Jan 09 20:33:26 2013 -0800 +++ b/src/share/vm/utilities/events.hpp Fri Jan 11 10:38:38 2013 -0800 @@ -135,11 +135,11 @@ }; // A simple wrapper class for fixed size text messages. -class StringLogMessage : public FormatBuffer<132> { +class StringLogMessage : public FormatBuffer<256> { public: // Wrap this buffer in a stringStream. stringStream stream() { - return stringStream(_buf, sizeof(_buf)); + return stringStream(_buf, size()); } };
--- a/test/compiler/6865265/StackOverflowBug.java Wed Jan 09 20:33:26 2013 -0800 +++ b/test/compiler/6865265/StackOverflowBug.java Fri Jan 11 10:38:38 2013 -0800 @@ -28,7 +28,7 @@ * @summary JVM crashes with "missing exception handler" error * @author volker.simonis@sap.com * - * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss224k StackOverflowBug + * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss248k StackOverflowBug */
--- a/test/compiler/7184394/TestAESBase.java Wed Jan 09 20:33:26 2013 -0800 +++ b/test/compiler/7184394/TestAESBase.java Fri Jan 11 10:38:38 2013 -0800 @@ -54,7 +54,6 @@ String paddingStr = "PKCS5Padding"; AlgorithmParameters algParams; SecretKey key; - int ivLen; static int numThreads = 0; int threadId; @@ -68,7 +67,7 @@ public void prepare() { try { - System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput); + System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput); int keyLenBytes = (keySize == 0 ? 16 : keySize/8); byte keyBytes[] = new byte[keyLenBytes]; @@ -90,10 +89,14 @@ cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); - ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0); - IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); - - cipher.init(Cipher.ENCRYPT_MODE, key, initVector); + if (mode.equals("CBC")) { + int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0); + IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); + cipher.init(Cipher.ENCRYPT_MODE, key, initVector); + } else { + algParams = cipher.getParameters(); + cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + } algParams = cipher.getParameters(); dCipher.init(Cipher.DECRYPT_MODE, key, algParams); if (threadId == 0) {
--- a/test/compiler/7184394/TestAESMain.java Wed Jan 09 20:33:26 2013 -0800 +++ b/test/compiler/7184394/TestAESMain.java Fri Jan 11 10:38:38 2013 -0800 @@ -27,7 +27,8 @@ * @bug 7184394 * @summary add intrinsics to use AES instructions * - * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain * * @author Tom Deneau */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/8004741/Test8004741.java Fri Jan 11 10:38:38 2013 -0800 @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test Test8004741.java + * @bug 8004741 + * @summary Missing compiled exception handle table entry for multidimensional array allocation + * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741 + * + */ + +import java.util.*; + +public class Test8004741 extends Thread { + + static int[][] test(int a, int b) throws Exception { + int[][] ar = null; + try { + ar = new int[a][b]; + } catch (Error e) { + System.out.println("test got Error"); + passed = true; + throw(e); + } catch (Exception e) { + System.out.println("test got Exception"); + throw(e); + } + return ar; + } + + static boolean passed = false; + + public void run() { + System.out.println("test started"); + try { + while(true) { + test(2,20000); + } + } catch (ThreadDeath e) { + System.out.println("test got ThreadDeath"); + passed = true; + } catch (Error e) { + e.printStackTrace(); + System.out.println("test got Error"); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("test got Exception"); + } + } + + public static void main(String[] args) throws Exception { + for (int n = 0; n < 11000; n++) { + test(2, 20); + } + + // First test exception catch + Test8004741 t = new Test8004741(); + + passed = false; + t.start(); + Thread.sleep(1000); + t.stop(); + + Thread.sleep(5000); + t.join(); + if (passed) { + System.out.println("PASSED"); + } else { + System.out.println("FAILED"); + System.exit(97); + } + } + +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/8005033/Test8005033.java Fri Jan 11 10:38:38 2013 -0800 @@ -0,0 +1,50 @@ +/* + * Copyright 2012 SAP AG. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8005033 + * @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow. + * @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033 + * @author Richard Reingruber richard DOT reingruber AT sap DOT com + */ + +public class Test8005033 { + public static int MINUS_ONE = -1; + + public static void main(String[] args) { + System.out.println("EXECUTING test."); + Integer.bitCount(1); // load class + int expectedBitCount = 0; + int calculatedBitCount = testBitCount(); + if (expectedBitCount != calculatedBitCount) { + throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount); + } + System.out.println("SUCCESSFULLY passed test."); + } + + // testBitCount will be compiled using the Integer.bitCount() intrinsic if possible + private static int testBitCount() { + return Integer.bitCount(MINUS_ONE+1); // -1 + 1 => int overflow + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/8005419/Test8005419.java Fri Jan 11 10:38:38 2013 -0800 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8005419 + * @summary Improve intrinsics code performance on x86 by using AVX2 + * @run main/othervm -Xbatch -Xmx64m Test8005419 + * + */ + +public class Test8005419 { + public static int SIZE = 64; + + public static void main(String[] args) { + char[] a = new char[SIZE]; + char[] b = new char[SIZE]; + + for (int i = 16; i < SIZE; i++) { + a[i] = (char)i; + b[i] = (char)i; + } + String s1 = new String(a); + String s2 = new String(b); + + // Warm up + boolean failed = false; + int result = 0; + for (int i = 0; i < 10000; i++) { + result += test(s1, s2); + } + for (int i = 0; i < 10000; i++) { + result += test(s1, s2); + } + for (int i = 0; i < 10000; i++) { + result += test(s1, s2); + } + if (result != 0) failed = true; + + System.out.println("Start testing"); + // Compare same string + result = test(s1, s1); + if (result != 0) { + failed = true; + System.out.println("Failed same: result = " + result + ", expected 0"); + } + // Compare equal strings + for (int i = 1; i <= SIZE; i++) { + s1 = new String(a, 0, i); + s2 = new String(b, 0, i); + result = test(s1, s2); + if (result != 0) { + failed = true; + System.out.println("Failed equals s1[" + i + "], s2[" + i + "]: result = " + result + ", expected 0"); + } + } + // Compare equal strings but different sizes + for (int i = 1; i <= SIZE; i++) { + s1 = new String(a, 0, i); + for (int j = 1; j <= SIZE; j++) { + s2 = new String(b, 0, j); + result = test(s1, s2); + if (result != (i-j)) { + failed = true; + System.out.println("Failed diff size s1[" + i + "], s2[" + j + "]: result = " + result + ", expected " + (i-j)); + } + } + } + // Compare strings with one char different and different sizes + for (int i = 1; i <= SIZE; i++) { + s1 = new String(a, 0, i); + for (int j = 0; j < i; j++) { + b[j] -= 3; // change char + s2 = new String(b, 0, i); + result = test(s1, s2); + int chdiff = a[j] - b[j]; + if (result != chdiff) { + failed = true; + System.out.println("Failed diff char s1[" + i + "], s2[" + i + "]: result = " + result + ", expected " + chdiff); + } + result = test(s2, s1); + chdiff = b[j] - a[j]; + if (result != chdiff) { + failed = true; + System.out.println("Failed diff char s2[" + i + "], s1[" + i + "]: result = " + result + ", expected " + chdiff); + } + b[j] += 3; // restore + } + } + if (failed) { + System.out.println("FAILED"); + System.exit(97); + } + System.out.println("PASSED"); + } + + private static int test(String str1, String str2) { + return str1.compareTo(str2); + } +}