Mercurial > hg > icedtea8-forest > hotspot
changeset 9208:e480e0df8eea icedtea-3.1.0
Merge jdk8u101-b13
line wrap: on
line diff
--- a/.hgtags Mon Jul 25 06:02:58 2016 +0100 +++ b/.hgtags Mon Jul 25 06:38:24 2016 +0100 @@ -617,6 +617,7 @@ 6824e2475e0432e27f9cc51838bc34ea5fbf5113 jdk8u40-b27 8220f68a195f6eeed2f5fb6e8a303726b512e899 jdk8u40-b31 850a290eb1088a61178d1910c500e170ef4f4386 jdk8u40-b32 +e6aa4a8c1b46a05b6c493b4ffe5c2555013f5c99 jdk8u40-b33 1b3abbeee961dee49780c0e4af5337feb918c555 jdk8u40-b10 f10fe402dfb1543723b4b117a7cba3ea3d4159f1 hs25.40-b15 99372b2fee0eb8b3452f47230e84aa6e97003184 jdk8u40-b11 @@ -826,17 +827,28 @@ dc2fdd4e0b8105268b8231040f761f27ab4523f2 jdk8u72-b14 dbf85d44da89a2428b3bd678be791c314e148845 icedtea-3.0.0pre09 d6670c5d49ba381405ec9f69a78ccc5b8b0c8473 jdk8u72-b15 +da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31 26b99cd20661a1fa05939d1856a9389311e01c4f jdk8u73-b00 931c31db01ae873525a1b2c306b01129eb431960 jdk8u73-b01 67566d815a66d958c1f817d65f1621ba1d2e5f33 jdk8u73-b02 451dda77f6c29bd3260e87f847a9eadae122a759 jdk8u74-b00 c1031a924f2c910fad078838b88a2f0146f2de98 jdk8u74-b01 ca9cae9aa9e989bbe6713c91d55c913edeaecce4 jdk8u74-b02 +a5b78b56841e97ce00463874f1b7f63c54d84934 jdk8u74-b31 +94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32 +1b6d4fd2730e58f17820930f797938dc182117c4 jdk8u77-b00 +ddd297e340b1170d3cec011ee64e729f8b493c86 jdk8u77-b01 +1b4072e4bb3ad54c4e894998486a8b33f0689160 jdk8u77-b02 +223b64a19e94222dd97b92bb40abcfbc0bf6ef1f jdk8u77-b03 +dd8507f51d786572dae18af8ffdc5a1ea34c755e jdk8u77-b31 + +94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32 da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31 c0242ea4bde19d72be5149feda112a39e8c89b0a jdk8u75-b00 ca3b8c8e390ab0540b0cc2e5def869b38e460d86 jdk8u75-b01 9aef5b5e0a68f20059cfa9e2806b4ff0e11a3d31 jdk8u75-b02 2df9fe896819362b9075a670b78106b249e50d6d jdk8u75-b03 +b374548dcb4834eb8731a06b52faddd0f10bd45d jdk8u101-b00 32b682649973231b54740c09b10889660f6ebde5 jdk8u75-b04 1f43bd4fab06d2ca5d1964611df14d8506d6b36e jdk8u75-b05 916712f178c39d0acbc590f38802133fc86a7346 jdk8u75-b06 @@ -855,6 +867,7 @@ bbbb05e91c629f8d9eef2ba43933767f68a898b0 jdk8u91-b00 e36b6ade0499eadfd8673fe62ef0a613af2e6d67 jdk8u91-b13 fa8991ccf6e5b74890a0b5672440b3c09d8d8732 jdk8u91-b14 +e1ea97ad19af4d1e0bda449aa43be7e1b118ffe9 jdk8u91-b15 d7b01fb81aa8a5437cb03bc36afe15cf0e55fb89 jdk8u76-b00 c1679cc87ba045219169cabb6b9b378c2b5cc578 jdk8u76-b01 218483967e52b419d885d34af4488a81c5133804 jdk8u76-b02 @@ -874,3 +887,18 @@ 76eca5cf31500ecb1d1807685729a7ea5c3780e7 icedtea-3.1.0pre02 ea6933324a7a52379d7f1e18e7525fd619079865 icedtea-3.1.0pre03 0f47eef348e2ec17dd6492886c57e23023fbc435 icedtea-3.1.0pre04 +24a09407d71bb2cc4848bfa21660c890b4d722b1 jdk8u92-b14 +445941ba41c0e3829fe02140690b144281ac2141 jdk8u92-b31 +b374548dcb4834eb8731a06b52faddd0f10bd45d jdk8u81-b00 +ead07188d11107e877e8e4ad215ff6cb238a8a92 jdk8u101-b01 +34429bad9986677f4991c80aeb22665842881cba jdk8u101-b02 +b41d5faaf1d32ed1bf9592f65f2f94ddd4c60fc4 jdk8u101-b03 +ceecf88e5c2c09bfabf5926581e6d0b0f65f5148 jdk8u101-b04 +19e74265fc8def6a7fc96c836d8ebe38ad1cf199 jdk8u101-b05 +7c60503b0888ac16eac80a6cd074195973f8dedb jdk8u101-b06 +cb4af293fe70549b51039bb9197f373e6750fafb jdk8u101-b07 +8ed377d2cec94435d1617a37999960a24be73ad9 jdk8u101-b08 +9be452c4e7161e60d623d55bb72ad013386aefd1 jdk8u101-b09 +218a44a163fa8c2532fd5f2e8ea9bc3c9c2ca8cf jdk8u101-b10 +0095e54dcaa1acfe1614feff9600734c26af7ae8 jdk8u101-b11 +286fe17d81c3d153611a28e50926083ae934cc56 jdk8u101-b12
--- a/src/cpu/x86/vm/assembler_x86.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/assembler_x86.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -2318,6 +2318,13 @@ emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::orl(Address dst, Register src) { + InstructionMark im(this); + prefix(dst, src); + emit_int8(0x09); + emit_operand(src, dst); +} + void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); @@ -5613,6 +5620,19 @@ } } +void Assembler::rcrq(Register dst, int imm8) { + assert(isShiftCount(imm8 >> 1), "illegal shift count"); + int encode = prefixq_and_encode(dst->encoding()); + if (imm8 == 1) { + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xD8 | encode)); + } else { + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xD8 | encode)); + emit_int8(imm8); + } +} + void Assembler::rorq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding());
--- a/src/cpu/x86/vm/assembler_x86.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/assembler_x86.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1455,6 +1455,7 @@ void orl(Register dst, int32_t imm32); void orl(Register dst, Address src); void orl(Register dst, Register src); + void orl(Address dst, Register src); void orq(Address dst, int32_t imm32); void orq(Register dst, int32_t imm32); @@ -1555,6 +1556,8 @@ void rclq(Register dst, int imm8); + void rcrq(Register dst, int imm8); + void rdtsc(); void ret(int imm16);
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -7769,6 +7769,503 @@ pop(tmp2); pop(tmp1); } + +//Helper functions for square_to_len() + +/** + * Store the squares of x[], right shifted one bit (divided by 2) into z[] + * Preserves x and z and modifies rest of the registers. + */ + +void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { + // Perform square and right shift by 1 + // Handle odd xlen case first, then for even xlen do the following + // jlong carry = 0; + // for (int j=0, i=0; j < xlen; j+=2, i+=4) { + // huge_128 product = x[j:j+1] * x[j:j+1]; + // z[i:i+1] = (carry << 63) | (jlong)(product >>> 65); + // z[i+2:i+3] = (jlong)(product >>> 1); + // carry = (jlong)product; + // } + + xorq(tmp5, tmp5); // carry + xorq(rdxReg, rdxReg); + xorl(tmp1, tmp1); // index for x + xorl(tmp4, tmp4); // index for z + + Label L_first_loop, L_first_loop_exit; + + testl(xlen, 1); + jccb(Assembler::zero, L_first_loop); //jump if xlen is even + + // Square and right shift by 1 the odd element using 32 bit multiply + movl(raxReg, Address(x, tmp1, Address::times_4, 0)); + imulq(raxReg, raxReg); + shrq(raxReg, 1); + adcq(tmp5, 0); + movq(Address(z, tmp4, Address::times_4, 0), raxReg); + incrementl(tmp1); + addl(tmp4, 2); + + // Square and right shift by 1 the rest using 64 bit multiply + bind(L_first_loop); + cmpptr(tmp1, xlen); + jccb(Assembler::equal, L_first_loop_exit); + + // Square + movq(raxReg, Address(x, tmp1, Address::times_4, 0)); + rorq(raxReg, 32); // convert big-endian to little-endian + mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax + + // Right shift by 1 and save carry + shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1 + rcrq(rdxReg, 1); + rcrq(raxReg, 1); + adcq(tmp5, 0); + + // Store result in z + movq(Address(z, tmp4, Address::times_4, 0), rdxReg); + movq(Address(z, tmp4, Address::times_4, 8), raxReg); + + // Update indices for x and z + addl(tmp1, 2); + addl(tmp4, 4); + jmp(L_first_loop); + + bind(L_first_loop_exit); +} + + +/** + * Perform the following multiply add operation using BMI2 instructions + * carry:sum = sum + op1*op2 + carry + * op2 should be in rdx + * op2 is preserved, all other registers are modified + */ +void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) { + // assert op2 is rdx + mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1 + addq(sum, carry); + adcq(tmp2, 0); + addq(sum, op1); + adcq(tmp2, 0); + movq(carry, tmp2); +} + +/** + * Perform the following multiply add operation: + * carry:sum = sum + op1*op2 + carry + * Preserves op1, op2 and modifies rest of registers + */ +void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) { + // rdx:rax = op1 * op2 + movq(raxReg, op2); + mulq(op1); + + // rdx:rax = sum + carry + rdx:rax + addq(sum, carry); + adcq(rdxReg, 0); + addq(sum, raxReg); + adcq(rdxReg, 0); + + // carry:sum = rdx:sum + movq(carry, rdxReg); +} + +/** + * Add 64 bit long carry into z[] with carry propogation. + * Preserves z and carry register values and modifies rest of registers. + * + */ +void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) { + Label L_fourth_loop, L_fourth_loop_exit; + + movl(tmp1, 1); + subl(zlen, 2); + addq(Address(z, zlen, Address::times_4, 0), carry); + + bind(L_fourth_loop); + jccb(Assembler::carryClear, L_fourth_loop_exit); + subl(zlen, 2); + jccb(Assembler::negative, L_fourth_loop_exit); + addq(Address(z, zlen, Address::times_4, 0), tmp1); + jmp(L_fourth_loop); + bind(L_fourth_loop_exit); +} + +/** + * Shift z[] left by 1 bit. + * Preserves x, len, z and zlen registers and modifies rest of the registers. + * + */ +void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + + Label L_fifth_loop, L_fifth_loop_exit; + + // Fifth loop + // Perform primitiveLeftShift(z, zlen, 1) + + const Register prev_carry = tmp1; + const Register new_carry = tmp4; + const Register value = tmp2; + const Register zidx = tmp3; + + // int zidx, carry; + // long value; + // carry = 0; + // for (zidx = zlen-2; zidx >=0; zidx -= 2) { + // (carry:value) = (z[i] << 1) | carry ; + // z[i] = value; + // } + + movl(zidx, zlen); + xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register + + bind(L_fifth_loop); + decl(zidx); // Use decl to preserve carry flag + decl(zidx); + jccb(Assembler::negative, L_fifth_loop_exit); + + if (UseBMI2Instructions) { + movq(value, Address(z, zidx, Address::times_4, 0)); + rclq(value, 1); + rorxq(value, value, 32); + movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form + } + else { + // clear new_carry + xorl(new_carry, new_carry); + + // Shift z[i] by 1, or in previous carry and save new carry + movq(value, Address(z, zidx, Address::times_4, 0)); + shlq(value, 1); + adcl(new_carry, 0); + + orq(value, prev_carry); + rorq(value, 0x20); + movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form + + // Set previous carry = new carry + movl(prev_carry, new_carry); + } + jmp(L_fifth_loop); + + bind(L_fifth_loop_exit); +} + + +/** + * Code for BigInteger::squareToLen() intrinsic + * + * rdi: x + * rsi: len + * r8: z + * rcx: zlen + * r12: tmp1 + * r13: tmp2 + * r14: tmp3 + * r15: tmp4 + * rbx: tmp5 + * + */ +void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { + + Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, fifth_loop, fifth_loop_exit, L_last_x, L_multiply; + push(tmp1); + push(tmp2); + push(tmp3); + push(tmp4); + push(tmp5); + + // First loop + // Store the squares, right shifted one bit (i.e., divided by 2). + square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg); + + // Add in off-diagonal sums. + // + // Second, third (nested) and fourth loops. + // zlen +=2; + // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) { + // carry = 0; + // long op2 = x[xidx:xidx+1]; + // for (int j=xidx-2,k=zidx; j >= 0; j-=2) { + // k -= 2; + // long op1 = x[j:j+1]; + // long sum = z[k:k+1]; + // carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs); + // z[k:k+1] = sum; + // } + // add_one_64(z, k, carry, tmp_regs); + // } + + const Register carry = tmp5; + const Register sum = tmp3; + const Register op1 = tmp4; + Register op2 = tmp2; + + push(zlen); + push(len); + addl(zlen,2); + bind(L_second_loop); + xorq(carry, carry); + subl(zlen, 4); + subl(len, 2); + push(zlen); + push(len); + cmpl(len, 0); + jccb(Assembler::lessEqual, L_second_loop_exit); + + // Multiply an array by one 64 bit long. + if (UseBMI2Instructions) { + op2 = rdxReg; + movq(op2, Address(x, len, Address::times_4, 0)); + rorxq(op2, op2, 32); + } + else { + movq(op2, Address(x, len, Address::times_4, 0)); + rorq(op2, 32); + } + + bind(L_third_loop); + decrementl(len); + jccb(Assembler::negative, L_third_loop_exit); + decrementl(len); + jccb(Assembler::negative, L_last_x); + + movq(op1, Address(x, len, Address::times_4, 0)); + rorq(op1, 32); + + bind(L_multiply); + subl(zlen, 2); + movq(sum, Address(z, zlen, Address::times_4, 0)); + + // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry. + if (UseBMI2Instructions) { + multiply_add_64_bmi2(sum, op1, op2, carry, tmp2); + } + else { + multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); + } + + movq(Address(z, zlen, Address::times_4, 0), sum); + + jmp(L_third_loop); + bind(L_third_loop_exit); + + // Fourth loop + // Add 64 bit long carry into z with carry propogation. + // Uses offsetted zlen. + add_one_64(z, zlen, carry, tmp1); + + pop(len); + pop(zlen); + jmp(L_second_loop); + + // Next infrequent code is moved outside loops. + bind(L_last_x); + movl(op1, Address(x, 0)); + jmp(L_multiply); + + bind(L_second_loop_exit); + pop(len); + pop(zlen); + pop(len); + pop(zlen); + + // Fifth loop + // Shift z left 1 bit. + lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4); + + // z[zlen-1] |= x[len-1] & 1; + movl(tmp3, Address(x, len, Address::times_4, -4)); + andl(tmp3, 1); + orl(Address(z, zlen, Address::times_4, -4), tmp3); + + pop(tmp5); + pop(tmp4); + pop(tmp3); + pop(tmp2); + pop(tmp1); +} + +/** + * Helper function for mul_add() + * Multiply the in[] by int k and add to out[] starting at offset offs using + * 128 bit by 32 bit multiply and return the carry in tmp5. + * Only quad int aligned length of in[] is operated on in this function. + * k is in rdxReg for BMI2Instructions, for others it is in tmp2. + * This function preserves out, in and k registers. + * len and offset point to the appropriate index in "in" & "out" correspondingly + * tmp5 has the carry. + * other registers are temporary and are modified. + * + */ +void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in, + Register offset, Register len, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { + + Label L_first_loop, L_first_loop_exit; + + movl(tmp1, len); + shrl(tmp1, 2); + + bind(L_first_loop); + subl(tmp1, 1); + jccb(Assembler::negative, L_first_loop_exit); + + subl(len, 4); + subl(offset, 4); + + Register op2 = tmp2; + const Register sum = tmp3; + const Register op1 = tmp4; + const Register carry = tmp5; + + if (UseBMI2Instructions) { + op2 = rdxReg; + } + + movq(op1, Address(in, len, Address::times_4, 8)); + rorq(op1, 32); + movq(sum, Address(out, offset, Address::times_4, 8)); + rorq(sum, 32); + if (UseBMI2Instructions) { + multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); + } + else { + multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); + } + // Store back in big endian from little endian + rorq(sum, 0x20); + movq(Address(out, offset, Address::times_4, 8), sum); + + movq(op1, Address(in, len, Address::times_4, 0)); + rorq(op1, 32); + movq(sum, Address(out, offset, Address::times_4, 0)); + rorq(sum, 32); + if (UseBMI2Instructions) { + multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); + } + else { + multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); + } + // Store back in big endian from little endian + rorq(sum, 0x20); + movq(Address(out, offset, Address::times_4, 0), sum); + + jmp(L_first_loop); + bind(L_first_loop_exit); +} + +/** + * Code for BigInteger::mulAdd() intrinsic + * + * rdi: out + * rsi: in + * r11: offs (out.length - offset) + * rcx: len + * r8: k + * r12: tmp1 + * r13: tmp2 + * r14: tmp3 + * r15: tmp4 + * rbx: tmp5 + * Multiply the in[] by word k and add to out[], return the carry in rax + */ +void MacroAssembler::mul_add(Register out, Register in, Register offs, + Register len, Register k, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { + + Label L_carry, L_last_in, L_done; + +// carry = 0; +// for (int j=len-1; j >= 0; j--) { +// long product = (in[j] & LONG_MASK) * kLong + +// (out[offs] & LONG_MASK) + carry; +// out[offs--] = (int)product; +// carry = product >>> 32; +// } +// + push(tmp1); + push(tmp2); + push(tmp3); + push(tmp4); + push(tmp5); + + Register op2 = tmp2; + const Register sum = tmp3; + const Register op1 = tmp4; + const Register carry = tmp5; + + if (UseBMI2Instructions) { + op2 = rdxReg; + movl(op2, k); + } + else { + movl(op2, k); + } + + xorq(carry, carry); + + //First loop + + //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply + //The carry is in tmp5 + mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg); + + //Multiply the trailing in[] entry using 64 bit by 32 bit, if any + decrementl(len); + jccb(Assembler::negative, L_carry); + decrementl(len); + jccb(Assembler::negative, L_last_in); + + movq(op1, Address(in, len, Address::times_4, 0)); + rorq(op1, 32); + + subl(offs, 2); + movq(sum, Address(out, offs, Address::times_4, 0)); + rorq(sum, 32); + + if (UseBMI2Instructions) { + multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); + } + else { + multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); + } + + // Store back in big endian from little endian + rorq(sum, 0x20); + movq(Address(out, offs, Address::times_4, 0), sum); + + testl(len, len); + jccb(Assembler::zero, L_carry); + + //Multiply the last in[] entry, if any + bind(L_last_in); + movl(op1, Address(in, 0)); + movl(sum, Address(out, offs, Address::times_4, -4)); + + movl(raxReg, k); + mull(op1); //tmp4 * eax -> edx:eax + addl(sum, carry); + adcl(rdxReg, 0); + addl(sum, raxReg); + adcl(rdxReg, 0); + movl(carry, rdxReg); + + movl(Address(out, offs, Address::times_4, -4), sum); + + bind(L_carry); + //return tmp5/carry as carry in rax + movl(rax, carry); + + bind(L_done); + pop(tmp5); + pop(tmp4); + pop(tmp3); + pop(tmp2); + pop(tmp1); +} #endif /**
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1241,6 +1241,25 @@ Register carry2); void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); + + void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, + Register tmp4, Register tmp5, Register rdxReg, Register raxReg); + void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, + Register tmp2); + void multiply_add_64(Register sum, Register op1, Register op2, Register carry, + Register rdxReg, Register raxReg); + void add_one_64(Register z, Register zlen, Register carry, Register tmp1); + void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, + Register tmp3, Register tmp4); + void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, + Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); + + void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, + Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, + Register raxReg); + void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, + Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, + Register raxReg); #endif // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -23,6 +23,9 @@ */ #include "precompiled.hpp" +#ifndef _WINDOWS +#include "alloca.h" +#endif #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/debugInfoRec.hpp" @@ -3966,6 +3969,256 @@ } +//------------------------------Montgomery multiplication------------------------ +// + +#ifndef _WINDOWS + +#define ASM_SUBTRACT + +#ifdef ASM_SUBTRACT +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + long i = 0, cnt = len; + unsigned long tmp; + asm volatile("clc; " + "0: ; " + "mov (%[b], %[i], 8), %[tmp]; " + "sbb %[tmp], (%[a], %[i], 8); " + "inc %[i]; dec %[cnt]; " + "jne 0b; " + "mov %[carry], %[tmp]; sbb $0, %[tmp]; " + : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) + : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) + : "memory"); + return tmp; +} +#else // ASM_SUBTRACT +typedef int __attribute__((mode(TI))) int128; + +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) { + int128 tmp = 0; + int i; + for (i = 0; i < len; i++) { + tmp += a[i]; + tmp -= b[i]; + a[i] = tmp; + tmp >>= 64; + assert(-1 <= tmp && tmp <= 0, "invariant"); + } + return tmp + carry; +} +#endif // ! ASM_SUBTRACT + +// Multiply (unsigned) Long A by Long B, accumulating the double- +// length result into the accumulator formed of T0, T1, and T2. +#define MACC(A, B, T0, T1, T2) \ +do { \ + unsigned long hi, lo; \ + asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ + : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ + : "r"(A), "a"(B) : "cc"); \ + } while(0) + +// As above, but add twice the double-length result into the +// accumulator. +#define MACC2(A, B, T0, T1, T2) \ +do { \ + unsigned long hi, lo; \ + asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;" \ + "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ + : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ + : "r"(A), "a"(B) : "cc"); \ + } while(0) + +// Fast Montgomery multiplication. The derivation of the algorithm is +// in A Cryptographic Library for the Motorola DSP56000, +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + +static void __attribute__((noinline)) +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + for (j = 0; j < i; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + MACC(a[i], b[0], t0, t1, t2); + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery multiply"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int j; + for (j = i-len+1; j < len; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Fast Montgomery squaring. This uses asymptotically 25% fewer +// multiplies so it should be up to 25% faster than Montgomery +// multiplication. However, its loop control is more complex and it +// may actually run slower on some machines. + +static void __attribute__((noinline)) +montgomery_square(unsigned long a[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + int end = (i+1)/2; + for (j = 0; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < i; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery square"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int start = i-len+1; + int end = start + (len - start)/2; + int j; + for (j = start; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < len; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Swap words in a longword. +static unsigned long swap(unsigned long x) { + return (x << 32) | (x >> 32); +} + +// Copy len longwords from s to d, word-swapping as we go. The +// destination array is reversed. +static void reverse_words(unsigned long *s, unsigned long *d, int len) { + d += len; + while(len-- > 0) { + d--; + *d = swap(*s); + s++; + } +} + +// The threshold at which squaring is advantageous was determined +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. +#define MONTGOMERY_SQUARING_THRESHOLD 64 + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + //montgomery_square fails to pass BigIntegerTest on solaris amd64 + //on jdk7 and jdk8. +#ifndef SOLARIS + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { +#else + if (0) { +#endif + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +#endif // WINDOWS + #ifdef COMPILER2 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame //
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -3743,6 +3743,107 @@ return start; } +/** + * Arguments: + * + // Input: + // c_rarg0 - x address + // c_rarg1 - x length + // c_rarg2 - z address + // c_rarg3 - z lenth + * + */ + address generate_squareToLen() { + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); + + address start = __ pc(); + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...) + const Register x = rdi; + const Register len = rsi; + const Register z = r8; + const Register zlen = rcx; + + const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + setup_arg_regs(4); // x => rdi, len => rsi, z => rdx + // zlen => rcx + // r9 and r10 may be used to save non-volatile registers + __ movptr(r8, rdx); + __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - out address + * c_rarg1 - in address + * c_rarg2 - offset + * c_rarg3 - len + * not Win64 + * c_rarg4 - k + * Win64 + * rsp+40 - k + */ + address generate_mulAdd() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address start = __ pc(); + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register out = rdi; + const Register in = rsi; + const Register offset = r11; + const Register len = rcx; + const Register k = r8; + + // Next registers will be saved on stack in mul_add(). + const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx + // len => rcx, k => r8 + // r9 and r10 may be used to save non-volatile registers +#ifdef _WIN64 + // last argument is on stack on Win64 + __ movl(k, Address(rsp, 6 * wordSize)); +#endif + __ movptr(r11, rdx); // move offset in rdx to offset(r11) + __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + #undef __ #define __ masm-> @@ -3987,7 +4088,24 @@ if (UseMultiplyToLenIntrinsic) { StubRoutines::_multiplyToLen = generate_multiplyToLen(); } -#endif + if (UseSquareToLenIntrinsic) { + StubRoutines::_squareToLen = generate_squareToLen(); + } + if (UseMulAddIntrinsic) { + StubRoutines::_mulAdd = generate_mulAdd(); + } + +#ifndef _WINDOWS + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } +#endif // WINDOWS +#endif // COMPILER2 } public:
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -33,7 +33,7 @@ enum platform_dependent_constants { code_size1 = 19000, // simply increase if too small (assembler will crash if too small) - code_size2 = 22000 // simply increase if too small (assembler will crash if too small) + code_size2 = 23000 // simply increase if too small (assembler will crash if too small) }; class x86 {
--- a/src/cpu/x86/vm/vm_version_x86.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -703,6 +703,18 @@ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; } + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + UseSquareToLenIntrinsic = false; + } + if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + UseMulAddIntrinsic = false; + } + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = false; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = false; + } #else if (UseMultiplyToLenIntrinsic) { if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { @@ -710,6 +722,30 @@ } FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); } + if (UseSquareToLenIntrinsic) { + if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + warning("squareToLen intrinsic is not available in 32-bit VM"); + } + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); + } + if (UseMulAddIntrinsic) { + if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + warning("mulAdd intrinsic is not available in 32-bit VM"); + } + FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); + } + if (UseMontgomeryMultiplyIntrinsic) { + if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); + } + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); + } + if (UseMontgomerySquareIntrinsic) { + if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + warning("montgomerySquare intrinsic is not available in 32-bit VM"); + } + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); + } #endif #endif // COMPILER2
--- a/src/share/vm/classfile/dictionary.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/dictionary.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "classfile/dictionary.hpp" #include "classfile/systemDictionary.hpp" +#include "classfile/systemDictionaryShared.hpp" #include "memory/iterator.hpp" #include "oops/oop.inline.hpp" #include "prims/jvmtiRedefineClassesTrace.hpp" @@ -36,9 +37,16 @@ DictionaryEntry* Dictionary::_current_class_entry = NULL; int Dictionary::_current_class_index = 0; +size_t Dictionary::entry_size() { + if (DumpSharedSpaces) { + return SystemDictionaryShared::dictionary_entry_size(); + } else { + return sizeof(DictionaryEntry); + } +} Dictionary::Dictionary(int table_size) - : TwoOopHashtable<Klass*, mtClass>(table_size, sizeof(DictionaryEntry)) { + : TwoOopHashtable<Klass*, mtClass>(table_size, (int)entry_size()) { _current_class_index = 0; _current_class_entry = NULL; _pd_cache_table = new ProtectionDomainCacheTable(defaultProtectionDomainCacheSize); @@ -47,7 +55,7 @@ Dictionary::Dictionary(int table_size, HashtableBucket<mtClass>* t, int number_of_entries) - : TwoOopHashtable<Klass*, mtClass>(table_size, sizeof(DictionaryEntry), t, number_of_entries) { + : TwoOopHashtable<Klass*, mtClass>(table_size, (int)entry_size(), t, number_of_entries) { _current_class_index = 0; _current_class_entry = NULL; _pd_cache_table = new ProtectionDomainCacheTable(defaultProtectionDomainCacheSize); @@ -63,6 +71,9 @@ entry->set_loader_data(loader_data); entry->set_pd_set(NULL); assert(klass->oop_is_instance(), "Must be"); + if (DumpSharedSpaces) { + SystemDictionaryShared::init_shared_dictionary_entry(klass, entry); + } return entry; }
--- a/src/share/vm/classfile/dictionary.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/dictionary.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,6 +53,7 @@ DictionaryEntry* get_entry(int index, unsigned int hash, Symbol* name, ClassLoaderData* loader_data); +protected: DictionaryEntry* bucket(int i) { return (DictionaryEntry*)Hashtable<Klass*, mtClass>::bucket(i); } @@ -66,6 +67,8 @@ Hashtable<Klass*, mtClass>::add_entry(index, (HashtableEntry<Klass*, mtClass>*)new_entry); } + static size_t entry_size(); + public: Dictionary(int table_size); Dictionary(int table_size, HashtableBucket<mtClass>* t, int number_of_entries);
--- a/src/share/vm/classfile/systemDictionary.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/systemDictionary.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1198,8 +1198,13 @@ if (ik->super() != NULL) { Symbol* cn = ik->super()->name(); - resolve_super_or_fail(class_name, cn, - class_loader, protection_domain, true, CHECK_(nh)); + Klass *s = resolve_super_or_fail(class_name, cn, + class_loader, protection_domain, true, CHECK_(nh)); + if (s != ik->super()) { + // The dynamically resolved super class is not the same as the one we used during dump time, + // so we cannot use ik. + return nh; + } } Array<Klass*>* interfaces = ik->local_interfaces(); @@ -1212,7 +1217,12 @@ // reinitialized yet (they will be once the interface classes // are loaded) Symbol* name = k->name(); - resolve_super_or_fail(class_name, name, class_loader, protection_domain, false, CHECK_(nh)); + Klass* i = resolve_super_or_fail(class_name, name, class_loader, protection_domain, false, CHECK_(nh)); + if (k != i) { + // The dynamically resolved interface class is not the same as the one we used during dump time, + // so we cannot use ik. + return nh; + } } // Adjust methods to recover missing data. They need addresses for
--- a/src/share/vm/classfile/systemDictionaryShared.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/systemDictionaryShared.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #ifndef SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP #define SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP +#include "classfile/dictionary.hpp" #include "classfile/systemDictionary.hpp" class SystemDictionaryShared: public SystemDictionary { @@ -42,6 +43,22 @@ oop class_loader = loader_data->class_loader(); return (class_loader == NULL); } + + static size_t dictionary_entry_size() { + return sizeof(DictionaryEntry); + } + static void init_shared_dictionary_entry(Klass* k, DictionaryEntry* entry) {} + + // The (non-application) CDS implementation supports only classes in the boot + // class loader, which ensures that the verification dependencies are the same + // during archive creation time and runtime. Thus we can do the dependency checks + // entirely during archive creation time. + static void add_verification_dependency(Klass* k, Symbol* accessor_clsname, + Symbol* target_clsname) {} + static void finalize_verification_dependencies() {} + static bool check_verification_dependencies(Klass* k, Handle class_loader, + Handle protection_domain, + char** message_buffer, TRAPS) {return true;} }; #endif // SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP
--- a/src/share/vm/classfile/verificationType.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/verificationType.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "classfile/symbolTable.hpp" +#include "classfile/systemDictionaryShared.hpp" #include "classfile/verificationType.hpp" #include "classfile/verifier.hpp" @@ -73,7 +74,23 @@ Klass* from_class = SystemDictionary::resolve_or_fail( from.name(), Handle(THREAD, klass->class_loader()), Handle(THREAD, klass->protection_domain()), true, CHECK_false); - return InstanceKlass::cast(from_class)->is_subclass_of(this_class()); + bool result = InstanceKlass::cast(from_class)->is_subclass_of(this_class()); + if (result && DumpSharedSpaces) { + if (klass()->is_subclass_of(from_class) && klass()->is_subclass_of(this_class())) { + // No need to save verification dependency. At run time, <klass> will be + // loaded from the archived only if <from_class> and <this_class> are + // also loaded from the archive. I.e., all 3 classes are exactly the same + // as we saw at archive creation time. + } else { + // Save the dependency. At run time, we need to check that the condition + // from_class->is_subclass_of(this_class() is still true. + Symbol* accessor_clsname = from.name(); + Symbol* target_clsname = this_class()->name(); + SystemDictionaryShared::add_verification_dependency(klass(), + accessor_clsname, target_clsname); + } + } + return result; } } else if (is_array() && from.is_array()) { VerificationType comp_this = get_component(context, CHECK_false);
--- a/src/share/vm/classfile/verifier.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/verifier.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -2326,9 +2326,17 @@ case Bytecodes::_ifnonnull: target = bcs.dest(); if (visited_branches->contains(bci)) { - if (bci_stack->is_empty()) return true; - // Pop a bytecode starting offset and scan from there. - bcs.set_start(bci_stack->pop()); + if (bci_stack->is_empty()) { + if (handler_stack->is_empty()) { + return true; + } else { + // Parse the catch handlers for try blocks containing athrow. + bcs.set_start(handler_stack->pop()); + } + } else { + // Pop a bytecode starting offset and scan from there. + bcs.set_start(bci_stack->pop()); + } } else { if (target > bci) { // forward branch if (target >= code_length) return false; @@ -2351,9 +2359,17 @@ case Bytecodes::_goto_w: target = (opcode == Bytecodes::_goto ? bcs.dest() : bcs.dest_w()); if (visited_branches->contains(bci)) { - if (bci_stack->is_empty()) return true; - // Been here before, pop new starting offset from stack. - bcs.set_start(bci_stack->pop()); + if (bci_stack->is_empty()) { + if (handler_stack->is_empty()) { + return true; + } else { + // Parse the catch handlers for try blocks containing athrow. + bcs.set_start(handler_stack->pop()); + } + } else { + // Been here before, pop new starting offset from stack. + bcs.set_start(bci_stack->pop()); + } } else { if (target >= code_length) return false; // Continue scanning from the target onward.
--- a/src/share/vm/classfile/vmSymbols.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/classfile/vmSymbols.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -793,10 +793,26 @@ do_signature(encodeISOArray_signature, "([CI[BII)I") \ \ do_class(java_math_BigInteger, "java/math/BigInteger") \ - do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R) \ + do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S) \ do_name( multiplyToLen_name, "multiplyToLen") \ do_signature(multiplyToLen_signature, "([II[II[I)[I") \ \ + do_intrinsic(_squareToLen, java_math_BigInteger, squareToLen_name, squareToLen_signature, F_S) \ + do_name( squareToLen_name, "implSquareToLen") \ + do_signature(squareToLen_signature, "([II[II)[I") \ + \ + do_intrinsic(_mulAdd, java_math_BigInteger, mulAdd_name, mulAdd_signature, F_S) \ + do_name( mulAdd_name, "implMulAdd") \ + do_signature(mulAdd_signature, "([I[IIII)I") \ + \ + do_intrinsic(_montgomeryMultiply, java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \ + do_name( montgomeryMultiply_name, "implMontgomeryMultiply") \ + do_signature(montgomeryMultiply_signature, "([I[I[IIJ[I)[I") \ + \ + do_intrinsic(_montgomerySquare, java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S) \ + do_name( montgomerySquare_name, "implMontgomerySquare") \ + do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \ + \ /* java/lang/ref/Reference */ \ do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ \
--- a/src/share/vm/interpreter/bytecodeStream.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/interpreter/bytecodeStream.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,12 +31,12 @@ // set next bytecode position address bcp = RawBytecodeStream::bcp(); address end = method()->code_base() + end_bci(); - int l = Bytecodes::raw_special_length_at(bcp, end); - if (l <= 0 || (_bci + l) > _end_bci) { + int len = Bytecodes::raw_special_length_at(bcp, end); + // Very large tableswitch or lookupswitch size can cause _next_bci to overflow. + if (len <= 0 || (_bci > _end_bci - len) || (_bci - len >= _next_bci)) { code = Bytecodes::_illegal; } else { - _next_bci += l; - assert(_bci < _next_bci, "length must be > 0"); + _next_bci += len; // set attributes _is_wide = false; // check for special (uncommon) cases
--- a/src/share/vm/interpreter/bytecodeStream.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/interpreter/bytecodeStream.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -152,12 +152,15 @@ code = Bytecodes::code_or_bp_at(bcp); // set next bytecode position - int l = Bytecodes::length_for(code); - if (l > 0 && (_bci + l) <= _end_bci) { + int len = Bytecodes::length_for(code); + if (len > 0 && (_bci <= _end_bci - len)) { assert(code != Bytecodes::_wide && code != Bytecodes::_tableswitch && code != Bytecodes::_lookupswitch, "can't be special bytecode"); _is_wide = false; - _next_bci += l; + _next_bci += len; + if (_next_bci <= _bci) { // Check for integer overflow + code = Bytecodes::_illegal; + } _raw_code = code; return code; } else { @@ -206,19 +209,23 @@ // note that we cannot advance before having the // tty bytecode otherwise the stepping is wrong! // (carefull: length_for(...) must be used first!) - int l = Bytecodes::length_for(code); - if (l == 0) l = Bytecodes::length_at(_method(), bcp); - _next_bci += l; - assert(_bci < _next_bci, "length must be > 0"); - // set attributes - _is_wide = false; - // check for special (uncommon) cases - if (code == Bytecodes::_wide) { - raw_code = (Bytecodes::Code)bcp[1]; - code = raw_code; // wide BCs are always Java-normal - _is_wide = true; + int len = Bytecodes::length_for(code); + if (len == 0) len = Bytecodes::length_at(_method(), bcp); + if (len <= 0 || (_bci > _end_bci - len) || (_bci - len >= _next_bci)) { + raw_code = code = Bytecodes::_illegal; + } else { + _next_bci += len; + assert(_bci < _next_bci, "length must be > 0"); + // set attributes + _is_wide = false; + // check for special (uncommon) cases + if (code == Bytecodes::_wide) { + raw_code = (Bytecodes::Code)bcp[1]; + code = raw_code; // wide BCs are always Java-normal + _is_wide = true; + } + assert(Bytecodes::is_java_code(code), "sanity check"); } - assert(Bytecodes::is_java_code(code), "sanity check"); } _raw_code = raw_code; _code = code;
--- a/src/share/vm/memory/metaspaceShared.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/memory/metaspaceShared.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,7 @@ #include "classfile/sharedClassUtil.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" +#include "classfile/systemDictionaryShared.hpp" #include "code/codeCache.hpp" #include "memory/filemap.hpp" #include "memory/gcLocker.hpp" @@ -53,6 +54,7 @@ bool MetaspaceShared::_check_classes_made_progress; bool MetaspaceShared::_has_error_classes; bool MetaspaceShared::_archive_loading_failed = false; +bool MetaspaceShared::_remapped_readwrite = false; // Read/write a data stream for restoring/preserving metadata pointers and // miscellaneous data from/to the shared archive file. @@ -684,6 +686,10 @@ exit(1); } } + + // Copy the dependencies from C_HEAP-alloced GrowableArrays to RO-alloced + // Arrays + SystemDictionaryShared::finalize_verification_dependencies(); } void MetaspaceShared::prepare_for_dumping() { @@ -1100,6 +1106,7 @@ if (!mapinfo->remap_shared_readonly_as_readwrite()) { return false; } + _remapped_readwrite = true; } return true; }
--- a/src/share/vm/memory/metaspaceShared.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/memory/metaspaceShared.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,6 +55,7 @@ static bool _check_classes_made_progress; static bool _has_error_classes; static bool _archive_loading_failed; + static bool _remapped_readwrite; public: enum { vtbl_list_size = 17, // number of entries in the shared space vtable list. @@ -123,6 +124,10 @@ // sharing is enabled. Simply returns true if sharing is not enabled // or if the remapping has already been done by a prior call. static bool remap_shared_readonly_as_readwrite() NOT_CDS_RETURN_(true); + static bool remapped_readwrite() { + CDS_ONLY(return _remapped_readwrite); + NOT_CDS(return false); + } static void print_shared_spaces();
--- a/src/share/vm/oops/instanceKlass.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/oops/instanceKlass.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "classfile/javaClasses.hpp" #include "classfile/systemDictionary.hpp" +#include "classfile/systemDictionaryShared.hpp" #include "classfile/verifier.hpp" #include "classfile/vmSymbols.hpp" #include "compiler/compileBroker.hpp" @@ -706,6 +707,16 @@ // also sets rewritten this_oop->rewrite_class(CHECK_false); + } else if (this_oop()->is_shared()) { + ResourceMark rm(THREAD); + char* message_buffer; // res-allocated by check_verification_dependencies + Handle loader = this_oop()->class_loader(); + Handle pd = this_oop()->protection_domain(); + bool verified = SystemDictionaryShared::check_verification_dependencies(this_oop(), + loader, pd, &message_buffer, THREAD); + if (!verified) { + THROW_MSG_(vmSymbols::java_lang_VerifyError(), message_buffer, false); + } } // relocate jsrs and link methods after they are all rewritten @@ -715,7 +726,12 @@ // methods have been rewritten since rewrite may // fabricate new Method*s. // also does loader constraint checking - if (!this_oop()->is_shared()) { + // + // Initialize_vtable and initialize_itable need to be rerun for + // a shared class if the class is not loaded by the NULL classloader. + ClassLoaderData * loader_data = this_oop->class_loader_data(); + if (!(this_oop()->is_shared() && + loader_data->is_the_null_class_loader_data())) { ResourceMark rm(THREAD); this_oop->vtable()->initialize_vtable(true, CHECK_false); this_oop->itable()->initialize_itable(true, CHECK_false);
--- a/src/share/vm/oops/klassVtable.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/oops/klassVtable.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -27,6 +27,7 @@ #include "classfile/vmSymbols.hpp" #include "gc_implementation/shared/markSweep.inline.hpp" #include "memory/gcLocker.hpp" +#include "memory/metaspaceShared.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.inline.hpp" #include "oops/instanceKlass.hpp" @@ -47,6 +48,10 @@ return (InstanceKlass*)k; } +bool klassVtable::is_preinitialized_vtable() { + return _klass->is_shared() && !MetaspaceShared::remapped_readwrite(); +} + // this function computes the vtable size (including the size needed for miranda // methods) and the number of miranda methods in this class. @@ -128,6 +133,12 @@ int klassVtable::initialize_from_super(KlassHandle super) { if (super.is_null()) { return 0; + } else if (is_preinitialized_vtable()) { + // A shared class' vtable is preinitialized at dump time. No need to copy + // methods from super class for shared class, as that was already done + // during archiving time. However, if Jvmti has redefined a class, + // copy super class's vtable in case the super class has changed. + return super->vtable()->length(); } else { // copy methods from superKlass // can't inherit from array class, so must be InstanceKlass @@ -157,6 +168,8 @@ KlassHandle super (THREAD, klass()->java_super()); int nofNewEntries = 0; + bool is_shared = _klass->is_shared(); + if (PrintVtables && !klass()->oop_is_array()) { ResourceMark rm(THREAD); tty->print_cr("Initializing: %s", _klass->name()->as_C_string()); @@ -169,6 +182,7 @@ #endif if (Universe::is_bootstrapping()) { + assert(!is_shared, "sanity"); // just clear everything for (int i = 0; i < _length; i++) table()[i].clear(); return; @@ -208,6 +222,7 @@ if (len > 0) { Array<int>* def_vtable_indices = NULL; if ((def_vtable_indices = ik()->default_vtable_indices()) == NULL) { + assert(!is_shared, "shared class def_vtable_indices does not exist"); def_vtable_indices = ik()->create_new_default_vtable_indices(len, CHECK); } else { assert(def_vtable_indices->length() == len, "reinit vtable len?"); @@ -222,7 +237,15 @@ // needs new entry if (needs_new_entry) { put_method_at(mh(), initialized); - def_vtable_indices->at_put(i, initialized); //set vtable index + if (is_preinitialized_vtable()) { + // At runtime initialize_vtable is rerun for a shared class + // (loaded by the non-boot loader) as part of link_class_impl(). + // The dumptime vtable index should be the same as the runtime index. + assert(def_vtable_indices->at(i) == initialized, + "dump time vtable index is different from runtime index"); + } else { + def_vtable_indices->at_put(i, initialized); //set vtable index + } initialized++; } } @@ -365,7 +388,8 @@ } // we need a new entry if there is no superclass - if (klass->super() == NULL) { + Klass* super = klass->super(); + if (super == NULL) { return allocate_new; } @@ -394,7 +418,15 @@ Symbol* target_classname = target_klass->name(); for(int i = 0; i < super_vtable_len; i++) { - Method* super_method = method_at(i); + Method* super_method; + if (is_preinitialized_vtable()) { + // If this is a shared class, the vtable is already in the final state (fully + // initialized). Need to look at the super's vtable. + klassVtable* superVtable = super->vtable(); + super_method = superVtable->method_at(i); + } else { + super_method = method_at(i); + } // Check if method name matches if (super_method->name() == name && super_method->signature() == signature) { @@ -458,7 +490,15 @@ target_method()->set_vtable_index(i); } else { if (def_vtable_indices != NULL) { - def_vtable_indices->at_put(default_index, i); + if (is_preinitialized_vtable()) { + // At runtime initialize_vtable is rerun as part of link_class_impl() + // for a shared class loaded by the non-boot loader. + // The dumptime vtable index should be the same as the runtime index. + assert(def_vtable_indices->at(default_index) == i, + "dump time vtable index is different from runtime index"); + } else { + def_vtable_indices->at_put(default_index, i); + } } assert(super_method->is_default_method() || super_method->is_overpass() || super_method->is_abstract(), "default override error"); @@ -523,24 +563,33 @@ } void klassVtable::put_method_at(Method* m, int index) { + if (is_preinitialized_vtable()) { + // At runtime initialize_vtable is rerun as part of link_class_impl() + // for shared class loaded by the non-boot loader to obtain the loader + // constraints based on the runtime classloaders' context. The dumptime + // method at the vtable index should be the same as the runtime method. + assert(table()[index].method() == m, + "archived method is different from the runtime method"); + } else { #ifndef PRODUCT - if (PrintVtables && Verbose) { - ResourceMark rm; - const char* sig = (m != NULL) ? m->name_and_sig_as_C_string() : "<NULL>"; - tty->print("adding %s at index %d, flags: ", sig, index); - if (m != NULL) { - m->access_flags().print_on(tty); - if (m->is_default_method()) { - tty->print("default "); + if (PrintVtables && Verbose) { + ResourceMark rm; + const char* sig = (m != NULL) ? m->name_and_sig_as_C_string() : "<NULL>"; + tty->print("adding %s at index %d, flags: ", sig, index); + if (m != NULL) { + m->access_flags().print_on(tty); + if (m->is_default_method()) { + tty->print("default "); + } + if (m->is_overpass()) { + tty->print("overpass"); + } } - if (m->is_overpass()) { - tty->print("overpass"); - } + tty->cr(); } - tty->cr(); +#endif + table()[index].set(m); } -#endif - table()[index].set(m); } // Find out if a method "m" with superclass "super", loader "classloader" and @@ -971,7 +1020,15 @@ void itableMethodEntry::initialize(Method* m) { if (m == NULL) return; - _method = m; + if (MetaspaceShared::is_in_shared_space((void*)&_method) && + !MetaspaceShared::remapped_readwrite()) { + // At runtime initialize_itable is rerun as part of link_class_impl() + // for a shared class loaded by the non-boot loader. + // The dumptime itable method entry should be the same as the runtime entry. + assert(_method == m, "sanity"); + } else { + _method = m; + } } klassItable::klassItable(instanceKlassHandle klass) { @@ -1081,7 +1138,11 @@ tty->cr(); } if (!m->has_vtable_index()) { - assert(m->vtable_index() == Method::pending_itable_index, "set by initialize_vtable"); + // A shared method could have an initialized itable_index that + // is < 0. + assert(m->vtable_index() == Method::pending_itable_index || + m->is_shared(), + "set by initialize_vtable"); m->set_itable_index(ime_num); // Progress to next itable entry ime_num++; @@ -1277,7 +1338,6 @@ } #endif // INCLUDE_JVMTI - // Setup class InterfaceVisiterClosure : public StackObj { public:
--- a/src/share/vm/oops/klassVtable.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/oops/klassVtable.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -142,6 +142,19 @@ Array<Klass*>* local_interfaces); void verify_against(outputStream* st, klassVtable* vt, int index); inline InstanceKlass* ik() const; + // When loading a class from CDS archive at run time, and no class redefintion + // has happened, it is expected that the class's itable/vtables are + // laid out exactly the same way as they had been during dump time. + // Therefore, in klassVtable::initialize_[iv]table, we do not layout the + // tables again. Instead, we only rerun the process to create/check + // the class loader constraints. In non-product builds, we add asserts to + // guarantee that the table's layout would be the same as at dump time. + // + // If JVMTI redefines any class, the read-only shared memory are remapped + // as read-write. A shared class' vtable/itable are re-initialized and + // might have different layout due to class redefinition of the shared class + // or its super types. + bool is_preinitialized_vtable(); };
--- a/src/share/vm/oops/method.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/oops/method.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -36,6 +36,7 @@ #include "memory/generation.hpp" #include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" +#include "memory/metaspaceShared.hpp" #include "memory/oopFactory.hpp" #include "oops/constMethod.hpp" #include "oops/methodData.hpp" @@ -310,6 +311,33 @@ unlink_method(); } +void Method::set_vtable_index(int index) { + if (is_shared() && !MetaspaceShared::remapped_readwrite()) { + // At runtime initialize_vtable is rerun as part of link_class_impl() + // for a shared class loaded by the non-boot loader to obtain the loader + // constraints based on the runtime classloaders' context. + return; // don't write into the shared class + } else { + _vtable_index = index; + } +} + +void Method::set_itable_index(int index) { + if (is_shared() && !MetaspaceShared::remapped_readwrite()) { + // At runtime initialize_itable is rerun as part of link_class_impl() + // for a shared class loaded by the non-boot loader to obtain the loader + // constraints based on the runtime classloaders' context. The dumptime + // itable index should be the same as the runtime index. + assert(_vtable_index == itable_index_max - index, + "archived itable index is different from runtime index"); + return; // don’t write into the shared class + } else { + _vtable_index = itable_index_max - index; + } + assert(valid_itable_index(), ""); +} + + bool Method::was_executed_more_than(int n) { // Invocation counter is reset when the Method* is compiled.
--- a/src/share/vm/oops/method.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/oops/method.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -470,12 +470,12 @@ DEBUG_ONLY(bool valid_vtable_index() const { return _vtable_index >= nonvirtual_vtable_index; }) bool has_vtable_index() const { return _vtable_index >= 0; } int vtable_index() const { return _vtable_index; } - void set_vtable_index(int index) { _vtable_index = index; } + void set_vtable_index(int index); DEBUG_ONLY(bool valid_itable_index() const { return _vtable_index <= pending_itable_index; }) bool has_itable_index() const { return _vtable_index <= itable_index_max; } int itable_index() const { assert(valid_itable_index(), ""); return itable_index_max - _vtable_index; } - void set_itable_index(int index) { _vtable_index = itable_index_max - index; assert(valid_itable_index(), ""); } + void set_itable_index(int index); // interpreter entry address interpreter_entry() const { return _i2i_entry; }
--- a/src/share/vm/opto/c2_globals.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/c2_globals.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -665,6 +665,18 @@ product(bool, UseMultiplyToLenIntrinsic, false, \ "Enables intrinsification of BigInteger.multiplyToLen()") \ \ + product(bool, UseSquareToLenIntrinsic, false, \ + "Enables intrinsification of BigInteger.squareToLen()") \ + \ + product(bool, UseMulAddIntrinsic, false, \ + "Enables intrinsification of BigInteger.mulAdd()") \ + \ + product(bool, UseMontgomeryMultiplyIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomeryMultiply()") \ + \ + product(bool, UseMontgomerySquareIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomerySquare()") \ + \ product(bool, UseTypeSpeculation, true, \ "Speculatively propagate types from profiles") \ \
--- a/src/share/vm/opto/compile.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/compile.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -417,6 +417,13 @@ remove_macro_node(n); } } + // Remove useless CastII nodes with range check dependency + for (int i = range_check_cast_count() - 1; i >= 0; i--) { + Node* cast = range_check_cast_node(i); + if (!useful.member(cast)) { + remove_range_check_cast(cast); + } + } // Remove useless expensive node for (int i = C->expensive_count()-1; i >= 0; i--) { Node* n = C->expensive_node(i); @@ -1184,6 +1191,7 @@ _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); + _range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1912,6 +1920,22 @@ assert(predicate_count()==0, "should be clean!"); } +void Compile::add_range_check_cast(Node* n) { + assert(n->isa_CastII()->has_range_check(), "CastII should have range check dependency"); + assert(!_range_check_casts->contains(n), "duplicate entry in range check casts"); + _range_check_casts->append(n); +} + +// Remove all range check dependent CastIINodes. +void Compile::remove_range_check_casts(PhaseIterGVN &igvn) { + for (int i = range_check_cast_count(); i > 0; i--) { + Node* cast = range_check_cast_node(i-1); + assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency"); + igvn.replace_node(cast, cast->in(1)); + } + assert(range_check_cast_count() == 0, "should be empty"); +} + // StringOpts and late inlining of string methods void Compile::inline_string_calls(bool parse_time) { { @@ -2254,6 +2278,12 @@ PhaseIdealLoop::verify(igvn); } + if (range_check_cast_count() > 0) { + // No more loop optimizations. Remove all range check dependent CastIINodes. + C->remove_range_check_casts(igvn); + igvn.optimize(); + } + { NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); ) PhaseMacroExpand mex(igvn); @@ -3023,6 +3053,16 @@ #endif +#ifdef ASSERT + case Op_CastII: + // Verify that all range check dependent CastII nodes were removed. + if (n->isa_CastII()->has_range_check()) { + n->dump(3); + assert(false, "Range check dependent CastII node was not removed"); + } + break; +#endif + case Op_ModI: if (UseDivMod) { // Check if a%b and a/b both exist @@ -4060,6 +4100,24 @@ } } +// Convert integer value to a narrowed long type dependent on ctrl (for example, a range check) +Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl) { + if (ctrl != NULL) { + // Express control dependency by a CastII node with a narrow type. + value = new (phase->C) CastIINode(value, itype, false, true /* range check dependency */); + // Make the CastII node dependent on the control input to prevent the narrowed ConvI2L + // node from floating above the range check during loop optimizations. Otherwise, the + // ConvI2L node may be eliminated independently of the range check, causing the data path + // to become TOP while the control path is still there (although it's unreachable). + value->set_req(0, ctrl); + // Save CastII node to remove it after loop optimizations. + phase->C->add_range_check_cast(value); + value = phase->transform(value); + } + const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen); + return phase->transform(new (phase->C) ConvI2LNode(value, ltype)); +} + // Auxiliary method to support randomized stressing/fuzzing. // // This method can be called the arbitrary number of times, with current count
--- a/src/share/vm/opto/compile.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/compile.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -75,6 +75,7 @@ class JVMState; class Type; class TypeData; +class TypeInt; class TypePtr; class TypeOopPtr; class TypeFunc; @@ -334,6 +335,7 @@ GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. GrowableArray<Node*>* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common + GrowableArray<Node*>* _range_check_casts; // List of CastII nodes with a range check dependency ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; @@ -669,7 +671,7 @@ void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} void add_macro_node(Node * n) { //assert(n->is_macro(), "must be a macro node"); - assert(!_macro_nodes->contains(n), " duplicate entry in expand list"); + assert(!_macro_nodes->contains(n), "duplicate entry in expand list"); _macro_nodes->append(n); } void remove_macro_node(Node * n) { @@ -689,10 +691,23 @@ } } void add_predicate_opaq(Node * n) { - assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); + assert(!_predicate_opaqs->contains(n), "duplicate entry in predicate opaque1"); assert(_macro_nodes->contains(n), "should have already been in macro list"); _predicate_opaqs->append(n); } + + // Range check dependent CastII nodes that can be removed after loop optimizations + void add_range_check_cast(Node* n); + void remove_range_check_cast(Node* n) { + if (_range_check_casts->contains(n)) { + _range_check_casts->remove(n); + } + } + Node* range_check_cast_node(int idx) const { return _range_check_casts->at(idx); } + int range_check_cast_count() const { return _range_check_casts->length(); } + // Remove all range check dependent CastIINodes. + void remove_range_check_casts(PhaseIterGVN &igvn); + // remove the opaque nodes that protect the predicates so that the unused checks and // uncommon traps will be eliminated from the graph. void cleanup_loop_predicates(PhaseIterGVN &igvn); @@ -1201,6 +1216,9 @@ // Definitions of pd methods static void pd_compiler2_init(); + // Convert integer value to a narrowed long type dependent on ctrl (for example, a range check) + static Node* constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl); + // Auxiliary method for randomized fuzzing/stressing static bool randomized_select(int count); };
--- a/src/share/vm/opto/connode.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/connode.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -535,6 +535,9 @@ if (_carry_dependency) { st->print(" carry dependency"); } + if (_range_check_dependency) { + st->print(" range check dependency"); + } } #endif @@ -994,7 +997,8 @@ } #ifdef _LP64 - // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) , + // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) or + // ConvI2L(CastII(AddI(x, y))) to AddL(ConvI2L(CastII(x)), ConvI2L(CastII(y))), // but only if x and y have subranges that cannot cause 32-bit overflow, // under the assumption that x+y is in my own subrange this->type(). @@ -1018,6 +1022,13 @@ Node* z = in(1); int op = z->Opcode(); + Node* ctrl = NULL; + if (op == Op_CastII && z->as_CastII()->has_range_check()) { + // Skip CastII node but save control dependency + ctrl = z->in(0); + z = z->in(1); + op = z->Opcode(); + } if (op == Op_AddI || op == Op_SubI) { Node* x = z->in(1); Node* y = z->in(2); @@ -1075,9 +1086,10 @@ rylo = -ryhi; ryhi = -rylo0; } - - Node* cx = phase->transform( new (phase->C) ConvI2LNode(x, TypeLong::make(rxlo, rxhi, widen)) ); - Node* cy = phase->transform( new (phase->C) ConvI2LNode(y, TypeLong::make(rylo, ryhi, widen)) ); + assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow"); + assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow"); + Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), ctrl); + Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), ctrl); switch (op) { case Op_AddI: return new (phase->C) AddLNode(cx, cy); case Op_SubI: return new (phase->C) SubLNode(cx, cy);
--- a/src/share/vm/opto/connode.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/connode.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -244,19 +244,31 @@ private: // Can this node be removed post CCP or does it carry a required dependency? const bool _carry_dependency; + // Is this node dependent on a range check? + const bool _range_check_dependency; protected: virtual uint cmp( const Node &n ) const; virtual uint size_of() const; public: - CastIINode(Node *n, const Type *t, bool carry_dependency = false) - : ConstraintCastNode(n,t), _carry_dependency(carry_dependency) {} + CastIINode(Node *n, const Type *t, bool carry_dependency = false, bool range_check_dependency = false) + : ConstraintCastNode(n,t), _carry_dependency(carry_dependency), _range_check_dependency(range_check_dependency) { + init_class_id(Class_CastII); + } virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegI; } virtual Node *Identity( PhaseTransform *phase ); virtual const Type *Value( PhaseTransform *phase ) const; virtual Node *Ideal_DU_postCCP( PhaseCCP * ); + const bool has_range_check() { + #ifdef _LP64 + return _range_check_dependency; + #else + assert(!_range_check_dependency, "Should not have range check dependency"); + return false; + #endif + } #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif
--- a/src/share/vm/opto/escape.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/escape.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -958,8 +958,12 @@ strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 || strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 || strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 || - strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0) - ))) { + strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 || + strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 || + strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 || + strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 || + strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0) + ))) { call->dump(); fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); }
--- a/src/share/vm/opto/graphKit.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/graphKit.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1645,7 +1645,7 @@ //-------------------------array_element_address------------------------- Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt, - const TypeInt* sizetype) { + const TypeInt* sizetype, Node* ctrl) { uint shift = exact_log2(type2aelembytes(elembt)); uint header = arrayOopDesc::base_offset_in_bytes(elembt); @@ -1670,9 +1670,9 @@ // number. (The prior range check has ensured this.) // This assertion is used by ConvI2LNode::Ideal. int index_max = max_jint - 1; // array size is max_jint, index is one less - if (sizetype != NULL) index_max = sizetype->_hi - 1; - const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax); - idx = _gvn.transform( new (C) ConvI2LNode(idx, lidxtype) ); + if (sizetype != NULL) index_max = sizetype->_hi - 1; + const TypeInt* iidxtype = TypeInt::make(0, index_max, Type::WidenMax); + idx = C->constrained_convI2L(&_gvn, idx, iidxtype, ctrl); #endif Node* scale = _gvn.transform( new (C) LShiftXNode(idx, intcon(shift)) ); return basic_plus_adr(ary, base, scale); @@ -3491,10 +3491,6 @@ Node* initial_slow_cmp = _gvn.transform( new (C) CmpUNode( length, intcon( fast_size_limit ) ) ); Node* initial_slow_test = _gvn.transform( new (C) BoolNode( initial_slow_cmp, BoolTest::gt ) ); - if (initial_slow_test->is_Bool()) { - // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick. - initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn); - } // --- Size Computation --- // array_size = round_to_heap(array_header + (length << elem_shift)); @@ -3540,13 +3536,35 @@ Node* lengthx = ConvI2X(length); Node* headerx = ConvI2X(header_size); #ifdef _LP64 - { const TypeLong* tllen = _gvn.find_long_type(lengthx); - if (tllen != NULL && tllen->_lo < 0) { + { const TypeInt* tilen = _gvn.find_int_type(length); + if (tilen != NULL && tilen->_lo < 0) { // Add a manual constraint to a positive range. Cf. array_element_address. - jlong size_max = arrayOopDesc::max_array_length(T_BYTE); - if (size_max > tllen->_hi) size_max = tllen->_hi; - const TypeLong* tlcon = TypeLong::make(CONST64(0), size_max, Type::WidenMin); - lengthx = _gvn.transform( new (C) ConvI2LNode(length, tlcon)); + jlong size_max = fast_size_limit; + if (size_max > tilen->_hi) size_max = tilen->_hi; + const TypeInt* tlcon = TypeInt::make(0, size_max, Type::WidenMin); + + // Only do a narrow I2L conversion if the range check passed. + IfNode* iff = new (C) IfNode(control(), initial_slow_test, PROB_MIN, COUNT_UNKNOWN); + _gvn.transform(iff); + RegionNode* region = new (C) RegionNode(3); + _gvn.set_type(region, Type::CONTROL); + lengthx = new (C) PhiNode(region, TypeLong::LONG); + _gvn.set_type(lengthx, TypeLong::LONG); + + // Range check passed. Use ConvI2L node with narrow type. + Node* passed = IfFalse(iff); + region->init_req(1, passed); + // Make I2L conversion control dependent to prevent it from + // floating above the range check during loop optimizations. + lengthx->init_req(1, C->constrained_convI2L(&_gvn, length, tlcon, passed)); + + // Range check failed. Use ConvI2L with wide type because length may be invalid. + region->init_req(2, IfTrue(iff)); + lengthx->init_req(2, ConvI2X(length)); + + set_control(region); + record_for_igvn(region); + record_for_igvn(lengthx); } } #endif @@ -3577,6 +3595,11 @@ Node *mem = reset_memory(); set_all_memory(mem); // Create new memory state + if (initial_slow_test->is_Bool()) { + // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick. + initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn); + } + // Create the AllocateArrayNode and its result projections AllocateArrayNode* alloc = new (C) AllocateArrayNode(C, AllocateArrayNode::alloc_type(TypeInt::INT),
--- a/src/share/vm/opto/graphKit.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/graphKit.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -626,7 +626,9 @@ // Return addressing for an array element. Node* array_element_address(Node* ary, Node* idx, BasicType elembt, // Optional constraint on the array size: - const TypeInt* sizetype = NULL); + const TypeInt* sizetype = NULL, + // Optional control dependency (for example, on range check) + Node* ctrl = NULL); // Return a load of array element at idx. Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
--- a/src/share/vm/opto/library_call.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/library_call.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -324,6 +324,10 @@ bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); bool inline_multiplyToLen(); + bool inline_squareToLen(); + bool inline_mulAdd(); + bool inline_montgomeryMultiply(); + bool inline_montgomerySquare(); bool inline_profileBoolean(); }; @@ -527,6 +531,21 @@ if (!UseMultiplyToLenIntrinsic) return NULL; break; + case vmIntrinsics::_squareToLen: + if (!UseSquareToLenIntrinsic) return NULL; + break; + + case vmIntrinsics::_mulAdd: + if (!UseMulAddIntrinsic) return NULL; + break; + + case vmIntrinsics::_montgomeryMultiply: + if (!UseMontgomeryMultiplyIntrinsic) return NULL; + break; + case vmIntrinsics::_montgomerySquare: + if (!UseMontgomerySquareIntrinsic) return NULL; + break; + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: if (!UseAESIntrinsics) return NULL; @@ -927,6 +946,17 @@ case vmIntrinsics::_multiplyToLen: return inline_multiplyToLen(); + case vmIntrinsics::_squareToLen: + return inline_squareToLen(); + + case vmIntrinsics::_mulAdd: + return inline_mulAdd(); + + case vmIntrinsics::_montgomeryMultiply: + return inline_montgomeryMultiply(); + case vmIntrinsics::_montgomerySquare: + return inline_montgomerySquare(); + case vmIntrinsics::_encodeISOArray: return inline_encodeISOArray(); @@ -5767,11 +5797,12 @@ assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters"); - Node* x = argument(1); - Node* xlen = argument(2); - Node* y = argument(3); - Node* ylen = argument(4); - Node* z = argument(5); + // no receiver because it is a static method + Node* x = argument(0); + Node* xlen = argument(1); + Node* y = argument(2); + Node* ylen = argument(3); + Node* z = argument(4); const Type* x_type = x->Value(&_gvn); const Type* y_type = y->Value(&_gvn); @@ -5856,6 +5887,215 @@ return true; } +//-------------inline_squareToLen------------------------------------ +bool LibraryCallKit::inline_squareToLen() { + assert(UseSquareToLenIntrinsic, "not implementated on this platform"); + + address stubAddr = StubRoutines::squareToLen(); + if (stubAddr == NULL) { + return false; // Intrinsic's stub is not implemented on this platform + } + const char* stubName = "squareToLen"; + + assert(callee()->signature()->size() == 4, "implSquareToLen has 4 parameters"); + + Node* x = argument(0); + Node* len = argument(1); + Node* z = argument(2); + Node* zlen = argument(3); + + const Type* x_type = x->Value(&_gvn); + const Type* z_type = z->Value(&_gvn); + const TypeAryPtr* top_x = x_type->isa_aryptr(); + const TypeAryPtr* top_z = z_type->isa_aryptr(); + if (top_x == NULL || top_x->klass() == NULL || + top_z == NULL || top_z->klass() == NULL) { + // failed array check + return false; + } + + BasicType x_elem = x_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType z_elem = z_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (x_elem != T_INT || z_elem != T_INT) { + return false; + } + + + Node* x_start = array_element_address(x, intcon(0), x_elem); + Node* z_start = array_element_address(z, intcon(0), z_elem); + + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::squareToLen_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + x_start, len, z_start, zlen); + + set_result(z); + return true; +} + +//-------------inline_mulAdd------------------------------------------ +bool LibraryCallKit::inline_mulAdd() { + assert(UseMulAddIntrinsic, "not implementated on this platform"); + + address stubAddr = StubRoutines::mulAdd(); + if (stubAddr == NULL) { + return false; // Intrinsic's stub is not implemented on this platform + } + const char* stubName = "mulAdd"; + + assert(callee()->signature()->size() == 5, "mulAdd has 5 parameters"); + + Node* out = argument(0); + Node* in = argument(1); + Node* offset = argument(2); + Node* len = argument(3); + Node* k = argument(4); + + const Type* out_type = out->Value(&_gvn); + const Type* in_type = in->Value(&_gvn); + const TypeAryPtr* top_out = out_type->isa_aryptr(); + const TypeAryPtr* top_in = in_type->isa_aryptr(); + if (top_out == NULL || top_out->klass() == NULL || + top_in == NULL || top_in->klass() == NULL) { + // failed array check + return false; + } + + BasicType out_elem = out_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType in_elem = in_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (out_elem != T_INT || in_elem != T_INT) { + return false; + } + + Node* outlen = load_array_length(out); + Node* new_offset = _gvn.transform(new (C) SubINode(outlen, offset)); + Node* out_start = array_element_address(out, intcon(0), out_elem); + Node* in_start = array_element_address(in, intcon(0), in_elem); + + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::mulAdd_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + out_start,in_start, new_offset, len, k); + Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + +//-------------inline_montgomeryMultiply----------------------------------- +bool LibraryCallKit::inline_montgomeryMultiply() { + address stubAddr = StubRoutines::montgomeryMultiply(); + if (stubAddr == NULL) { + return false; // Intrinsic's stub is not implemented on this platform + } + + assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform"); + const char* stubName = "montgomery_square"; + + assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters"); + + Node* a = argument(0); + Node* b = argument(1); + Node* n = argument(2); + Node* len = argument(3); + Node* inv = argument(4); + Node* m = argument(6); + + const Type* a_type = a->Value(&_gvn); + const TypeAryPtr* top_a = a_type->isa_aryptr(); + const Type* b_type = b->Value(&_gvn); + const TypeAryPtr* top_b = b_type->isa_aryptr(); + const Type* n_type = a->Value(&_gvn); + const TypeAryPtr* top_n = n_type->isa_aryptr(); + const Type* m_type = a->Value(&_gvn); + const TypeAryPtr* top_m = m_type->isa_aryptr(); + if (top_a == NULL || top_a->klass() == NULL || + top_b == NULL || top_b->klass() == NULL || + top_n == NULL || top_n->klass() == NULL || + top_m == NULL || top_m->klass() == NULL) { + // failed array check + return false; + } + + BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) { + return false; + } + + // Make the call + { + Node* a_start = array_element_address(a, intcon(0), a_elem); + Node* b_start = array_element_address(b, intcon(0), b_elem); + Node* n_start = array_element_address(n, intcon(0), n_elem); + Node* m_start = array_element_address(m, intcon(0), m_elem); + + Node* call = make_runtime_call(RC_LEAF, + OptoRuntime::montgomeryMultiply_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + a_start, b_start, n_start, len, inv, top(), + m_start); + set_result(m); + } + + return true; +} + +bool LibraryCallKit::inline_montgomerySquare() { + address stubAddr = StubRoutines::montgomerySquare(); + if (stubAddr == NULL) { + return false; // Intrinsic's stub is not implemented on this platform + } + + assert(UseMontgomerySquareIntrinsic, "not implemented on this platform"); + const char* stubName = "montgomery_square"; + + assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters"); + + Node* a = argument(0); + Node* n = argument(1); + Node* len = argument(2); + Node* inv = argument(3); + Node* m = argument(5); + + const Type* a_type = a->Value(&_gvn); + const TypeAryPtr* top_a = a_type->isa_aryptr(); + const Type* n_type = a->Value(&_gvn); + const TypeAryPtr* top_n = n_type->isa_aryptr(); + const Type* m_type = a->Value(&_gvn); + const TypeAryPtr* top_m = m_type->isa_aryptr(); + if (top_a == NULL || top_a->klass() == NULL || + top_n == NULL || top_n->klass() == NULL || + top_m == NULL || top_m->klass() == NULL) { + // failed array check + return false; + } + + BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) { + return false; + } + + // Make the call + { + Node* a_start = array_element_address(a, intcon(0), a_elem); + Node* n_start = array_element_address(n, intcon(0), n_elem); + Node* m_start = array_element_address(m, intcon(0), m_elem); + + Node* call = make_runtime_call(RC_LEAF, + OptoRuntime::montgomerySquare_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + a_start, n_start, len, inv, top(), + m_start); + set_result(m); + } + + return true; +} + /** * Calculate CRC32 for byte.
--- a/src/share/vm/opto/loopTransform.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/loopTransform.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -2438,7 +2438,7 @@ //============================================================================= // Process all the loops in the loop tree and replace any fill -// patterns with an intrisc version. +// patterns with an intrinsic version. bool PhaseIdealLoop::do_intrinsify_fill() { bool changed = false; for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { @@ -2536,8 +2536,9 @@ } // Make sure the address expression can be handled. It should be - // head->phi * elsize + con. head->phi might have a ConvI2L. + // head->phi * elsize + con. head->phi might have a ConvI2L(CastII()). Node* elements[4]; + Node* cast = NULL; Node* conv = NULL; bool found_index = false; int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)); @@ -2552,6 +2553,12 @@ conv = value; value = value->in(1); } + if (value->Opcode() == Op_CastII && + value->as_CastII()->has_range_check()) { + // Skip range check dependent CastII nodes + cast = value; + value = value->in(1); + } #endif if (value != head->phi()) { msg = "unhandled shift in address"; @@ -2564,9 +2571,16 @@ } } } else if (n->Opcode() == Op_ConvI2L && conv == NULL) { - if (n->in(1) == head->phi()) { + conv = n; + n = n->in(1); + if (n->Opcode() == Op_CastII && + n->as_CastII()->has_range_check()) { + // Skip range check dependent CastII nodes + cast = n; + n = n->in(1); + } + if (n == head->phi()) { found_index = true; - conv = n; } else { msg = "unhandled input to ConvI2L"; } @@ -2625,6 +2639,7 @@ // Address elements are ok if (con) ok.set(con->_idx); if (shift) ok.set(shift->_idx); + if (cast) ok.set(cast->_idx); if (conv) ok.set(conv->_idx); for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
--- a/src/share/vm/opto/loopopts.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/loopopts.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -772,6 +772,9 @@ #ifdef _LP64 if (m->Opcode() == Op_ConvI2L) return false; + if (m->is_CastII() && m->isa_CastII()->has_range_check()) { + return false; + } #endif } }
--- a/src/share/vm/opto/node.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/node.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -521,6 +521,11 @@ C->add_macro_node(n); if (is_expensive()) C->add_expensive_node(n); + // If the cloned node is a range check dependent CastII, add it to the list. + CastIINode* cast = n->isa_CastII(); + if (cast != NULL && cast->has_range_check()) { + C->add_range_check_cast(cast); + } n->set_idx(C->next_unique()); // Get new unique index as well debug_only( n->verify_construction() ); @@ -649,6 +654,11 @@ if (is_expensive()) { compile->remove_expensive_node(this); } + CastIINode* cast = isa_CastII(); + if (cast != NULL && cast->has_range_check()) { + compile->remove_range_check_cast(cast); + } + if (is_SafePoint()) { as_SafePoint()->delete_replaced_nodes(); } @@ -1344,6 +1354,10 @@ if (dead->is_expensive()) { igvn->C->remove_expensive_node(dead); } + CastIINode* cast = dead->isa_CastII(); + if (cast != NULL && cast->has_range_check()) { + igvn->C->remove_range_check_cast(cast); + } igvn->C->record_dead_node(dead->_idx); // Kill all inputs to the dead guy for (uint i=0; i < dead->req(); i++) {
--- a/src/share/vm/opto/node.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/node.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -54,6 +54,7 @@ class CatchNode; class CatchProjNode; class CheckCastPPNode; +class CastIINode; class ClearArrayNode; class CmpNode; class CodeBuffer; @@ -603,6 +604,7 @@ DEFINE_CLASS_ID(Type, Node, 2) DEFINE_CLASS_ID(Phi, Type, 0) DEFINE_CLASS_ID(ConstraintCast, Type, 1) + DEFINE_CLASS_ID(CastII, ConstraintCast, 0) DEFINE_CLASS_ID(CheckCastPP, Type, 2) DEFINE_CLASS_ID(CMove, Type, 3) DEFINE_CLASS_ID(SafePointScalarObject, Type, 4) @@ -727,6 +729,7 @@ DEFINE_CLASS_QUERY(Catch) DEFINE_CLASS_QUERY(CatchProj) DEFINE_CLASS_QUERY(CheckCastPP) + DEFINE_CLASS_QUERY(CastII) DEFINE_CLASS_QUERY(ConstraintCast) DEFINE_CLASS_QUERY(ClearArray) DEFINE_CLASS_QUERY(CMove)
--- a/src/share/vm/opto/parse2.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/parse2.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -158,7 +158,9 @@ // Check for always knowing you are throwing a range-check exception if (stopped()) return top(); - Node* ptr = array_element_address(ary, idx, type, sizetype); + // Make array address computation control dependent to prevent it + // from floating above the range check during loop optimizations. + Node* ptr = array_element_address(ary, idx, type, sizetype, control()); if (result2 != NULL) *result2 = elemtype; @@ -461,9 +463,12 @@ #ifdef _LP64 // Clean the 32-bit int into a real 64-bit offset. // Otherwise, the jint value 0 might turn into an offset of 0x0800000000. - const TypeLong* lkeytype = TypeLong::make(CONST64(0), num_cases-1, Type::WidenMin); - key_val = _gvn.transform( new (C) ConvI2LNode(key_val, lkeytype) ); + const TypeInt* ikeytype = TypeInt::make(0, num_cases-1, Type::WidenMin); + // Make I2L conversion control dependent to prevent it from + // floating above the range check during loop optimizations. + key_val = C->constrained_convI2L(&_gvn, key_val, ikeytype, control()); #endif + // Shift the value by wordsize so we have an index into the table, rather // than a switch value Node *shiftWord = _gvn.MakeConX(wordSize);
--- a/src/share/vm/opto/phaseX.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/phaseX.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1339,6 +1339,10 @@ if (dead->is_expensive()) { C->remove_expensive_node(dead); } + CastIINode* cast = dead->isa_CastII(); + if (cast != NULL && cast->has_range_check()) { + C->remove_range_check_cast(cast); + } } } // while (_stack.is_nonempty()) }
--- a/src/share/vm/opto/runtime.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/runtime.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -958,6 +958,94 @@ return TypeFunc::make(domain, range); } +const TypeFunc* OptoRuntime::squareToLen_Type() { + // create input type (domain) + int num_args = 4; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // x + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypePtr::NOTNULL; // z + fields[argp++] = TypeInt::INT; // zlen + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + +// for mulAdd calls, 2 pointers and 3 ints, returning int +const TypeFunc* OptoRuntime::mulAdd_Type() { + // create input type (domain) + int num_args = 5; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // out + fields[argp++] = TypePtr::NOTNULL; // in + fields[argp++] = TypeInt::INT; // offset + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypeInt::INT; // k + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // returning carry (int) + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInt::INT; + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + +const TypeFunc* OptoRuntime::montgomeryMultiply_Type() { + // create input type (domain) + int num_args = 7; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // a + fields[argp++] = TypePtr::NOTNULL; // b + fields[argp++] = TypePtr::NOTNULL; // n + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypeLong::LONG; // inv + fields[argp++] = Type::HALF; + fields[argp++] = TypePtr::NOTNULL; // result + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypePtr::NOTNULL; + + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + +const TypeFunc* OptoRuntime::montgomerySquare_Type() { + // create input type (domain) + int num_args = 6; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // a + fields[argp++] = TypePtr::NOTNULL; // n + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypeLong::LONG; // inv + fields[argp++] = Type::HALF; + fields[argp++] = TypePtr::NOTNULL; // result + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypePtr::NOTNULL; + + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} //------------- Interpreter state access for on stack replacement
--- a/src/share/vm/opto/runtime.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/runtime.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -305,6 +305,12 @@ static const TypeFunc* multiplyToLen_Type(); + static const TypeFunc* squareToLen_Type(); + + static const TypeFunc* mulAdd_Type(); + static const TypeFunc* montgomeryMultiply_Type(); + static const TypeFunc* montgomerySquare_Type(); + static const TypeFunc* updateBytesCRC32_Type(); // leaf on stack replacement interpreter accessor types
--- a/src/share/vm/opto/superword.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/opto/superword.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -2388,6 +2388,11 @@ return true; } } else if (opc == Op_ConvI2L) { + if (n->in(1)->Opcode() == Op_CastII && + n->in(1)->as_CastII()->has_range_check()) { + // Skip range check dependent CastII nodes + n = n->in(1); + } if (scaled_iv_plus_offset(n->in(1))) { return true; }
--- a/src/share/vm/prims/whitebox.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/prims/whitebox.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "memory/metadataFactory.hpp" +#include "memory/metaspaceShared.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" @@ -914,6 +915,10 @@ return (jlong) MetaspaceGC::capacity_until_GC(); WB_END +WB_ENTRY(jboolean, WB_IsSharedClass(JNIEnv* env, jobject wb, jclass clazz)) + return (jboolean)MetaspaceShared::is_in_shared_space(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz))); +WB_END + WB_ENTRY(jboolean, WB_IsMonitorInflated(JNIEnv* env, jobject wb, jobject obj)) oop obj_oop = JNIHandles::resolve(obj); return (jboolean) obj_oop->mark()->has_monitor(); @@ -1034,6 +1039,7 @@ {CC"runMemoryUnitTests", CC"()V", (void*)&WB_RunMemoryUnitTests}, {CC"readFromNoaccessArea",CC"()V", (void*)&WB_ReadFromNoaccessArea}, {CC"stressVirtualSpaceResize",CC"(JJJ)I", (void*)&WB_StressVirtualSpaceResize}, + {CC"isSharedClass", CC"(Ljava/lang/Class;)Z", (void*)&WB_IsSharedClass }, #if INCLUDE_ALL_GCS {CC"g1InConcurrentMark", CC"()Z", (void*)&WB_G1InConcurrentMark}, {CC"g1IsHumongous", CC"(Ljava/lang/Object;)Z", (void*)&WB_G1IsHumongous },
--- a/src/share/vm/runtime/sharedRuntime.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/runtime/sharedRuntime.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -145,6 +145,12 @@ static double dsqrt(double f); #endif + // Montgomery multiplication + static void montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, jint *m_ints); + static void montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, jint *m_ints); + #ifdef __SOFTFP__ // C++ compiler generates soft float instructions as well as passing // float and double in registers.
--- a/src/share/vm/runtime/stubRoutines.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/runtime/stubRoutines.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -136,6 +136,10 @@ address StubRoutines::_crc_table_adr = NULL; address StubRoutines::_multiplyToLen = NULL; +address StubRoutines::_squareToLen = NULL; +address StubRoutines::_mulAdd = NULL; +address StubRoutines::_montgomeryMultiply = NULL; +address StubRoutines::_montgomerySquare = NULL; double (* StubRoutines::_intrinsic_log )(double) = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/runtime/stubRoutines.hpp Mon Jul 25 06:38:24 2016 +0100 @@ -214,6 +214,10 @@ static address _crc_table_adr; static address _multiplyToLen; + static address _squareToLen; + static address _mulAdd; + static address _montgomeryMultiply; + static address _montgomerySquare; // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. They are used for @@ -374,6 +378,10 @@ static address crc_table_addr() { return _crc_table_adr; } static address multiplyToLen() {return _multiplyToLen; } + static address squareToLen() {return _squareToLen; } + static address mulAdd() {return _mulAdd; } + static address montgomeryMultiply() { return _montgomeryMultiply; } + static address montgomerySquare() { return _montgomerySquare; } static address select_fill_function(BasicType t, bool aligned, const char* &name);
--- a/src/share/vm/runtime/vmStructs.cpp Mon Jul 25 06:02:58 2016 +0100 +++ b/src/share/vm/runtime/vmStructs.cpp Mon Jul 25 06:38:24 2016 +0100 @@ -821,6 +821,8 @@ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ static_field(StubRoutines, _multiplyToLen, address) \ + static_field(StubRoutines, _squareToLen, address) \ + static_field(StubRoutines, _mulAdd, address) \ \ /*****************/ \ /* SharedRuntime */ \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java Mon Jul 25 06:38:24 2016 +0100 @@ -0,0 +1,284 @@ +// +// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, Red Hat Inc. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Random; + +/** + * @test + * @bug 8130150 + * @library /testlibrary + * @requires (os.simpleArch == "x64") & (os.family != "windows") + * @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments. + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic + * -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic + * -XX:-UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseMontgomerySquareIntrinsic + * -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest + */ + +public class MontgomeryMultiplyTest { + + static final MethodHandles.Lookup lookup = MethodHandles.lookup(); + + static final MethodHandle montgomeryMultiplyHandle, montgomerySquareHandle; + static final MethodHandle bigIntegerConstructorHandle; + static final Field bigIntegerMagField; + + static { + // Use reflection to gain access to the methods we want to test. + try { + Method m = BigInteger.class.getDeclaredMethod("montgomeryMultiply", + /*a*/int[].class, /*b*/int[].class, /*n*/int[].class, /*len*/int.class, + /*inv*/long.class, /*product*/int[].class); + m.setAccessible(true); + montgomeryMultiplyHandle = lookup.unreflect(m); + + m = BigInteger.class.getDeclaredMethod("montgomerySquare", + /*a*/int[].class, /*n*/int[].class, /*len*/int.class, + /*inv*/long.class, /*product*/int[].class); + m.setAccessible(true); + montgomerySquareHandle = lookup.unreflect(m); + + Constructor c + = BigInteger.class.getDeclaredConstructor(int.class, int[].class); + c.setAccessible(true); + bigIntegerConstructorHandle = lookup.unreflectConstructor(c); + + bigIntegerMagField = BigInteger.class.getDeclaredField("mag"); + bigIntegerMagField.setAccessible(true); + + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } + + // Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare. + int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv, + int[] product) throws Throwable { + int[] result = + (a == b) ? (int[]) montgomerySquareHandle.invokeExact(a, n, len, inv, product) + : (int[]) montgomeryMultiplyHandle.invokeExact(a, b, n, len, inv, product); + return Arrays.copyOf(result, len); + } + + // Invoke the private constructor BigInteger(int[]). + BigInteger newBigInteger(int[] val) throws Throwable { + return (BigInteger) bigIntegerConstructorHandle.invokeExact(1, val); + } + + // Get the private field BigInteger.mag + int[] mag(BigInteger n) { + try { + return (int[]) bigIntegerMagField.get(n); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + + // Montgomery multiplication + // Calculate a * b * r^-1 mod n) + // + // R is a power of the word size + // N' = R^-1 mod N + // + // T := ab + // m := (T mod R)N' mod R [so 0 <= m < R] + // t := (T + mN)/R + // if t >= N then return t - N else return t + // + BigInteger montgomeryMultiply(BigInteger a, BigInteger b, BigInteger N, + int len, BigInteger n_prime) + throws Throwable { + BigInteger T = a.multiply(b); + BigInteger R = BigInteger.ONE.shiftLeft(len*32); + BigInteger mask = R.subtract(BigInteger.ONE); + BigInteger m = (T.and(mask)).multiply(n_prime); + m = m.and(mask); // i.e. m.mod(R) + T = T.add(m.multiply(N)); + T = T.shiftRight(len*32); // i.e. T.divide(R) + if (T.compareTo(N) > 0) { + T = T.subtract(N); + } + return T; + } + + // Call the Montgomery multiply intrinsic. + BigInteger montgomeryMultiply(int[] a_words, int[] b_words, int[] n_words, + int len, BigInteger inv) + throws Throwable { + BigInteger t = montgomeryMultiply( + newBigInteger(a_words), + newBigInteger(b_words), + newBigInteger(n_words), + len, inv); + return t; + } + + // Check that the Montgomery multiply intrinsic returns the same + // result as the longhand calculation. + void check(int[] a_words, int[] b_words, int[] n_words, int len, BigInteger inv) + throws Throwable { + BigInteger n = newBigInteger(n_words); + BigInteger slow = montgomeryMultiply(a_words, b_words, n_words, len, inv); + BigInteger fast + = newBigInteger(montgomeryMultiply + (a_words, b_words, n_words, len, inv.longValue(), null)); + // The intrinsic may not return the same value as the longhand + // calculation but they must have the same residue mod N. + if (!slow.mod(n).equals(fast.mod(n))) { + throw new RuntimeException(); + } + } + + Random rnd = new Random(0); + + // Return a random value of length <= bits in an array of even length + int[] random_val(int bits) { + int len = (bits+63)/64; // i.e. length in longs + int[] val = new int[len*2]; + for (int i = 0; i < val.length; i++) + val[i] = rnd.nextInt(); + int leadingZeros = 64 - (bits & 64); + if (leadingZeros >= 32) { + val[0] = 0; + val[1] &= ~(-1l << (leadingZeros & 31)); + } else { + val[0] &= ~(-1l << leadingZeros); + } + return val; + } + + void testOneLength(int lenInBits, int lenInInts) throws Throwable { + BigInteger mod = new BigInteger(lenInBits, 2, rnd); + BigInteger r = BigInteger.ONE.shiftLeft(lenInInts * 32); + BigInteger n_prime = mod.modInverse(r).negate(); + + // Make n.length even, padding with a zero if necessary + int[] n = mag(mod); + if (n.length < lenInInts) { + int[] x = new int[lenInInts]; + System.arraycopy(n, 0, x, lenInInts-n.length, n.length); + n = x; + } + + for (int i = 0; i < 10000; i++) { + // multiply + check(random_val(lenInBits), random_val(lenInBits), n, lenInInts, n_prime); + // square + int[] tmp = random_val(lenInBits); + check(tmp, tmp, n, lenInInts, n_prime); + } + } + + // Test the Montgomery multiply intrinsic with a bunch of random + // values of varying lengths. Do this for long enough that the + // caller of the intrinsic is C2-compiled. + void testResultValues() throws Throwable { + // Test a couple of interesting edge cases. + testOneLength(1024, 32); + testOneLength(1025, 34); + for (int j = 10; j > 0; j--) { + // Construct a random prime whose length in words is even + int lenInBits = rnd.nextInt(2048) + 64; + int lenInInts = (lenInBits + 63)/64*2; + testOneLength(lenInBits, lenInInts); + } + } + + // Range checks + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, int[] n, int len, long inv, + int[] product, Class klass) { + try { + montgomeryMultiply(a, b, n, len, inv, product); + } catch (Throwable ex) { + if (klass.isAssignableFrom(ex.getClass())) + return; + throw new RuntimeException(klass + " expected, " + ex + " was thrown"); + } + throw new RuntimeException(klass + " expected, was not thrown"); + } + + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv, + Class klass) { + testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), null, klass); + } + + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv, + int[] product, Class klass) { + testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), product, klass); + } + + void testMontgomeryMultiplyChecks() { + int[] blah = random_val(40); + int[] small = random_val(39); + BigInteger mod = new BigInteger(40*32 , 2, rnd); + BigInteger r = BigInteger.ONE.shiftLeft(40*32); + BigInteger n_prime = mod.modInverse(r).negate(); + + // Length out of range: square + testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class); + // As above, but for multiply + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class); + + // Length odd + testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class); + // As above, but for multiply + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class); + + // array too small + testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class); + testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class); + } + + public static void main(String args[]) { + try { + new MontgomeryMultiplyTest().testMontgomeryMultiplyChecks(); + new MontgomeryMultiplyTest().testResultValues(); + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/muladd/TestMulAdd.java Mon Jul 25 06:38:24 2016 +0100 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8081778 + * @summary Add C2 x86 intrinsic for BigInteger::mulAdd() method + * + * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch + * -XX:+IgnoreUnrecognizedVMOptions -XX:-UseSquareToLenIntrinsic -XX:-UseMultiplyToLenIntrinsic + * -XX:+UseMulAddIntrinsic + * -XX:CompileCommand=dontinline,TestMulAdd::main + * -XX:CompileCommand=option,TestMulAdd::base_multiply,ccstr,DisableIntrinsic,_mulAdd + * -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_mulAdd + * -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_mulAdd + * -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_mulAdd + * -XX:CompileCommand=option,java.math.BigInteger::mulAdd,ccstr,DisableIntrinsic,_mulAdd + * -XX:CompileCommand=inline,java.math.BigInteger::multiply + * -XX:CompileCommand=inline,java.math.BigInteger::square + * -XX:CompileCommand=inline,java.math.BigInteger::squareToLen + * -XX:CompileCommand=inline,java.math.BigInteger::mulAdd TestMulAdd + */ + +import java.util.Random; +import java.math.*; + +public class TestMulAdd { + + // Avoid intrinsic by preventing inlining multiply() and mulAdd(). + public static BigInteger base_multiply(BigInteger op1) { + return op1.multiply(op1); + } + + // Generate mulAdd() intrinsic by inlining multiply(). + public static BigInteger new_multiply(BigInteger op1) { + return op1.multiply(op1); + } + + public static boolean bytecompare(BigInteger b1, BigInteger b2) { + byte[] data1 = b1.toByteArray(); + byte[] data2 = b2.toByteArray(); + if (data1.length != data2.length) + return false; + for (int i = 0; i < data1.length; i++) { + if (data1[i] != data2[i]) + return false; + } + return true; + } + + public static String stringify(BigInteger b) { + String strout= ""; + byte [] data = b.toByteArray(); + for (int i = 0; i < data.length; i++) { + strout += (String.format("%02x",data[i]) + " "); + } + return strout; + } + + public static void main(String args[]) throws Exception { + + BigInteger oldsum = new BigInteger("0"); + BigInteger newsum = new BigInteger("0"); + + BigInteger b1, b2, oldres, newres; + + Random rand = new Random(); + long seed = System.nanoTime(); + Random rand1 = new Random(); + long seed1 = System.nanoTime(); + rand.setSeed(seed); + rand1.setSeed(seed1); + + for (int j = 0; j < 100000; j++) { + int rand_int = rand1.nextInt(3136)+32; + b1 = new BigInteger(rand_int, rand); + + oldres = base_multiply(b1); + newres = new_multiply(b1); + + oldsum = oldsum.add(oldres); + newsum = newsum.add(newres); + + if (!bytecompare(oldres,newres)) { + System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); + System.out.println(b1); + throw new Exception("Failed"); + } + } + if (!bytecompare(oldsum,newsum)) { + System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum)); + throw new Exception("Failed"); + } else { + System.out.println("Success"); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/squaretolen/TestSquareToLen.java Mon Jul 25 06:38:24 2016 +0100 @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8081778 + * @summary Add C2 x86 intrinsic for BigInteger::squareToLen() method + * + * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch + * -XX:+IgnoreUnrecognizedVMOptions + * -XX:+UseSquareToLenIntrinsic + * -XX:CompileCommand=exclude,TestSquareToLen::main + * -XX:CompileCommand=option,TestSquareToLen::base_multiply,ccstr,DisableIntrinsic,_squareToLen + * -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_squareToLen + * -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_squareToLen + * -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_squareToLen + * -XX:CompileCommand=inline,java.math.BigInteger::multiply + * -XX:CompileCommand=inline,java.math.BigInteger::square + * -XX:CompileCommand=inline,java.math.BigInteger::squareToLen TestSquareToLen + */ + +import java.util.Random; +import java.math.*; + +public class TestSquareToLen { + + // Avoid intrinsic by preventing inlining multiply() and squareToLen(). + public static BigInteger base_multiply(BigInteger op1) { + return op1.multiply(op1); + } + + // Generate squareToLen() intrinsic by inlining multiply(). + public static BigInteger new_multiply(BigInteger op1) { + return op1.multiply(op1); + } + + public static boolean bytecompare(BigInteger b1, BigInteger b2) { + byte[] data1 = b1.toByteArray(); + byte[] data2 = b2.toByteArray(); + if (data1.length != data2.length) + return false; + for (int i = 0; i < data1.length; i++) { + if (data1[i] != data2[i]) + return false; + } + return true; + } + + public static String stringify(BigInteger b) { + String strout= ""; + byte [] data = b.toByteArray(); + for (int i = 0; i < data.length; i++) { + strout += (String.format("%02x",data[i]) + " "); + } + return strout; + } + + public static void main(String args[]) throws Exception { + + BigInteger oldsum = new BigInteger("0"); + BigInteger newsum = new BigInteger("0"); + + BigInteger b1, b2, oldres, newres; + + Random rand = new Random(); + long seed = System.nanoTime(); + Random rand1 = new Random(); + long seed1 = System.nanoTime(); + rand.setSeed(seed); + rand1.setSeed(seed1); + + for (int j = 0; j < 100000; j++) { + int rand_int = rand1.nextInt(3136)+32; + b1 = new BigInteger(rand_int, rand); + + oldres = base_multiply(b1); + newres = new_multiply(b1); + + oldsum = oldsum.add(oldres); + newsum = newsum.add(newres); + + if (!bytecompare(oldres,newres)) { + System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); + System.out.println(b1); + throw new Exception("Failed"); + } + } + if (!bytecompare(oldsum,newsum)) { + System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum)); + throw new Exception("Failed"); + } else { + System.out.println("Success"); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/loopopts/TestLoopPeeling.java Mon Jul 25 06:38:24 2016 +0100 @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8078262 + * @summary Tests correct dominator information after loop peeling. + * @run main/othervm -Xcomp -XX:CompileCommand=compileonly,TestLoopPeeling::test* TestLoopPeeling + */ +public class TestLoopPeeling { + + public int[] array = new int[100]; + + public static void main(String args[]) { + TestLoopPeeling test = new TestLoopPeeling(); + try { + test.testArrayAccess(0, 1); + test.testArrayAllocation(0, 1); + } catch (Exception e) { + // Ignore exceptions + } + } + + public void testArrayAccess(int index, int inc) { + int storeIndex = -1; + + for (; index < 10; index += inc) { + // This loop invariant check triggers loop peeling because it can + // be moved out of the loop (see 'IdealLoopTree::policy_peeling'). + if (inc == 42) return; + + // This loop variant usage of LShiftL( ConvI2L( Phi(storeIndex) ) ) + // prevents the split if optimization that would otherwise clone the + // LShiftL and ConvI2L nodes and assign them to their corresponding array + // address computation (see 'PhaseIdealLoop::split_if_with_blocks_post'). + if (storeIndex > 0 && array[storeIndex] == 42) return; + + if (index == 42) { + // This store and the corresponding range check are moved out of the + // loop and both used after old loop and the peeled iteration exit. + // For the peeled iteration, storeIndex is always -1 and the ConvI2L + // is replaced by TOP. However, the range check is not folded because + // we don't do the split if optimization in PhaseIdealLoop2. + // As a result, we have a (dead) control path from the peeled iteration + // to the StoreI but the data path is removed. + array[storeIndex] = 1; + return; + } + + storeIndex++; + } + } + + public byte[] testArrayAllocation(int index, int inc) { + int allocationCount = -1; + byte[] result; + + for (; index < 10; index += inc) { + // This loop invariant check triggers loop peeling because it can + // be moved out of the loop (see 'IdealLoopTree::policy_peeling'). + if (inc == 42) return null; + + if (index == 42) { + // This allocation and the corresponding size check are moved out of the + // loop and both used after old loop and the peeled iteration exit. + // For the peeled iteration, allocationCount is always -1 and the ConvI2L + // is replaced by TOP. However, the size check is not folded because + // we don't do the split if optimization in PhaseIdealLoop2. + // As a result, we have a (dead) control path from the peeled iteration + // to the allocation but the data path is removed. + result = new byte[allocationCount]; + return result; + } + + allocationCount++; + } + return null; + } +} +
--- a/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java Mon Jul 25 06:02:58 2016 +0100 +++ b/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java Mon Jul 25 06:38:24 2016 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -233,4 +233,6 @@ return offset; } + // Class Data Sharing + public native boolean isSharedClass(Class<?> c); }