icedtea7-forest-aarch64 / hotspot: changeset 4210:b9e0f2c87dd6 (hs24-b26)

Merge

author:    amurillo
date:      Thu, 29 Nov 2012 22:32:44 -0800
parents:   8e459e9615fd, 1ba2ed1c07df
children:  ed9b424d5e43
diffstat:  70 files changed, 3540 insertions(+), 659 deletions(-)
--- a/make/hotspot_version	Thu Nov 29 19:41:00 2012 -0800
+++ b/make/hotspot_version	Thu Nov 29 22:32:44 2012 -0800
@@ -35,7 +35,7 @@
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=25
+HS_BUILD_NUMBER=26
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/make/jprt.properties	Thu Nov 29 19:41:00 2012 -0800
+++ b/make/jprt.properties	Thu Nov 29 22:32:44 2012 -0800
@@ -38,7 +38,7 @@
 
 # This tells jprt what default release we want to build
 
-jprt.hotspot.default.release=jdk7u10
+jprt.hotspot.default.release=jdk7u12
 
 jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
 
@@ -54,77 +54,77 @@
 # Define the Solaris platforms we want for the various releases
 jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10
 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7u10=${jprt.my.solaris.sparc.jdk7}
+jprt.my.solaris.sparc.jdk7u12=${jprt.my.solaris.sparc.jdk7}
 jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}}
 
 jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7u10=${jprt.my.solaris.sparcv9.jdk7}
+jprt.my.solaris.sparcv9.jdk7u12=${jprt.my.solaris.sparcv9.jdk7}
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
 
 jprt.my.solaris.i586.jdk8=solaris_i586_5.10
 jprt.my.solaris.i586.jdk7=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7u10=${jprt.my.solaris.i586.jdk7}
+jprt.my.solaris.i586.jdk7u12=${jprt.my.solaris.i586.jdk7}
 jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}}
 
 jprt.my.solaris.x64.jdk8=solaris_x64_5.10
 jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7u10=${jprt.my.solaris.x64.jdk7}
+jprt.my.solaris.x64.jdk7u12=${jprt.my.solaris.x64.jdk7}
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.i586.jdk8=linux_i586_2.6
 jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7u10=${jprt.my.linux.i586.jdk7}
+jprt.my.linux.i586.jdk7u12=${jprt.my.linux.i586.jdk7}
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
 
 jprt.my.linux.x64.jdk8=linux_x64_2.6
 jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7u10=${jprt.my.linux.x64.jdk7}
+jprt.my.linux.x64.jdk7u12=${jprt.my.linux.x64.jdk7}
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.ppc.jdk8=linux_ppc_2.6
 jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7u10=${jprt.my.linux.ppc.jdk7}
+jprt.my.linux.ppc.jdk7u12=${jprt.my.linux.ppc.jdk7}
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7u10=${jprt.my.linux.ppcv2.jdk7}
+jprt.my.linux.ppcv2.jdk7u12=${jprt.my.linux.ppcv2.jdk7}
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7u10=${jprt.my.linux.ppcsflt.jdk7}
+jprt.my.linux.ppcsflt.jdk7u12=${jprt.my.linux.ppcsflt.jdk7}
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
 
 jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6
 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7u10=${jprt.my.linux.armvfp.jdk7}
+jprt.my.linux.armvfp.jdk7u12=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
 jprt.my.linux.armv6.jdk8=linux_armv6_2.6
 jprt.my.linux.armv6.jdk7=linux_armv6_2.6
-jprt.my.linux.armv6.jdk7u10=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6.jdk7u12=${jprt.my.linux.armv6.jdk7}
 jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
 
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7u10=${jprt.my.linux.armsflt.jdk7}
+jprt.my.linux.armsflt.jdk7u12=${jprt.my.linux.armsflt.jdk7}
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
 
 jprt.my.macosx.x64.jdk8=macosx_x64_10.7
 jprt.my.macosx.x64.jdk7=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7u10=${jprt.my.macosx.x64.jdk7}
+jprt.my.macosx.x64.jdk7u12=${jprt.my.macosx.x64.jdk7}
 jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
 
 jprt.my.windows.i586.jdk8=windows_i586_5.1
 jprt.my.windows.i586.jdk7=windows_i586_5.1
-jprt.my.windows.i586.jdk7u10=${jprt.my.windows.i586.jdk7}
+jprt.my.windows.i586.jdk7u12=${jprt.my.windows.i586.jdk7}
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
 
 jprt.my.windows.x64.jdk8=windows_x64_5.2
 jprt.my.windows.x64.jdk7=windows_x64_5.2
-jprt.my.windows.x64.jdk7u10=${jprt.my.windows.x64.jdk7}
+jprt.my.windows.x64.jdk7u12=${jprt.my.windows.x64.jdk7}
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
 
 # Standard list of jprt build targets for this source tree
@@ -159,7 +159,7 @@
 
 jprt.build.targets.jdk8=${jprt.build.targets.all}
 jprt.build.targets.jdk7=${jprt.build.targets.all}
-jprt.build.targets.jdk7u10=${jprt.build.targets.all}
+jprt.build.targets.jdk7u12=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
 
 # Subset lists of test targets for this source tree
@@ -452,7 +452,7 @@
 
 jprt.test.targets.jdk8=${jprt.test.targets.standard}
 jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7u10=${jprt.test.targets.jdk7}
+jprt.test.targets.jdk7u12=${jprt.test.targets.jdk7}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
 
 # The default test/Makefile targets that should be run
@@ -512,7 +512,7 @@
 
 jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7u10=${jprt.make.rule.test.targets.jdk7}
+jprt.make.rule.test.targets.jdk7u12=${jprt.make.rule.test.targets.jdk7}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
 
 # 7155453: Work-around to prevent popups on OSX from blocking test completion
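Note: the jdk7u10 -> jdk7u12 rename above is purely mechanical because of how jprt resolves platforms. Each per-release key delegates to the shared jdk7 value, and the unsuffixed key is built by splicing jprt.tools.default.release into the property name. A minimal trace of one lookup, using values from the stanzas above:

    jprt.my.linux.x64.jdk7u12=${jprt.my.linux.x64.jdk7}                   # -> linux_x64_2.6
    jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}  # -> jprt.my.linux.x64.jdk7u12

With the default release set to jdk7u12, the second line resolves through the first to linux_x64_2.6, so bumping the release only requires renaming the *.jdk7u10 keys.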
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Nov 29 19:41:00 2012 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Nov 29 22:32:44 2012 -0800
@@ -1017,6 +1017,67 @@
   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
+void Assembler::aesdec(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xde);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xde);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdf);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdf);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesenc(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdc);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdc);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesenclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdd);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdd);
+  emit_byte(0xC0 | encode);
+}
+
+
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
@@ -2337,6 +2398,22 @@
   a_byte(p);
 }
 
+void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_ssse3(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x00);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::pshufb(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_ssse3(), "");
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x00);
+  emit_operand(dst, src);
+}
+
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -8049,6 +8126,15 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movdqu(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::movsd(dst, as_Address(src));
@@ -8339,6 +8425,17 @@
   }
 }
 
+void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::pshufb(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::pshufb(dst, Address(rscratch1, 0));
+  }
+}
+
 // AVX 3-operands instructions
 
 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
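All the new emitters above share one encoding shape: a 66-prefixed opcode in the 0F 38 map (VEX_SIMD_66 + VEX_OPCODE_0F_38), differing only in the final opcode byte: 0x00 for pshufb, 0xdc/0xdd for aesenc/aesenclast, 0xde/0xdf for aesdec/aesdeclast, matching the Intel SDM legacy encodings 66 0F 38 xx /r. A standalone sketch (not HotSpot code; encode_aesenc and its 0..7 register range are illustrative assumptions) of the bytes the register-register form boils down to when VEX encoding is not in use:

#include <cstdint>
#include <vector>

// AESENC xmm_dst, xmm_src is 66 0F 38 DC /r. In the HotSpot code above,
// simd_prefix_and_encode() produces the prefix/opcode-map bytes and the
// low ModRM bits; emit_byte(0xC0 | encode) finishes the instruction.
std::vector<uint8_t> encode_aesenc(unsigned dst, unsigned src) {
  // register-direct ModRM: mod=11, reg=dst, rm=src (xmm0..xmm7 only, no REX)
  uint8_t modrm = 0xC0 | (dst << 3) | src;
  return {0x66, 0x0F, 0x38, 0xDC, modrm};
}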
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Nov 29 19:41:00 2012 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Nov 29 22:32:44 2012 -0800
@@ -885,6 +885,17 @@
   void addss(XMMRegister dst, Address src);
   void addss(XMMRegister dst, XMMRegister src);
 
+  // AES instructions
+  void aesdec(XMMRegister dst, Address src);
+  void aesdec(XMMRegister dst, XMMRegister src);
+  void aesdeclast(XMMRegister dst, Address src);
+  void aesdeclast(XMMRegister dst, XMMRegister src);
+  void aesenc(XMMRegister dst, Address src);
+  void aesenc(XMMRegister dst, XMMRegister src);
+  void aesenclast(XMMRegister dst, Address src);
+  void aesenclast(XMMRegister dst, XMMRegister src);
+
+
   void andl(Address dst, int32_t imm32);
   void andl(Register dst, int32_t imm32);
   void andl(Register dst, Address src);
@@ -1434,6 +1445,10 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src);
+  void pshufb(XMMRegister dst, Address src);
+
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src, int mode);
@@ -2596,6 +2611,12 @@
   void divss(XMMRegister dst, Address src)      { Assembler::divss(dst, src); }
   void divss(XMMRegister dst, AddressLiteral src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address dst, XMMRegister src)     { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, Address src)     { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, AddressLiteral src);
+
   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
   void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
@@ -2643,6 +2664,10 @@
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, Address src)     { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, AddressLiteral src);
+
   // AVX 3-operands instructions
 
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
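The MacroAssembler half of these declarations follows the wrapper pattern already visible around movsd and xorps: the register and Address forms simply forward to the Assembler, while the AddressLiteral overload (defined in the .cpp hunk above) tests reachable(src) and, when the literal is out of displacement range, materializes the address through lea(rscratch1, src). That is why the stub generators below can write, for example:

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));

without caring whether the mask constant happens to land within reach of the generated code.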
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Nov 29 19:41:00 2012 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Nov 29 22:32:44 2012 -0800
@@ -2155,6 +2155,529 @@
     }
   }
 
+  // AES intrinsic stubs
+  enum {AESBlockSize = 16};
+
+  address generate_key_shuffle_mask() {
+    __ align(16);
+    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+    address start = __ pc();
+    __ emit_data(0x00010203, relocInfo::none, 0 );
+    __ emit_data(0x04050607, relocInfo::none, 0 );
+    __ emit_data(0x08090a0b, relocInfo::none, 0 );
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
+    return start;
+  }
+
+  // Utility routine for loading a 128-bit key word in little endian format
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    __ movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask != NULL) {
+      __ pshufb(xmmdst, xmm_shuf_mask);
+    } else {
+      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    }
+  }
+
+  // aesenc using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesenc(xmmdst, xmmtmp);
+  }
+
+  // aesdec using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesdec(xmmdst, xmmtmp);
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter();   // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+
+    // For encryption, the java expanded key ordering is just what we need
+
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp);
+    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesenclast(xmm_result, xmm_temp);
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+    __ xorptr(rax, rax);                    // return 0
+    __ pop(rsi);
+    __ leave();                             // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));
+
+    // for decryption java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    // we don't know if the key is aligned, hence not using load-execute form
+    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+    __ pxor  (xmm_result, xmm_temp);
+    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
+      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 192 and 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    // for decryption the aesdeclast operation is always on key+0x00
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ aesdeclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+
+    __ xorptr(rax, rax);                    // return 0
+    __ pop(rsi);
+    __ leave();                             // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
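The keylen arithmetic both block stubs rely on is worth spelling out. The Java-side expanded key (the K int array, referred to below as the AESCrypt.KLE array) holds 4*(rounds+1) 32-bit words: 44 for AES-128 (10 rounds), 52 for AES-192 (12 rounds), 60 for AES-256 (14 rounds). A minimal sketch of what "shrl(keylen, 2); subl(keylen, 11)" computes (function name is illustrative):

// key_len_ints is the length of the Java expanded-key int array
static int extra_rounds(int key_len_ints) {
  int words128 = key_len_ints >> 2;   // 44 -> 11, 52 -> 13, 60 -> 15 (128-bit words)
  return words128 - 11;               // 0 for AES-128, 2 for AES-192, 4 for AES-256
}

So the stub falls straight through to aesenclast for 128-bit keys, and each "subl(keylen, 2)" peels off one pair of extra rounds for the longer key sizes.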
+  void handleSOERegisters(bool saving) {
+    const int saveFrameSizeInBytes = 4 * wordSize;
+    const Address saved_rbx     (rbp, -3 * wordSize);
+    const Address saved_rsi     (rbp, -2 * wordSize);
+    const Address saved_rdi     (rbp, -1 * wordSize);
+
+    if (saving) {
+      __ subptr(rsp, saveFrameSizeInBytes);
+      __ movptr(saved_rsi, rsi);
+      __ movptr(saved_rdi, rdi);
+      __ movptr(saved_rbx, rbx);
+    } else {
+      // restoring
+      __ movptr(rsi, saved_rsi);
+      __ movptr(rdi, saved_rdi);
+      __ movptr(rbx, saved_rbx);
+    }
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 7 with keys 0-5
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+    // 128 bit code follows here
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_128);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xa0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_128);
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_result);    // final value of r stored in rvec of CipherBlockChaining object
+
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                            // return 0 (why?)
+    __ leave();                                 // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_192);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xc0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_256);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xe0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
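For reference, the three loops above are the same CBC recurrence, differing only in round count: each output block becomes the chaining value for the next, which is why the loop is inherently serial. A plain C++ model of what one pass computes (block_encrypt is a hypothetical helper standing in for the pxor + aesenc... + aesenclast sequence; names are illustrative):

#include <stdint.h>
#include <string.h>
#include <stddef.h>

// Hypothetical stand-in for the AES round sequence: out = E_K(in).
void block_encrypt(const uint8_t in[16], uint8_t out[16]);

// C[i] = E_K(P[i] ^ C[i-1]), with C[-1] = rvec; r is updated in place,
// mirroring how xmm_result carries the chain and is stored back to rvec.
void cbc_encrypt_model(const uint8_t* in, uint8_t* out, size_t blocks,
                       uint8_t r[16]) {
  for (size_t i = 0; i < blocks; i++) {
    uint8_t x[16];
    for (int j = 0; j < 16; j++) x[j] = in[16 * i + j] ^ r[j];
    block_encrypt(x, r);          // r becomes C[i]
    memcpy(out + 16 * i, r, 16);
  }
}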
+
+
+  // CBC AES Decryption.
+  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
+  //
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256;
+    Label L_singleBlock_loopTop_128;
+    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const int FIRST_NON_REG_KEY_offset = 0x70;
+    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    // the java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 6 with first 5 keys
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    // inside here, use the rvec register to point to previous block cipher
+    // with which we xor at the end of each newly decrypted block
+    const Register  prev_block_cipher_ptr = rvec;
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+    // 128-bit code follows here, parallelized
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
+    __ cmpptr(len_reg, 0);          // any blocks left??
+    __ jcc(Assembler::equal, L_exit);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                    // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jmp(L_singleBlock_loopTop_128);
+
+
+    __ BIND(L_exit);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ movptr(rvec , rvec_param);                                     // restore this since used in loop
+    __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                                                  // return 0 (why?)
+    __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_192);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                    // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_256);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                    // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
  public:
   // Information about frame layout at time of blocking runtime call.
   // Note that we only have to preserve callee-saved registers since
@@ -2350,6 +2873,16 @@
     generate_arraycopy_stubs();
 
     generate_math_stubs();
+
+    // don't bother generating these AES intrinsic stubs unless global flag is set
+    if (UseAESIntrinsics) {
+      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
+
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    }
   }
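Unlike the encrypt loop, the decrypt loop has no loop-carried dependency through the cipher: P[i] = D_K(C[i]) ^ C[i-1], and every C[i] is already sitting in memory, so iterations are independent. The comment at the top of this stub notes that the 32-bit version still goes one block at a time purely for lack of registers: only xmm0-xmm7 exist here, and six of them hold preloaded round keys. The 64-bit generator that follows exploits the independence by decrypting four blocks per iteration.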
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -2958,6 +2958,548 @@ } } + // AES intrinsic stubs + enum {AESBlockSize = 16}; + + address generate_key_shuffle_mask() { + __ align(16); + StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); + address start = __ pc(); + __ emit_data64( 0x0405060700010203, relocInfo::none ); + __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); + return start; + } + + // Utility routine for loading a 128-bit key word in little endian format + // can optionally specify that the shuffle mask is already in an xmmregister + void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + __ movdqu(xmmdst, Address(key, offset)); + if (xmm_shuf_mask != NULL) { + __ pshufb(xmmdst, xmm_shuf_mask); + } else { + __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + } + } + + // aesenc using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesenc(xmmdst, xmmtmp); + } + + // aesdec using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesdec(xmmdst, xmmtmp); + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + + // For encryption, the java expanded key ordering is just what we need + // we don't know if the key is aligned, hence not using load-execute form + + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp); + for (int offset = 0x10; offset <= 0x90; offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + 
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + __ aesenclast(xmm_result, xmm_temp); + __ movdqu(Address(to, 0), xmm_result); // store the result + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); + + // for decryption java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + // we don't know if the key is aligned, hence not using load-execute form + load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); + __ pxor (xmm_result, xmm_temp); + for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { + aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + // only in 192 and 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + // only in 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + // for decryption the aesdeclast operation is always on key+0x00 + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ aesdeclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, 0), xmm_result); // store the result + + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, 
"StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r10; // pick the first volatile windows register +#endif + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + // keys 0-10 preloaded into xmm2-xmm12 + const int XMM_REG_NUM_KEY_FIRST = 2; + const int XMM_REG_NUM_KEY_LAST = 12; + const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); + // save the xmm registers which must be preserved 6-12 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + + const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 2 thru 12 with key 0x00 - 0xa0 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + // 128 bit code follows here + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_128); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + __ aesenclast(xmm_result, xmm_key10); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_128); + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ movl(rax, 0); // return 0 (why?) 
+ __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + aes_enc_key(xmm_result, xmm_temp, key, 0xb0); + load_key(xmm_temp, key, 0xc0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + aes_enc_key(xmm_result, xmm_temp, key, 0xb0); + aes_enc_key(xmm_result, xmm_temp, key, 0xc0); + aes_enc_key(xmm_result, xmm_temp, key, 0xd0); + load_key(xmm_temp, key, 0xe0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_256); + __ jmp(L_exit); + + return start; + } + + + + // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time + // to hide instruction latency + // + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + + address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256; + Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128; + Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rsp, 6 * wordSize); // length is on stack on 
Win64 + const Register len_reg = r10; // pick the first volatile windows register +#endif + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + // keys 0-10 preloaded into xmm2-xmm12 + const int XMM_REG_NUM_KEY_FIRST = 5; + const int XMM_REG_NUM_KEY_LAST = 15; + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); + // save the xmm registers which must be preserved 6-15 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + // the java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block + // registers holding the four results in the parallelized loop + const XMMRegister xmm_result0 = xmm0; + const XMMRegister xmm_result1 = xmm2; + const XMMRegister xmm_result2 = xmm3; + const XMMRegister xmm_result3 = xmm4; + + __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + + // 128-bit code follows here, parallelized + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_multiBlock_loopTop_128); + __ cmpptr(len_reg, 4*AESBlockSize); // see if at least 4 blocks left + __ jcc(Assembler::less, L_singleBlock_loopTop_128); + + __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers + __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize)); + __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize)); + __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize)); + +#define DoFour(opc, src_reg) \ + __ opc(xmm_result0, src_reg); \ + __ opc(xmm_result1, src_reg); \ + __ opc(xmm_result2, src_reg); \ + __ opc(xmm_result3, src_reg); + + DoFour(pxor, xmm_key_first); + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + DoFour(aesdec, as_XMMRegister(rnum)); + } + DoFour(aesdeclast, xmm_key_last); + // for each result, xor with the r vector of previous cipher block + __ pxor(xmm_result0, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize)); + __ pxor(xmm_result1, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize)); + __ pxor(xmm_result2, 
xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize)); + __ pxor(xmm_result3, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks + + __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1); + __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2); + __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3); + + __ addptr(pos, 4*AESBlockSize); + __ subptr(len_reg, 4*AESBlockSize); + __ jmp(L_multiBlock_loopTop_128); + + // registers used in the non-parallelized loops + const XMMRegister xmm_prev_block_cipher_save = xmm2; + const XMMRegister xmm_temp = xmm3; + + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_128); + __ cmpptr(len_reg, 0); // any blocks left?? + __ jcc(Assembler::equal, L_exit); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + __ aesdeclast(xmm_result, xmm_key_last); + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jmp(L_singleBlock_loopTop_128); + + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object +#ifdef _WIN64 + // restore regs belonging to calling function + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ movl(rax, 0); // return 0 (why?) 
+ __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_192); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0 + aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_256); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0 + aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + aes_dec_key(xmm_result, xmm_temp, key, 0xd0); + aes_dec_key(xmm_result, xmm_temp, key, 0xe0); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); + __ jmp(L_exit); + + return start; + } + + + #undef __ #define __ masm-> @@ -3152,6 +3694,16 @@ generate_arraycopy_stubs(); generate_math_stubs(); + + // don't bother generating these AES intrinsic stubs unless global flag is set + if (UseAESIntrinsics) { + StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others + + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = 
generate_cipherBlockChaining_decryptAESCrypt_Parallel(); + } } public:
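The decrypt stub is the only one registered in a parallel variant: CBC decryption has no loop-carried dependency, since each plaintext block needs only its own decrypted ciphertext XORed with the previous ciphertext block, so four blocks can be kept in flight in XMM registers at once. A minimal C++ sketch of the computation the stub performs; the function names and the single-block helper are illustrative placeholders, not the stub's actual symbols:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // placeholder for the aesdec/aesdeclast round sequence the stub runs
    static void aes_decrypt_block(const uint8_t in[16], uint8_t out[16]) {
      std::memcpy(out, in, 16);
    }

    // plain[i] = AES_dec(cipher[i]) ^ cipher[i-1], with cipher[-1] = r (the IV)
    void cbc_decrypt_sketch(const uint8_t* in, uint8_t* out,
                            std::size_t nblocks, uint8_t r[16]) {
      uint8_t prev[16], tmp[16];
      std::memcpy(prev, r, 16);
      for (std::size_t i = 0; i < nblocks; i++) {
        aes_decrypt_block(in + 16 * i, tmp);
        for (int j = 0; j < 16; j++) out[16 * i + j] = tmp[j] ^ prev[j];
        std::memcpy(prev, in + 16 * i, 16);  // this ciphertext is the next r
      }
      std::memcpy(r, prev, 16);  // final r is stored back, as L_exit does above
    }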
--- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -44,3 +44,4 @@ address StubRoutines::x86::_verify_mxcsr_entry = NULL; address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL; +address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -41,10 +41,14 @@ private: static address _verify_mxcsr_entry; static address _verify_fpu_cntrl_wrd_entry; + // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers + static address _key_shuffle_mask_addr; public: static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; } + static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } + }; static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -56,3 +56,4 @@ address StubRoutines::x86::_double_sign_mask = NULL; address StubRoutines::x86::_double_sign_flip = NULL; address StubRoutines::x86::_mxcsr_std = NULL; +address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -54,6 +54,8 @@ static address _double_sign_mask; static address _double_sign_flip; static address _mxcsr_std; + // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers + static address _key_shuffle_mask_addr; public: @@ -116,6 +118,9 @@ { return _mxcsr_std; } + + static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } + }; #endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
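Both ports now expose the same key_shuffle_mask_addr(). AESCrypt keeps its expanded key as an int[] of big-endian words, while aesenc/aesdec consume round keys as little-endian 128-bit values; the mask is fed to pshufb to byte-swap each 32-bit lane as a round key is loaded. A scalar sketch of the same fix-up (the stub does this 128 bits at a time):

    #include <cstdint>

    static inline uint32_t bswap32(uint32_t v) {
      return (v << 24) | ((v & 0xff00u) << 8) | ((v >> 8) & 0xff00u) | (v >> 24);
    }

    // one 128-bit round key = four ints from AESCrypt.K, each byte-swapped
    void shuffle_round_key(const uint32_t in[4], uint32_t out[4]) {
      for (int i = 0; i < 4; i++) out[i] = bswap32(in[i]);
    }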
--- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -419,13 +419,16 @@ if (UseAVX < 1) _cpuFeatures &= ~CPU_AVX; + if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) + _cpuFeatures &= ~CPU_AES; + if (logical_processors_per_package() == 1) { // HT processor could be installed on a system which doesn't support HT. _cpuFeatures &= ~CPU_HT; } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -441,6 +444,7 @@ (supports_popcnt() ? ", popcnt" : ""), (supports_avx() ? ", avx" : ""), (supports_avx2() ? ", avx2" : ""), + (supports_aes() ? ", aes" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), @@ -472,6 +476,29 @@ if (!supports_avx ()) // Drop to 0 if no AVX support UseAVX = 0; + // Use AES instructions if available. + if (supports_aes()) { + if (FLAG_IS_DEFAULT(UseAES)) { + UseAES = true; + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + // The AES intrinsic stubs require AES instruction support (of course) + // but also require AVX and SSE3 support for the instructions they use. + if (UseAES && (UseAVX > 0) && (UseSSE > 2)) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + UseAESIntrinsics = true; + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + #ifdef COMPILER2 if (UseFPUForSpilling) { if (UseSSE < 2) { @@ -714,6 +741,9 @@ if (UseAVX > 0) { tty->print(" UseAVX=%d",UseAVX); } + if (UseAES) { + tty->print(" UseAES=1"); + } tty->cr(); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
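The flag resolution above follows a consistent pattern: hardware capability wins unless the user explicitly set the flag, and an explicit request the hardware cannot honor produces a warning rather than a hard error. Distilled into a free-standing sketch (not the VM's code; the warning text is copied from the hunk above):

    #include <cstdio>

    bool resolve_use_aes(bool cpu_has_aes, bool user_set, bool user_value) {
      if (cpu_has_aes)
        return user_set ? user_value : true;  // default to on when supported
      if (user_set && user_value)
        std::fprintf(stderr, "AES instructions not available on this CPU\n");
      return false;  // forced back to the default, off
    }

    // UseAESIntrinsics additionally requires UseAVX > 0 and UseSSE > 2,
    // since the stubs use instructions beyond AES-NI itself.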
--- a/src/cpu/x86/vm/vm_version_x86.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -78,7 +78,9 @@ sse4_2 : 1, : 2, popcnt : 1, - : 3, + : 1, + aes : 1, + : 1, osxsave : 1, avx : 1, : 3; @@ -244,7 +246,8 @@ CPU_TSC = (1 << 15), CPU_TSCINV = (1 << 16), CPU_AVX = (1 << 17), - CPU_AVX2 = (1 << 18) + CPU_AVX2 = (1 << 18), + CPU_AES = (1 << 19) } cpuFeatureFlags; enum { @@ -420,6 +423,8 @@ result |= CPU_TSC; if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) result |= CPU_TSCINV; + if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) + result |= CPU_AES; // AMD features. if (is_amd()) { @@ -544,6 +549,7 @@ static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } + static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() &&
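The new bit-field decodes CPUID leaf 1, ECX bit 25 (AES-NI), which previously fell inside the reserved ": 3" gap. A stand-alone probe for the same bit, assuming a GCC/Clang toolchain:

    #include <cpuid.h>

    bool cpu_supports_aes() {
      unsigned int eax, ebx, ecx, edx;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
      return (ecx & (1u << 25)) != 0;  // CPUID.1:ECX.AESNI[bit 25]
    }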
--- a/src/os/bsd/vm/perfMemory_bsd.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os/bsd/vm/perfMemory_bsd.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -30,6 +30,7 @@ #include "os_bsd.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -753,6 +754,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -912,6 +917,10 @@ "Could not map PerfMemory"); } + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size;
--- a/src/os/linux/vm/perfMemory_linux.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os/linux/vm/perfMemory_linux.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -30,6 +30,7 @@ #include "os_linux.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -753,6 +754,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -912,6 +917,10 @@ "Could not map PerfMemory"); } + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size;
--- a/src/os/solaris/vm/os_solaris.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -55,6 +55,7 @@ #include "runtime/threadCritical.hpp" #include "runtime/timer.hpp" #include "services/attachListener.hpp" +#include "services/memTracker.hpp" #include "services/runtimeService.hpp" #include "thread_solaris.inline.hpp" #include "utilities/decoder.hpp" @@ -1482,11 +1483,11 @@ // First crack at OS-specific initialization, from inside the new thread. -void os::initialize_thread() { +void os::initialize_thread(Thread* thr) { int r = thr_main() ; guarantee (r == 0 || r == 1, "CR6501650 or CR6493689") ; if (r) { - JavaThread* jt = (JavaThread *)Thread::current(); + JavaThread* jt = (JavaThread *)thr; assert(jt != NULL,"Sanity check"); size_t stack_size; address base = jt->stack_base(); @@ -3072,11 +3073,12 @@ // Since snv_84, Solaris attempts to honor the address hint - see 5003415. // Give it a try, if the kernel honors the hint we can return immediately. char* addr = Solaris::anon_mmap(requested_addr, bytes, 0, false); + volatile int err = errno; if (addr == requested_addr) { return addr; } else if (addr != NULL) { - unmap_memory(addr, bytes); + pd_unmap_memory(addr, bytes); } if (PrintMiscellaneous && Verbose) {
--- a/src/os/solaris/vm/perfMemory_solaris.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os/solaris/vm/perfMemory_solaris.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -30,6 +30,7 @@ #include "os_solaris.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -768,6 +769,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -927,6 +932,10 @@ "Could not map PerfMemory"); } + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size;
--- a/src/os/windows/vm/perfMemory_windows.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os/windows/vm/perfMemory_windows.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -30,6 +30,7 @@ #include "os_windows.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" #include <windows.h> @@ -1496,6 +1497,10 @@ // clear the shared memory region (void)memset(mapAddress, '\0', size); + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return (char*) mapAddress; } @@ -1672,6 +1677,11 @@ "Could not map PerfMemory"); } + // this mapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + + *addrp = (char*)mapAddress; *sizep = size; @@ -1824,6 +1834,8 @@ } remove_file_mapping(addr); + // this unmapping does not go through the os api, so the operation has to be recorded here + MemTracker::record_virtual_memory_release((address)addr, bytes); } char* PerfMemory::backing_store_filename() {
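All four ports (BSD, Linux, Solaris, Windows) receive the same fix: PerfMemory maps its backing store with raw mmap/MapViewOfFile rather than os::reserve_memory, so native memory tracking never sees the mapping unless it is reported by hand. The pattern, condensed from the hunks above (the mapping call is a stand-in name):

    // char* addr = raw_map_backing_store(...);  // bypasses the os:: layer
    // MemTracker::record_virtual_memory_reserve((address)addr, size, CURRENT_PC);
    // MemTracker::record_virtual_memory_type((address)addr, mtInternal);
    // ...and on the unmap path (added on Windows above):
    // MemTracker::record_virtual_memory_release((address)addr, bytes);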
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -291,7 +291,7 @@ return (char*) -1; } -void os::initialize_thread() { +void os::initialize_thread(Thread* thr) { // Nothing to do. }
--- a/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -97,7 +97,7 @@ #endif // SPARC } -void os::initialize_thread() { +void os::initialize_thread(Thread* thr) { // Nothing to do. }
--- a/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -218,7 +218,7 @@ return (char*) 0; } -void os::initialize_thread() {} +void os::initialize_thread(Thread* thr) {} void os::print_context(outputStream *st, void *context) { if (context == NULL) return;
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -108,7 +108,7 @@ return (char*) -1; } -void os::initialize_thread() { +void os::initialize_thread(Thread* thr) { // Nothing to do. }
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -92,7 +92,7 @@ #endif // SPARC } -void os::initialize_thread() { +void os::initialize_thread(Thread * thr){ // Nothing to do. }
--- a/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -213,7 +213,7 @@ return true; } -void os::initialize_thread() { +void os::initialize_thread(Thread* thr) { // Nothing to do. }
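Every port gains the Thread* parameter even where the body is empty; only Solaris actually uses it (see the os_solaris.cpp hunk above and thread.cpp below), where it avoids a Thread::current() lookup and lets the primordial thread's stack bounds be fixed up before they are recorded. The resulting call order, sketched from Thread::record_stack_base_and_size() later in this changeset:

    // set_stack_base(os::current_stack_base());
    // set_stack_size(os::current_stack_size());
    // os::initialize_thread(this);   // Solaris may adjust base/size here
    // MemTracker::record_thread_stack(stack_base() - stack_size(), ...);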
--- a/src/share/vm/classfile/vmSymbols.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/classfile/vmSymbols.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -110,10 +110,12 @@ template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \ template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \ template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \ + template(sun_misc_Launcher_ExtClassLoader, "sun/misc/Launcher$ExtClassLoader") \ \ /* Java runtime version access */ \ template(sun_misc_Version, "sun/misc/Version") \ template(java_runtime_name_name, "java_runtime_name") \ + template(java_runtime_version_name, "java_runtime_version") \ \ /* class file format tags */ \ template(tag_source_file, "SourceFile") \ @@ -719,6 +721,21 @@ /* java/lang/ref/Reference */ \ do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ \ + /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \ + do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \ + do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \ + do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \ + do_name( encryptBlock_name, "encryptBlock") \ + do_name( decryptBlock_name, "decryptBlock") \ + do_signature(byteArray_int_byteArray_int_signature, "([BI[BI)V") \ + \ + do_class(com_sun_crypto_provider_cipherBlockChaining, "com/sun/crypto/provider/CipherBlockChaining") \ + do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \ + do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \ + do_name( encrypt_name, "encrypt") \ + do_name( decrypt_name, "decrypt") \ + do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \
--- a/src/share/vm/memory/allocation.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/memory/allocation.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -389,19 +389,18 @@ NOT_PRODUCT(Atomic::inc(&_instance_count);) } -Arena::Arena(Arena *a) : _chunk(a->_chunk), _hwm(a->_hwm), _max(a->_max), _first(a->_first) { - set_size_in_bytes(a->size_in_bytes()); - NOT_PRODUCT(Atomic::inc(&_instance_count);) -} - - Arena *Arena::move_contents(Arena *copy) { copy->destruct_contents(); copy->_chunk = _chunk; copy->_hwm = _hwm; copy->_max = _max; copy->_first = _first; + + // work around a rare race condition in which native memory tracking + // could double-count the arena size + size_t size = size_in_bytes(); + set_size_in_bytes(0); + copy->set_size_in_bytes(size); // Destroy original arena reset(); return copy; // Return Arena with contents @@ -453,6 +452,9 @@ char* end = _first->next() ? _first->top() : _hwm; free_malloced_objects(_first, _first->bottom(), end, _hwm); } + // reset the size before chop to avoid a rare race condition + // that can make total arena memory exceed total chunk memory + set_size_in_bytes(0); _first->chop(); reset(); }
--- a/src/share/vm/memory/allocation.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/memory/allocation.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -134,8 +134,10 @@ mtNMT = 0x0A00, // memory used by native memory tracking mtChunk = 0x0B00, // chunk that holds content of arenas mtJavaHeap = 0x0C00, // Java heap - mtDontTrack = 0x0D00, // memory we donot or cannot track - mt_number_of_types = 0x000C, // number of memory types + mtClassShared = 0x0D00, // class data sharing + mt_number_of_types = 0x000D, // number of memory types (mtDontTrack + // is not included as a valid type) + mtDontTrack = 0x0E00, // memory we do not or cannot track mt_masks = 0x7F00, // object type mask @@ -299,7 +301,6 @@ public: Arena(); Arena(size_t init_size); - Arena(Arena *old); ~Arena(); void destruct_contents(); char* hwm() const { return _hwm; }
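The ordering in move_contents() is the point of the fix: a concurrent native-memory-tracking sweep that sums arena sizes could otherwise observe the source and the copy both claiming the same bytes. Zeroing the source before crediting the destination trades a transient under-count for the double-count. A toy illustration of the ordering (not VM code):

    #include <atomic>
    #include <cstddef>

    std::atomic<std::size_t> src_size(4096), dst_size(0);

    void move_contents_sketch() {
      std::size_t s = src_size.load();
      src_size.store(0);   // drop the bytes from the source first...
      dst_size.store(s);   // ...so a concurrent sum never sees them twice
    }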
--- a/src/share/vm/memory/filemap.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/memory/filemap.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -29,6 +29,7 @@ #include "runtime/arguments.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" +#include "services/memTracker.hpp" #include "utilities/defaultStream.hpp" # include <sys/stat.h> @@ -358,7 +359,13 @@ ReservedSpace unmapped_rs = rs.last_part(size); mapped_rs.release(); - return map_region(i, true); + // This memory still belongs to JavaHeap + MemTracker::record_virtual_memory_type((address)unmapped_rs.base(), mtJavaHeap); + char* mapped_addr = map_region(i, true); + if (mapped_addr != NULL) { + MemTracker::record_virtual_memory_type((address)mapped_addr, mtJavaHeap); + } + return mapped_addr; }
--- a/src/share/vm/memory/genCollectedHeap.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/memory/genCollectedHeap.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -51,6 +51,7 @@ #include "runtime/java.hpp" #include "runtime/vmThread.hpp" #include "services/memoryService.hpp" +#include "services/memTracker.hpp" #include "utilities/vmError.hpp" #include "utilities/workgroup.hpp" #ifndef SERIALGC @@ -171,9 +172,13 @@ ReservedSpace this_rs = heap_rs.first_part(_gen_specs[i]->max_size(), UseSharedSpaces, UseSharedSpaces); _gens[i] = _gen_specs[i]->init(this_rs, i, rem_set()); + // tag generations in JavaHeap + MemTracker::record_virtual_memory_type((address)this_rs.base(), mtJavaHeap); heap_rs = heap_rs.last_part(_gen_specs[i]->max_size()); } _perm_gen = perm_gen_spec->init(heap_rs, PermSize, rem_set()); + // tag PermGen + MemTracker::record_virtual_memory_type((address)heap_rs.base(), mtJavaHeap); clear_incremental_collection_failed();
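Generations and the permanent generation are carved out of one large ReservedSpace, which NMT records once at reservation time; the added calls re-tag each carved sub-region so it is reported under mtJavaHeap. The pattern, condensed from the hunk above:

    // ReservedSpace this_rs = heap_rs.first_part(max_size, ...);
    // MemTracker::record_virtual_memory_type((address)this_rs.base(), mtJavaHeap);
    // heap_rs = heap_rs.last_part(max_size);   // remainder tagged when used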
--- a/src/share/vm/memory/resourceArea.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/memory/resourceArea.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -127,15 +127,21 @@ void reset_to_mark() { if (UseMallocOnly) free_malloced_objects(); - if( _chunk->next() ) // Delete later chunks + if( _chunk->next() ) { // Delete later chunks + // reset the arena size before deleting chunks; otherwise the total + // arena size could exceed the total chunk size + assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check"); + _area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check"); + } _area->_chunk = _chunk; // Roll back arena to saved chunk _area->_hwm = _hwm; _area->_max = _max; // clear out this chunk (to detect allocation bugs) if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm); - _area->set_size_in_bytes(size_in_bytes()); } ~ResourceMark() { @@ -219,15 +225,21 @@ void reset_to_mark() { if (UseMallocOnly) free_malloced_objects(); - if( _chunk->next() ) // Delete later chunks + if( _chunk->next() ) { // Delete later chunks + // reset the arena size before deleting chunks; otherwise the total + // arena size could exceed the total chunk size + assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check"); + _area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check"); + } _area->_chunk = _chunk; // Roll back arena to saved chunk _area->_hwm = _hwm; _area->_max = _max; // clear out this chunk (to detect allocation bugs) if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm); - _area->set_size_in_bytes(size_in_bytes()); } ~DeoptResourceMark() {
--- a/src/share/vm/oops/methodOop.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/oops/methodOop.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -1097,8 +1097,12 @@ vmSymbols::SID methodOopDesc::klass_id_for_intrinsics(klassOop holder) { // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics // because we are not loading from core libraries - if (instanceKlass::cast(holder)->class_loader() != NULL) + // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar, + // which is not loaded by the default class loader, so we check for its loader here + if ((instanceKlass::cast(holder)->class_loader() != NULL) && + instanceKlass::cast(holder)->class_loader()->klass()->klass_part()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) { return vmSymbols::NO_SID; // regardless of name, no intrinsics here + } // see if the klass name is well-known: Symbol* klass_name = instanceKlass::cast(holder)->name();
--- a/src/share/vm/opto/callGenerator.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/callGenerator.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -670,6 +670,129 @@ } +//------------------------PredictedIntrinsicGenerator------------------------------ +// Internal class which handles all predicted Intrinsic calls. +class PredictedIntrinsicGenerator : public CallGenerator { + CallGenerator* _intrinsic; + CallGenerator* _cg; + +public: + PredictedIntrinsicGenerator(CallGenerator* intrinsic, + CallGenerator* cg) + : CallGenerator(cg->method()) + { + _intrinsic = intrinsic; + _cg = cg; + } + + virtual bool is_virtual() const { return true; } + virtual bool is_inlined() const { return true; } + virtual bool is_intrinsic() const { return true; } + + virtual JVMState* generate(JVMState* jvms); +}; + + +CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg) { + return new PredictedIntrinsicGenerator(intrinsic, cg); +} + + +JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) { + GraphKit kit(jvms); + PhaseGVN& gvn = kit.gvn(); + + CompileLog* log = kit.C->log(); + if (log != NULL) { + log->elem("predicted_intrinsic bci='%d' method='%d'", + jvms->bci(), log->identify(method())); + } + + Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms()); + if (kit.failing()) + return NULL; // might happen because of NodeCountInliningCutoff + + SafePointNode* slow_map = NULL; + JVMState* slow_jvms; + if (slow_ctl != NULL) { + PreserveJVMState pjvms(&kit); + kit.set_control(slow_ctl); + if (!kit.stopped()) { + slow_jvms = _cg->generate(kit.sync_jvms()); + if (kit.failing()) + return NULL; // might happen because of NodeCountInliningCutoff + assert(slow_jvms != NULL, "must be"); + kit.add_exception_states_from(slow_jvms); + kit.set_map(slow_jvms->map()); + if (!kit.stopped()) + slow_map = kit.stop(); + } + } + + if (kit.stopped()) { + // Predicate is always false. + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + + // Generate intrinsic code: + JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms()); + if (new_jvms == NULL) { + // Intrinsic failed, so use slow code or make a direct call. + if (slow_map == NULL) { + CallGenerator* cg = CallGenerator::for_direct_call(method()); + new_jvms = cg->generate(kit.sync_jvms()); + } else { + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + } + kit.add_exception_states_from(new_jvms); + kit.set_jvms(new_jvms); + + // Need to merge slow and fast? + if (slow_map == NULL) { + // The fast path is the only path remaining. + return kit.transfer_exceptions_into_jvms(); + } + + if (kit.stopped()) { + // Intrinsic method threw an exception, so it's just the slow path after all. + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + + // Finish the diamond. 
+ kit.C->set_has_split_ifs(true); // Has chance for split-if optimization + RegionNode* region = new (kit.C) RegionNode(3); + region->init_req(1, kit.control()); + region->init_req(2, slow_map->control()); + kit.set_control(gvn.transform(region)); + Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO); + iophi->set_req(2, slow_map->i_o()); + kit.set_i_o(gvn.transform(iophi)); + kit.merge_memory(slow_map->merged_memory(), region, 2); + uint tos = kit.jvms()->stkoff() + kit.sp(); + uint limit = slow_map->req(); + for (uint i = TypeFunc::Parms; i < limit; i++) { + // Skip unused stack slots; fast forward to monoff(); + if (i == tos) { + i = kit.jvms()->monoff(); + if( i >= limit ) break; + } + Node* m = kit.map()->in(i); + Node* n = slow_map->in(i); + if (m != n) { + const Type* t = gvn.type(m)->meet(gvn.type(n)); + Node* phi = PhiNode::make(region, m, t); + phi->set_req(2, n); + kit.map()->set_req(i, gvn.transform(phi)); + } + } + return kit.transfer_exceptions_into_jvms(); +} + //-------------------------UncommonTrapCallGenerator----------------------------- // Internal class which handles all out-of-line calls checking receiver type. class UncommonTrapCallGenerator : public CallGenerator {
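After both paths have been generated, the code above closes a standard two-way diamond: a RegionNode merges control, Phis merge i/o and every live stack slot, and merge_memory folds the memory states. As straight-line pseudocode, the generator emits the equivalent of:

    // if (predicate()) {              // e.g. embeddedCipher instanceof AESCrypt
    //   result = intrinsic(args);     // fast path: the AES stub call
    // } else {
    //   result = java_method(args);   // slow path: inlined bytecode or a call
    // }
    // // exceptions from either path are merged into the caller's JVMState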
--- a/src/share/vm/opto/callGenerator.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/callGenerator.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -143,6 +143,9 @@ // Registry for intrinsics: static CallGenerator* for_intrinsic(ciMethod* m); static void register_intrinsic(ciMethod* m, CallGenerator* cg); + static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg); + virtual Node* generate_predicate(JVMState* jvms) { return NULL; }; static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) { if (PrintInlining)
--- a/src/share/vm/opto/doCall.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/doCall.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -108,7 +108,17 @@ // intrinsics handle strict f.p. correctly. if (allow_inline && allow_intrinsics) { CallGenerator* cg = find_intrinsic(callee, call_is_virtual); - if (cg != NULL) return cg; + if (cg != NULL) { + if (cg->is_predicted()) { + // Code without intrinsic but, hopefully, inlined. + CallGenerator* inline_cg = this->call_generator(callee, + vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false); + if (inline_cg != NULL) { + cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg); + } + } + return cg; + } } // Do method handle calls.
--- a/src/share/vm/opto/escape.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/escape.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -886,12 +886,16 @@ arg_has_oops && (i > TypeFunc::Parms); #ifdef ASSERT if (!(is_arraycopy || - call->as_CallLeaf()->_name != NULL && - (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || - strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 )) - ) { + (call->as_CallLeaf()->_name != NULL && + (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || + strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || + strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || + strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || + strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || + strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0) + ))) { call->dump(); - assert(false, "EA: unexpected CallLeaf"); + fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); } #endif // Always process arraycopy's destination object since
--- a/src/share/vm/opto/library_call.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/library_call.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -44,18 +44,22 @@ public: private: bool _is_virtual; + bool _is_predicted; vmIntrinsics::ID _intrinsic_id; public: - LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id) + LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id) : InlineCallGenerator(m), _is_virtual(is_virtual), + _is_predicted(is_predicted), _intrinsic_id(id) { } virtual bool is_intrinsic() const { return true; } virtual bool is_virtual() const { return _is_virtual; } + virtual bool is_predicted() const { return _is_predicted; } virtual JVMState* generate(JVMState* jvms); + virtual Node* generate_predicate(JVMState* jvms); vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; } }; @@ -83,6 +87,7 @@ int arg_size() const { return callee()->arg_size(); } bool try_to_inline(); + Node* try_to_predicate(); // Helper functions to inline natives void push_result(RegionNode* region, PhiNode* value); @@ -148,6 +153,7 @@ CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) { return generate_method_call(method_id, true, false); } + Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static); Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2); Node* make_string_method_node(int opcode, Node* str1, Node* str2); @@ -253,6 +259,10 @@ bool inline_reverseBytes(vmIntrinsics::ID id); bool inline_reference_get(); + bool inline_aescrypt_Block(vmIntrinsics::ID id); + bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id); + Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); + Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); }; @@ -306,6 +316,8 @@ } } + bool is_predicted = false; + switch (id) { case vmIntrinsics::_compareTo: if (!SpecialStringCompareTo) return NULL; @@ -413,6 +425,18 @@ break; #endif + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + if (!UseAESIntrinsics) return NULL; + break; + + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + if (!UseAESIntrinsics) return NULL; + // these two require the predicated logic + is_predicted = true; + break; + default: assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility"); assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?"); @@ -444,7 +468,7 @@ if (!InlineUnsafeOps) return NULL; } - return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id); + return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id); } //----------------------register_library_intrinsics----------------------- @@ -496,6 +520,47 @@ return NULL; } +Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { + LibraryCallKit kit(jvms, this); + Compile* C = kit.C; + int nodes = C->unique(); +#ifndef PRODUCT + assert(is_predicted(), "sanity"); + if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) { + char buf[1000]; + const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf)); + tty->print_cr("Predicate for intrinsic %s", str); + } +#endif + + Node* slow_ctl = kit.try_to_predicate(); + if (!kit.failing()) { + if (C->log()) { + C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'", + 
vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " virtual='1'" : ""), + C->unique() - nodes); + } + return slow_ctl; // Could be NULL if the check folds. + } + + // The intrinsic bailed out + if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { + if (jvms->has_method()) { + // Not a root compile. + const char* msg = "failed to generate predicate for intrinsic"; + CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + } else { + // Root compile + tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in", + vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " (virtual)" : ""), kit.bci()); + } + } + C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); + return NULL; +} + bool LibraryCallKit::try_to_inline() { // Handle symbolic names for otherwise undistinguished boolean switches: const bool is_store = true; @@ -767,6 +832,14 @@ case vmIntrinsics::_Reference_get: return inline_reference_get(); + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + return inline_aescrypt_Block(intrinsic_id()); + + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt(intrinsic_id()); + default: // If you get here, it may be that someone has added a new intrinsic // to the list in vmSymbols.hpp without implementing it here. @@ -780,6 +853,36 @@ } } +Node* LibraryCallKit::try_to_predicate() { + if (!jvms()->has_method()) { + // Root JVMState has a null method. + assert(map()->memory()->Opcode() == Op_Parm, ""); + // Insert the memory aliasing node + set_all_memory(reset_memory()); + } + assert(merged_memory(), ""); + + switch (intrinsic_id()) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt_predicate(false); + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt_predicate(true); + + default: + // If you get here, it may be that someone has added a new intrinsic + // to the list in vmSymbols.hpp without implementing it here. +#ifndef PRODUCT + if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) { + tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)", + vmIntrinsics::name_at(intrinsic_id()), intrinsic_id()); + } +#endif + Node* slow_ctl = control(); + set_control(top()); // No fast path intrinsic + return slow_ctl; + } +} + //------------------------------push_result------------------------------ // Helper function for finishing intrinsics. void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) { @@ -5613,3 +5716,265 @@ push(result); return true; } + + +Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, + bool is_exact=true, bool is_static=false) { + + const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); + + ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName), + ciSymbol::make(fieldTypeString), + is_static); + if (field == NULL) return (Node *) NULL; + assert (field != NULL, "undefined field"); + + // Next code copied from Parse::do_get_xxx(): + + // Compute address and memory type.
+ int offset = field->offset_in_bytes(); + bool is_vol = field->is_volatile(); + ciType* field_klass = field->type(); + assert(field_klass->is_loaded(), "should be loaded"); + const TypePtr* adr_type = C->alias_type(field)->adr_type(); + Node *adr = basic_plus_adr(fromObj, fromObj, offset); + BasicType bt = field->layout_type(); + + // Build the resultant type of the load + const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass()); + + // Build the load. + Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol); + return loadedField; +} + + +//------------------------------inline_aescrypt_Block----------------------- +bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) { + address stubAddr; + const char *stubName; + assert(UseAES, "need AES instruction support"); + + switch(id) { + case vmIntrinsics::_aescrypt_encryptBlock: + stubAddr = StubRoutines::aescrypt_encryptBlock(); + stubName = "aescrypt_encryptBlock"; + break; + case vmIntrinsics::_aescrypt_decryptBlock: + stubAddr = StubRoutines::aescrypt_decryptBlock(); + stubName = "aescrypt_decryptBlock"; + break; + } + if (stubAddr == NULL) return false; + + // Restore the stack and pop off the arguments. + int nargs = 5; // this + 2 oop/offset combos + assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments"); + + Node *aescrypt_object = argument(0); + Node *src = argument(1); + Node *src_offset = argument(2); + Node *dest = argument(3); + Node *dest_offset = argument(4); + + // (1) src and dest are arrays. + const Type* src_type = src->Value(&_gvn); + const Type* dest_type = dest->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_dest = dest_type->isa_aryptr(); + assert (top_src != NULL && top_src->klass() != NULL && top_dest != NULL && top_dest->klass() != NULL, "args are strange"); + + // for the quick and dirty code we will skip all the checks. + // we are just trying to get the call to be generated. + Node* src_start = src; + Node* dest_start = dest; + if (src_offset != NULL || dest_offset != NULL) { + assert(src_offset != NULL && dest_offset != NULL, ""); + src_start = array_element_address(src, src_offset, T_BYTE); + dest_start = array_element_address(dest, dest_offset, T_BYTE); + } + + // now need to get the start of its expanded key array + // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java + Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object); + if (k_start == NULL) return false; + + // Call the stub. + make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start); + + return true; +} + +//------------------------------inline_cipherBlockChaining_AESCrypt----------------------- +bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) { + address stubAddr; + const char *stubName; + + assert(UseAES, "need AES instruction support"); + + switch(id) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt(); + stubName = "cipherBlockChaining_encryptAESCrypt"; + break; + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt(); + stubName = "cipherBlockChaining_decryptAESCrypt"; + break; + } + if (stubAddr == NULL) return false; + + + // Restore the stack and pop off the arguments. 
+ int nargs = 6; // this + oop/offset + len + oop/offset + assert(callee()->signature()->size() == nargs-1, "wrong number of arguments"); + Node *cipherBlockChaining_object = argument(0); + Node *src = argument(1); + Node *src_offset = argument(2); + Node *len = argument(3); + Node *dest = argument(4); + Node *dest_offset = argument(5); + + // (1) src and dest are arrays. + const Type* src_type = src->Value(&_gvn); + const Type* dest_type = dest->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_dest = dest_type->isa_aryptr(); + assert (top_src != NULL && top_src->klass() != NULL + && top_dest != NULL && top_dest->klass() != NULL, "args are strange"); + + // checks are the responsibility of the caller + Node* src_start = src; + Node* dest_start = dest; + if (src_offset != NULL || dest_offset != NULL) { + assert(src_offset != NULL && dest_offset != NULL, ""); + src_start = array_element_address(src, src_offset, T_BYTE); + dest_start = array_element_address(dest, dest_offset, T_BYTE); + } + + // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object + // (because of the predicated logic executed earlier). + // so we cast it here safely. + // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java + + Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); + if (embeddedCipherObj == NULL) return false; + + // cast it to what we know it will be at runtime + const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr(); + assert(tinst != NULL, "CBC obj is null"); + assert(tinst->klass()->is_loaded(), "CBC obj is not loaded"); + ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt")); + if (!klass_AESCrypt->is_loaded()) return false; + + ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); + const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt); + const TypeOopPtr* xtype = aklass->as_instance_type(); + Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype); + aescrypt_object = _gvn.transform(aescrypt_object); + + // we need to get the start of the aescrypt_object's expanded key array + Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object); + if (k_start == NULL) return false; + + // similarly, get the start address of the r vector + Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false); + if (objRvec == NULL) return false; + Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE); + + // Call the stub, passing src_start, dest_start, k_start, r_start and src_len + make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::cipherBlockChaining_aescrypt_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start, r_start, len); + + // return is void so no result needs to be pushed + + return true; +} + +//------------------------------get_key_start_from_aescrypt_object----------------------- +Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) { + Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false); + assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); + if (objAESCryptKey == NULL) return (Node *) NULL; + + // now have the array, need to get the start 
address of the K array + Node* k_start = array_element_address(objAESCryptKey, intcon(0), T_INT); + return k_start; +} + +//----------------------------inline_cipherBlockChaining_AESCrypt_predicate---------------------------- +// Return node representing slow path of predicate check. +// the pseudo code we want to emulate with this predicate is: +// for encryption: +// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath +// for decryption: +// if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath +// note cipher==plain is more conservative than the original java code but that's OK +// +Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) { + // First, check receiver for NULL since it is virtual method. + int nargs = arg_size(); + Node* objCBC = argument(0); + _sp += nargs; + objCBC = do_null_check(objCBC, T_OBJECT); + _sp -= nargs; + + if (stopped()) return NULL; // Always NULL + + // Load embeddedCipher field of CipherBlockChaining object. + Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); + + // get AESCrypt klass for instanceOf check + // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point + // will have same classloader as CipherBlockChaining object + const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr(); + assert(tinst != NULL, "CBCobj is null"); + assert(tinst->klass()->is_loaded(), "CBCobj is not loaded"); + + // we want to do an instanceof comparison against the AESCrypt class + ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt")); + if (!klass_AESCrypt->is_loaded()) { + // if AESCrypt is not even loaded, we never take the intrinsic fast path + Node* ctrl = control(); + set_control(top()); // no regular fast path + return ctrl; + } + ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); + + _sp += nargs; // gen_instanceof might do an uncommon trap + Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt))); + _sp -= nargs; + Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1))); + Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); + + Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN); + + // for encryption, we are done + if (!decrypting) + return instof_false; // even if it is NULL + + // for decryption, we need to add a further check to avoid + // taking the intrinsic path when cipher and plain are the same + // see the original java code for why. + RegionNode* region = new(C) RegionNode(3); + region->init_req(1, instof_false); + Node* src = argument(1); + Node *dest = argument(4); + Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest)); + Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); + Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN); + region->init_req(2, src_dest_conjoint); + + record_for_igvn(region); + return _gvn.transform(region); + +} + +
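Reading the guard logic back out: generate_guard() returns the branch on which the test failed, and the region collects every path that must fall back to Java. The intrinsic is therefore taken exactly as the sketch below says; the src != dest restriction is deliberately coarser than the Java code's overlap handling, as the comment above notes.

    // encrypt: use_stub = (embeddedCipher instanceof AESCrypt)
    // decrypt: use_stub = (embeddedCipher instanceof AESCrypt) && (src != dest)
    // otherwise: fall back to the compiled Java implementation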
--- a/src/share/vm/opto/runtime.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/runtime.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -811,6 +811,48 @@ return TypeFunc::make(domain, range); } +// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant) +const TypeFunc* OptoRuntime::aescrypt_block_Type() { + // create input type (domain) + int num_args = 3; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // src + fields[argp++] = TypePtr::NOTNULL; // dest + fields[argp++] = TypePtr::NOTNULL; // k array + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + +// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void +const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { + // create input type (domain) + int num_args = 5; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // src + fields[argp++] = TypePtr::NOTNULL; // dest + fields[argp++] = TypePtr::NOTNULL; // k array + fields[argp++] = TypePtr::NOTNULL; // r array + fields[argp++] = TypeInt::INT; // src len + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { // create input type (domain)
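These TypeFuncs describe the native ABI of the stubs registered earlier; in C terms the two calling conventions amount to the prototypes below. Parameter names follow the field comments above, and the symbol names are illustrative:

    extern "C" {
      // three pointers, fixed 16-byte block length
      void aescrypt_block(const unsigned char* src, unsigned char* dest,
                          const int* k /* expanded key */);
      // four pointers plus an explicit byte length
      void cipherBlockChaining_aescrypt(const unsigned char* src,
                                        unsigned char* dest, const int* k,
                                        unsigned char* r /* chaining vector */,
                                        int src_len);
    }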
--- a/src/share/vm/opto/runtime.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/opto/runtime.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -280,6 +280,9 @@ static const TypeFunc* array_fill_Type(); + static const TypeFunc* aescrypt_block_Type(); + static const TypeFunc* cipherBlockChaining_aescrypt_Type(); + // leaf on stack replacement interpreter accessor types static const TypeFunc* osr_end_Type();
--- a/src/share/vm/runtime/arguments.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/arguments.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -1959,6 +1959,12 @@ } #endif // SPARC + // check native memory tracking flags + if (PrintNMTStatistics && MemTracker::tracking_level() == MemTracker::NMT_off) { + warning("PrintNMTStatistics is disabled, because native memory tracking is not enabled"); + PrintNMTStatistics = false; + } + return status; }
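With this check, the diagnostic flag degrades to a warning instead of silently printing nothing. A launch line that satisfies it, assuming the JDK 7u-era NMT option values (MyApp is a placeholder):

    java -XX:NativeMemoryTracking=summary \
         -XX:+UnlockDiagnosticVMOptions -XX:+PrintNMTStatistics MyApp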
--- a/src/share/vm/runtime/globals.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/globals.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -530,6 +530,9 @@ product(intx, UseSSE, 99, \ "Highest supported SSE instructions set on x86/x64") \ \ + product(bool, UseAES, false, \ + "Control whether AES instructions can be used on x86/x64") \ + \ product(uintx, LargePageSizeInBytes, 0, \ "Large page size (0 to let VM choose the page size") \ \ @@ -632,6 +635,9 @@ product(bool, UseSSE42Intrinsics, false, \ "SSE4.2 versions of intrinsics") \ \ + product(bool, UseAESIntrinsics, false, \ + "use intrinsics for AES versions of crypto") \ + \ develop(bool, TraceCallFixup, false, \ "traces all call fixups") \ \ @@ -860,6 +866,9 @@ product(ccstr, NativeMemoryTracking, "off", \ "Native memory tracking options") \ \ + diagnostic(bool, PrintNMTStatistics, false, \ + "Print native memory tracking summary data if it is on") \ + \ diagnostic(bool, LogCompilation, false, \ "Log compilation activity in detail to hotspot.log or LogFile") \ \
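Both new flags default to false here and are flipped on by the vm_version_x86.cpp logic above when the CPU supports AES-NI; forcing them on unsupported hardware triggers the warnings added there. Typical ways to exercise them (PrintFlagsFinal is the standard flag-dump option):

    java -XX:+UseAES -XX:+UseAESIntrinsics -version
    java -XX:+PrintFlagsFinal -version | grep -i aes    # inspect resolved values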
--- a/src/share/vm/runtime/handles.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/handles.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -153,13 +153,18 @@ // Delete later chunks if( _chunk->next() ) { + // reset the arena size before deleting chunks; otherwise the total + // arena size could exceed the total chunk size + assert(area->size_in_bytes() > size_in_bytes(), "Sanity check"); + area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(area->size_in_bytes() == size_in_bytes(), "Sanity check"); } // Roll back arena to saved top markers area->_chunk = _chunk; area->_hwm = _hwm; area->_max = _max; - area->set_size_in_bytes(_size_in_bytes); #ifdef ASSERT // clear out first chunk (to detect allocation bugs) if (ZapVMHandleArea) {
--- a/src/share/vm/runtime/handles.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/handles.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -319,6 +319,7 @@ void set_previous_handle_mark(HandleMark* mark) { _previous_handle_mark = mark; } HandleMark* previous_handle_mark() const { return _previous_handle_mark; } + size_t size_in_bytes() const { return _size_in_bytes; } public: HandleMark(); // see handles_inline.hpp HandleMark(Thread* thread) { initialize(thread); }
--- a/src/share/vm/runtime/handles.inline.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/handles.inline.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -79,13 +79,18 @@ HandleArea* area = _area; // help compilers with poor alias analysis // Delete later chunks if( _chunk->next() ) { + // reset the arena size before deleting chunks; otherwise the total + // arena size could exceed the total chunk size + assert(area->size_in_bytes() > size_in_bytes(), "Sanity check"); + area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(area->size_in_bytes() == size_in_bytes(), "Sanity check"); } // Roll back arena to saved top markers area->_chunk = _chunk; area->_hwm = _hwm; area->_max = _max; - area->set_size_in_bytes(_size_in_bytes); debug_only(area->_handle_mark_nesting--); }
--- a/src/share/vm/runtime/java.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/java.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -57,6 +57,8 @@ #include "runtime/task.hpp" #include "runtime/timer.hpp" #include "runtime/vm_operations.hpp" +#include "services/memReporter.hpp" +#include "services/memTracker.hpp" #include "trace/tracing.hpp" #include "trace/traceEventTypes.hpp" #include "utilities/dtrace.hpp" @@ -356,6 +358,15 @@ } #endif // COMPILER2 #endif // ENABLE_ZAP_DEAD_LOCALS + // Native memory tracking data + if (PrintNMTStatistics) { + if (MemTracker::is_on()) { + BaselineTTYOutputer outputer(tty); + MemTracker::print_memory_usage(outputer, K, false); + } else { + tty->print_cr(MemTracker::reason()); + } + } } #else // PRODUCT MODE STATISTICS @@ -373,6 +384,16 @@ if (PrintBiasedLockingStatistics) { BiasedLocking::print_counters(); } + + // Native memory tracking data + if (PrintNMTStatistics) { + if (MemTracker::is_on()) { + BaselineTTYOutputer outputer(tty); + MemTracker::print_memory_usage(outputer, K, false); + } else { + tty->print_cr(MemTracker::reason()); + } + } } #endif @@ -661,6 +682,7 @@ JDK_Version JDK_Version::_current; const char* JDK_Version::_runtime_name; +const char* JDK_Version::_runtime_version; void JDK_Version::initialize() { jdk_version_info info;
--- a/src/share/vm/runtime/java.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/java.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -75,6 +75,7 @@ static JDK_Version _current; static const char* _runtime_name; + static const char* _runtime_version; // In this class, we promote the minor version of release to be the // major version for releases >= 5 in anticipation of the JDK doing the @@ -189,6 +190,13 @@ _runtime_name = name; } + static const char* runtime_version() { + return _runtime_version; + } + static void set_runtime_version(const char* version) { + _runtime_version = version; + } + // Convenience methods for queries on the current major/minor version static bool is_jdk12x_version() { return current().compare_major(2) == 0;
--- a/src/share/vm/runtime/os.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/os.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -593,9 +593,7 @@ if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock); // we do not track MallocCushion memory - if (MemTracker::is_on()) { MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller); - } return memblock; } @@ -606,7 +604,7 @@ NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1)); NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size)); void* ptr = ::realloc(memblock, size); - if (ptr != NULL && MemTracker::is_on()) { + if (ptr != NULL) { MemTracker::record_realloc((address)memblock, (address)ptr, size, memflags, caller == 0 ? CALLER_PC : caller); } @@ -1389,7 +1387,7 @@ char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) { char* result = pd_reserve_memory(bytes, addr, alignment_hint); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); } @@ -1397,7 +1395,7 @@ } char* os::attempt_reserve_memory_at(size_t bytes, char* addr) { char* result = pd_attempt_reserve_memory_at(bytes, addr); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); } return result; @@ -1410,7 +1408,7 @@ bool os::commit_memory(char* addr, size_t bytes, bool executable) { bool res = pd_commit_memory(addr, bytes, executable); - if (res && MemTracker::is_on()) { + if (res) { MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC); } return res; @@ -1419,7 +1417,7 @@ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, bool executable) { bool res = os::pd_commit_memory(addr, size, alignment_hint, executable); - if (res && MemTracker::is_on()) { + if (res) { MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC); } return res; @@ -1446,8 +1444,9 @@ char *addr, size_t bytes, bool read_only, bool allow_exec) { char* result = pd_map_memory(fd, file_name, file_offset, addr, bytes, read_only, allow_exec); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); + MemTracker::record_virtual_memory_commit((address)result, bytes, CALLER_PC); } return result; } @@ -1462,6 +1461,7 @@ bool os::unmap_memory(char *addr, size_t bytes) { bool result = pd_unmap_memory(addr, bytes); if (result) { + MemTracker::record_virtual_memory_uncommit((address)addr, bytes); MemTracker::record_virtual_memory_release((address)addr, bytes); } return result;
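Two changes are folded together here: the MemTracker::is_on() guards are dropped (the tracker is expected to no-op cheaply when disabled), and mapped files are now recorded as both reserved and committed, with unmap recording the matching uncommit and release. The resulting event pairing:

    // reserve_memory / attempt_reserve_memory_at  -> record ... reserve
    // commit_memory (both overloads)              -> record ... commit
    // map_memory                                  -> reserve + commit
    // unmap_memory                                -> uncommit + release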
--- a/src/share/vm/runtime/os.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/os.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -387,7 +387,7 @@ static void pd_start_thread(Thread* thread); static void start_thread(Thread* thread); - static void initialize_thread(); + static void initialize_thread(Thread* thr); static void free_thread(OSThread* osthread); // thread id on Linux/64bit is 64bit, on Windows and Solaris, it's 32bit
--- a/src/share/vm/runtime/stubRoutines.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/stubRoutines.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -120,6 +120,10 @@ address StubRoutines::_arrayof_jshort_fill; address StubRoutines::_arrayof_jint_fill; +address StubRoutines::_aescrypt_encryptBlock = NULL; +address StubRoutines::_aescrypt_decryptBlock = NULL; +address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL; +address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; double (* StubRoutines::_intrinsic_log )(double) = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/stubRoutines.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -199,6 +199,11 @@ // zero heap space aligned to jlong (8 bytes) static address _zero_aligned_words; + static address _aescrypt_encryptBlock; + static address _aescrypt_decryptBlock; + static address _cipherBlockChaining_encryptAESCrypt; + static address _cipherBlockChaining_decryptAESCrypt; + // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. They are used for // constant folding in the compiler to ensure equivalence. If the @@ -330,6 +335,11 @@ static address arrayof_jshort_fill() { return _arrayof_jshort_fill; } static address arrayof_jint_fill() { return _arrayof_jint_fill; } + static address aescrypt_encryptBlock() { return _aescrypt_encryptBlock; } + static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; } + static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; } + static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } + static address select_fill_function(BasicType t, bool aligned, const char* &name); static address zero_aligned_words() { return _zero_aligned_words; }
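These entry points stay NULL (see the stubRoutines.cpp hunk above) unless a platform's stub generator installs AES intrinsics, so callers can test the accessor against NULL to choose between the generated stub and a software path. A sketch of that idiom with illustrative names (aescrypt_encrypt_stub and software_encrypt are not HotSpot symbols):

    #include <cstdio>

    typedef void (*aes_stub_t)(const unsigned char* in, unsigned char* out);

    static aes_stub_t aescrypt_encrypt_stub = 0;  // NULL until generated

    void software_encrypt(const unsigned char* in, unsigned char* out) {
      std::printf("software fallback\n");          // placeholder path
    }

    void encrypt_block(const unsigned char* in, unsigned char* out) {
      if (aescrypt_encrypt_stub != 0) {
        aescrypt_encrypt_stub(in, out);   // hardware-accelerated stub
      } else {
        software_encrypt(in, out);        // no intrinsic on this platform
      }
    }

A platform stub generator would assign aescrypt_encrypt_stub once at startup, mirroring how the _aescrypt_* fields above are filled in when the intrinsics are available.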
--- a/src/share/vm/runtime/thread.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/thread.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -306,20 +306,25 @@ // initialize structure dependent on thread local storage ThreadLocalStorage::set_thread(this); - - // set up any platform-specific state. - os::initialize_thread(); } void Thread::record_stack_base_and_size() { set_stack_base(os::current_stack_base()); set_stack_size(os::current_stack_size()); - - // record thread's native stack, stack grows downward - address vm_base = _stack_base - _stack_size; - MemTracker::record_virtual_memory_reserve(vm_base, _stack_size, - CURRENT_PC, this); - MemTracker::record_virtual_memory_type(vm_base, mtThreadStack); + // CR 7190089: on Solaris, primordial thread's stack is adjusted + // in initialize_thread(). Without the adjustment, stack size is + // incorrect if stack is set to unlimited (ulimit -s unlimited). + // So far, only Solaris has real implementation of initialize_thread(). + // + // set up any platform-specific state. + os::initialize_thread(this); + + // record thread's native stack, stack grows downward + if (MemTracker::is_on()) { + address stack_low_addr = stack_base() - stack_size(); + MemTracker::record_thread_stack(stack_low_addr, stack_size(), this, + CURRENT_PC); + } } @@ -327,8 +332,17 @@ // Reclaim the objectmonitors from the omFreeList of the moribund thread. ObjectSynchronizer::omFlush (this) ; - MemTracker::record_virtual_memory_release((_stack_base - _stack_size), - _stack_size, this); + // stack_base can be NULL if the thread is never started or exited before + // record_stack_base_and_size called. Although, we would like to ensure + // that all started threads do call record_stack_base_and_size(), there is + // not proper way to enforce that. + if (_stack_base != NULL) { + address low_stack_addr = stack_base() - stack_size(); + MemTracker::release_thread_stack(low_stack_addr, stack_size(), this); +#ifdef ASSERT + set_stack_base(NULL); +#endif + } // deallocate data structures delete resource_area(); @@ -1008,6 +1022,7 @@ } char java_runtime_name[128] = ""; +char java_runtime_version[128] = ""; // extract the JRE name from sun.misc.Version.java_runtime_name static const char* get_java_runtime_name(TRAPS) { @@ -1030,6 +1045,27 @@ } } +// extract the JRE version from sun.misc.Version.java_runtime_version +static const char* get_java_runtime_version(TRAPS) { + klassOop k = SystemDictionary::find(vmSymbols::sun_misc_Version(), + Handle(), Handle(), CHECK_AND_CLEAR_NULL); + fieldDescriptor fd; + bool found = k != NULL && + instanceKlass::cast(k)->find_local_field(vmSymbols::java_runtime_version_name(), + vmSymbols::string_signature(), &fd); + if (found) { + oop name_oop = k->java_mirror()->obj_field(fd.offset()); + if (name_oop == NULL) + return NULL; + const char* name = java_lang_String::as_utf8_string(name_oop, + java_runtime_version, + sizeof(java_runtime_version)); + return name; + } else { + return NULL; + } +} + // General purpose hook into Java code, run once when the VM is initialized. // The Java library method itself may be changed independently from the VM. static void call_postVMInitHook(TRAPS) { @@ -1527,10 +1563,12 @@ tty->print_cr("terminate thread %p", this); } - // Info NMT that this JavaThread is exiting, its memory - // recorder should be collected + // By now, this thread should already be invisible to safepoint, + // and its per-thread recorder also collected. 
assert(!is_safepoint_visible(), "wrong state"); - MemTracker::thread_exiting(this); +#if INCLUDE_NMT + assert(get_recorder() == NULL, "Already collected"); +#endif // INCLUDE_NMT // JSR166 -- return the parker to the free list Parker::Release(_parker); @@ -2431,6 +2469,7 @@ } void JavaThread::remove_stack_guard_pages() { + assert(Thread::current() == this, "from different thread"); if (_stack_guard_state == stack_guard_unused) return; address low_addr = stack_base() - stack_size(); size_t len = (StackYellowPages + StackRedPages) * os::vm_page_size(); @@ -3454,6 +3493,7 @@ // get the Java runtime name after java.lang.System is initialized JDK_Version::set_runtime_name(get_java_runtime_name(THREAD)); + JDK_Version::set_runtime_version(get_java_runtime_version(THREAD)); } else { warning("java.lang.System not initialized"); } @@ -4070,7 +4110,10 @@ // Now, this thread is not visible to safepoint p->set_safepoint_visible(false); - + // once the thread becomes safepoint invisible, we can not use its per-thread + // recorder. And Threads::do_threads() no longer walks this thread, so we have + // to release its per-thread recorder here. + MemTracker::thread_exiting(p); } // unlock Threads_lock // Since Events::log uses a lock, we grab it outside the Threads_lock
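Two things happen in this thread.cpp hunk: os::initialize_thread(Thread*) is now called from record_stack_base_and_size(), before the stack is recorded, so Solaris can adjust the primordial thread's stack first (CR 7190089), and the native stack is reported to NMT via record_thread_stack()/release_thread_stack() only when tracking is on and a stack base was ever recorded. The region arithmetic is the usual downward-growing stack convention; a self-contained illustration with simplified types:

    #include <cassert>
    #include <cstddef>

    typedef unsigned char* address;

    struct StackInfo {
      address _stack_base;   // highest stack address, as returned by
                             // os::current_stack_base()
      size_t  _stack_size;

      // the tracked native stack region is [base - size, base)
      address low_addr() const {
        assert(_stack_base != NULL);   // NULL if the thread never started
        return _stack_base - _stack_size;
      }
    };

    int main() {
      unsigned char stack[4096];
      StackInfo s = { stack + sizeof(stack), sizeof(stack) };
      assert(s.low_addr() == stack);
      return 0;
    }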
--- a/src/share/vm/runtime/vm_version.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/runtime/vm_version.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -241,19 +241,21 @@ #ifndef FLOAT_ARCH #if defined(__SOFTFP__) - #define FLOAT_ARCH "-sflt" + #define FLOAT_ARCH_STR "-sflt" #elif defined(E500V2) - #define FLOAT_ARCH "-e500v2" + #define FLOAT_ARCH_STR "-e500v2" #elif defined(ARM) - #define FLOAT_ARCH "-vfp" + #define FLOAT_ARCH_STR "-vfp" #elif defined(PPC) - #define FLOAT_ARCH "-hflt" + #define FLOAT_ARCH_STR "-hflt" #else - #define FLOAT_ARCH "" + #define FLOAT_ARCH_STR "" #endif + #else + #define FLOAT_ARCH_STR XSTR(FLOAT_ARCH) #endif - return VMNAME " (" VM_RELEASE ") for " OS "-" CPU FLOAT_ARCH + return VMNAME " (" VM_RELEASE ") for " OS "-" CPU FLOAT_ARCH_STR " JRE (" JRE_RELEASE_VERSION "), built on " __DATE__ " " __TIME__ " by " XSTR(HOTSPOT_BUILD_USER) " with " HOTSPOT_BUILD_COMPILER; }
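The rename matters because FLOAT_ARCH may arrive predefined on the compiler command line as a bare token; the internal defaults are already string literals, so they become FLOAT_ARCH_STR directly, while an external FLOAT_ARCH is turned into a literal with XSTR. A small self-contained illustration of that two-level stringification (the -vfp default here is only for the demo):

    #include <cstdio>

    #define STR(x)  #x
    #define XSTR(x) STR(x)   /* expand the argument before quoting it */

    #ifndef FLOAT_ARCH
    #define FLOAT_ARCH -vfp  /* pretend this came from -DFLOAT_ARCH=-vfp */
    #endif

    int main() {
      // "linux-arm" XSTR(FLOAT_ARCH) concatenates to "linux-arm-vfp"
      std::printf("%s\n", "linux-arm" XSTR(FLOAT_ARCH));
      return 0;
    }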
--- a/src/share/vm/services/attachListener.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/attachListener.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -404,6 +404,8 @@ static void attach_listener_thread_entry(JavaThread* thread, TRAPS) { os::set_priority(thread, NearMaxPriority); + thread->record_stack_base_and_size(); + if (AttachListener::pd_init() != 0) { return; }
--- a/src/share/vm/services/memBaseline.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memBaseline.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -40,6 +40,7 @@ {mtSymbol, "Symbol"}, {mtNMT, "Memory Tracking"}, {mtChunk, "Pooled Free Chunks"}, + {mtClassShared,"Shared spaces for classes"}, {mtNone, "Unknown"} // It can happen when type tagging records are lagging // behind }; @@ -55,6 +56,7 @@ _malloc_cs = NULL; _vm_cs = NULL; + _vm_map = NULL; _number_of_classes = 0; _number_of_threads = 0; @@ -72,6 +74,11 @@ _vm_cs = NULL; } + if (_vm_map != NULL) { + delete _vm_map; + _vm_map = NULL; + } + reset(); } @@ -85,6 +92,7 @@ if (_malloc_cs != NULL) _malloc_cs->clear(); if (_vm_cs != NULL) _vm_cs->clear(); + if (_vm_map != NULL) _vm_map->clear(); for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) { _malloc_data[index].clear(); @@ -94,39 +102,41 @@ } MemBaseline::~MemBaseline() { - if (_malloc_cs != NULL) { - delete _malloc_cs; - } - - if (_vm_cs != NULL) { - delete _vm_cs; - } + clear(); } // baseline malloc'd memory records, generate overall summary and summaries by // memory types bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records) { - MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records); - MemPointerRecord* mptr = (MemPointerRecord*)mItr.current(); + MemPointerArrayIteratorImpl malloc_itr((MemPointerArray*)malloc_records); + MemPointerRecord* malloc_ptr = (MemPointerRecord*)malloc_itr.current(); size_t used_arena_size = 0; int index; - while (mptr != NULL) { - index = flag2index(FLAGS_TO_MEMORY_TYPE(mptr->flags())); - size_t size = mptr->size(); - _total_malloced += size; - _malloc_data[index].inc(size); - if (MemPointerRecord::is_arena_record(mptr->flags())) { - // see if arena size record present - MemPointerRecord* next_p = (MemPointerRecordEx*)mItr.peek_next(); - if (MemPointerRecord::is_arena_size_record(next_p->flags())) { - assert(next_p->is_size_record_of_arena(mptr), "arena records do not match"); - size = next_p->size(); - _arena_data[index].inc(size); - used_arena_size += size; - mItr.next(); + while (malloc_ptr != NULL) { + index = flag2index(FLAGS_TO_MEMORY_TYPE(malloc_ptr->flags())); + size_t size = malloc_ptr->size(); + if (malloc_ptr->is_arena_memory_record()) { + // We do have anonymous arenas, they are either used as value objects, + // which are embedded inside other objects, or used as stack objects. 
+ _arena_data[index].inc(size); + used_arena_size += size; + } else { + _total_malloced += size; + _malloc_data[index].inc(size); + if (malloc_ptr->is_arena_record()) { + // see if arena memory record present + MemPointerRecord* next_malloc_ptr = (MemPointerRecordEx*)malloc_itr.peek_next(); + if (next_malloc_ptr->is_arena_memory_record()) { + assert(next_malloc_ptr->is_memory_record_of_arena(malloc_ptr), + "Arena records do not match"); + size = next_malloc_ptr->size(); + _arena_data[index].inc(size); + used_arena_size += size; + malloc_itr.next(); + } } } - mptr = (MemPointerRecordEx*)mItr.next(); + malloc_ptr = (MemPointerRecordEx*)malloc_itr.next(); } // substract used arena size to get size of arena chunk in free list @@ -142,20 +152,23 @@ // baseline mmap'd memory records, generate overall summary and summaries by // memory types bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) { - MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records); - VMMemRegion* vptr = (VMMemRegion*)vItr.current(); + MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records); + VMMemRegion* vm_ptr = (VMMemRegion*)vm_itr.current(); int index; - while (vptr != NULL) { - index = flag2index(FLAGS_TO_MEMORY_TYPE(vptr->flags())); - + while (vm_ptr != NULL) { + if (vm_ptr->is_reserved_region()) { + index = flag2index(FLAGS_TO_MEMORY_TYPE(vm_ptr->flags())); // we use the number of thread stack to count threads - if (IS_MEMORY_TYPE(vptr->flags(), mtThreadStack)) { + if (IS_MEMORY_TYPE(vm_ptr->flags(), mtThreadStack)) { _number_of_threads ++; } - _total_vm_reserved += vptr->reserved_size(); - _total_vm_committed += vptr->committed_size(); - _vm_data[index].inc(vptr->reserved_size(), vptr->committed_size()); - vptr = (VMMemRegion*)vItr.next(); + _total_vm_reserved += vm_ptr->size(); + _vm_data[index].inc(vm_ptr->size(), 0); + } else { + _total_vm_committed += vm_ptr->size(); + _vm_data[index].inc(0, vm_ptr->size()); + } + vm_ptr = (VMMemRegion*)vm_itr.next(); } return true; } @@ -165,41 +178,57 @@ bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records) { assert(MemTracker::track_callsite(), "detail tracking is off"); - MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records); - MemPointerRecordEx* mptr = (MemPointerRecordEx*)mItr.current(); - MallocCallsitePointer mp; + MemPointerArrayIteratorImpl malloc_itr(const_cast<MemPointerArray*>(malloc_records)); + MemPointerRecordEx* malloc_ptr = (MemPointerRecordEx*)malloc_itr.current(); + MallocCallsitePointer malloc_callsite; + // initailize malloc callsite array if (_malloc_cs == NULL) { _malloc_cs = new (std::nothrow) MemPointerArrayImpl<MallocCallsitePointer>(64); // out of native memory - if (_malloc_cs == NULL) { + if (_malloc_cs == NULL || _malloc_cs->out_of_memory()) { return false; } } else { _malloc_cs->clear(); } + MemPointerArray* malloc_data = const_cast<MemPointerArray*>(malloc_records); + + // sort into callsite pc order. 
Details are aggregated by callsites + malloc_data->sort((FN_SORT)malloc_sort_by_pc); + bool ret = true; + // baseline memory that is totaled over 1 KB - while (mptr != NULL) { - if (!MemPointerRecord::is_arena_size_record(mptr->flags())) { + while (malloc_ptr != NULL) { + if (!MemPointerRecord::is_arena_memory_record(malloc_ptr->flags())) { // skip thread stacks - if (!IS_MEMORY_TYPE(mptr->flags(), mtThreadStack)) { - if (mp.addr() != mptr->pc()) { - if ((mp.amount()/K) > 0) { - if (!_malloc_cs->append(&mp)) { - return false; + if (!IS_MEMORY_TYPE(malloc_ptr->flags(), mtThreadStack)) { + if (malloc_callsite.addr() != malloc_ptr->pc()) { + if ((malloc_callsite.amount()/K) > 0) { + if (!_malloc_cs->append(&malloc_callsite)) { + ret = false; + break; } } - mp = MallocCallsitePointer(mptr->pc()); + malloc_callsite = MallocCallsitePointer(malloc_ptr->pc()); } - mp.inc(mptr->size()); + malloc_callsite.inc(malloc_ptr->size()); } } - mptr = (MemPointerRecordEx*)mItr.next(); + malloc_ptr = (MemPointerRecordEx*)malloc_itr.next(); } - if (mp.addr() != 0 && (mp.amount()/K) > 0) { - if (!_malloc_cs->append(&mp)) { + // restore to address order. Snapshot malloc data is maintained in memory + // address order. + malloc_data->sort((FN_SORT)malloc_sort_by_addr); + + if (!ret) { + return false; + } + // deal with last record + if (malloc_callsite.addr() != 0 && (malloc_callsite.amount()/K) > 0) { + if (!_malloc_cs->append(&malloc_callsite)) { return false; } } @@ -210,34 +239,106 @@ bool MemBaseline::baseline_vm_details(const MemPointerArray* vm_records) { assert(MemTracker::track_callsite(), "detail tracking is off"); - VMCallsitePointer vp; - MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records); - VMMemRegionEx* vptr = (VMMemRegionEx*)vItr.current(); + VMCallsitePointer vm_callsite; + VMCallsitePointer* cur_callsite = NULL; + MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records); + VMMemRegionEx* vm_ptr = (VMMemRegionEx*)vm_itr.current(); + // initialize virtual memory map array + if (_vm_map == NULL) { + _vm_map = new (std::nothrow) MemPointerArrayImpl<VMMemRegionEx>(vm_records->length()); + if (_vm_map == NULL || _vm_map->out_of_memory()) { + return false; + } + } else { + _vm_map->clear(); + } + + // initialize virtual memory callsite array if (_vm_cs == NULL) { _vm_cs = new (std::nothrow) MemPointerArrayImpl<VMCallsitePointer>(64); - if (_vm_cs == NULL) { + if (_vm_cs == NULL || _vm_cs->out_of_memory()) { return false; } } else { _vm_cs->clear(); } - while (vptr != NULL) { - if (vp.addr() != vptr->pc()) { - if (!_vm_cs->append(&vp)) { + // consolidate virtual memory data + VMMemRegionEx* reserved_rec = NULL; + VMMemRegionEx* committed_rec = NULL; + + // vm_ptr is coming in increasing base address order + while (vm_ptr != NULL) { + if (vm_ptr->is_reserved_region()) { + // consolidate reserved memory regions for virtual memory map. + // The criteria for consolidation is: + // 1. two adjacent reserved memory regions + // 2. belong to the same memory type + // 3. reserved from the same callsite + if (reserved_rec == NULL || + reserved_rec->base() + reserved_rec->size() != vm_ptr->addr() || + FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) != FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()) || + reserved_rec->pc() != vm_ptr->pc()) { + if (!_vm_map->append(vm_ptr)) { return false; } - vp = VMCallsitePointer(vptr->pc()); + // inserted reserved region, we need the pointer to the element in virtual + // memory map array. 
+ reserved_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1); + } else { + reserved_rec->expand_region(vm_ptr->addr(), vm_ptr->size()); } - vp.inc(vptr->size(), vptr->committed_size()); - vptr = (VMMemRegionEx*)vItr.next(); - } - if (vp.addr() != 0) { - if (!_vm_cs->append(&vp)) { + + if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) { return false; } + vm_callsite = VMCallsitePointer(vm_ptr->pc()); + cur_callsite = &vm_callsite; + vm_callsite.inc(vm_ptr->size(), 0); + } else { + // consolidate committed memory regions for virtual memory map + // The criterial is: + // 1. two adjacent committed memory regions + // 2. committed from the same callsite + if (committed_rec == NULL || + committed_rec->base() + committed_rec->size() != vm_ptr->addr() || + committed_rec->pc() != vm_ptr->pc()) { + if (!_vm_map->append(vm_ptr)) { + return false; } + committed_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1); + } else { + committed_rec->expand_region(vm_ptr->addr(), vm_ptr->size()); + } + vm_callsite.inc(0, vm_ptr->size()); + } + vm_ptr = (VMMemRegionEx*)vm_itr.next(); + } + // deal with last record + if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) { + return false; + } + + // sort it into callsite pc order. Details are aggregated by callsites + _vm_cs->sort((FN_SORT)bl_vm_sort_by_pc); + + // walk the array to consolidate record by pc + MemPointerArrayIteratorImpl itr(_vm_cs); + VMCallsitePointer* callsite_rec = (VMCallsitePointer*)itr.current(); + VMCallsitePointer* next_rec = (VMCallsitePointer*)itr.next(); + while (next_rec != NULL) { + assert(callsite_rec != NULL, "Sanity check"); + if (next_rec->addr() == callsite_rec->addr()) { + callsite_rec->inc(next_rec->reserved_amount(), next_rec->committed_amount()); + itr.remove(); + next_rec = (VMCallsitePointer*)itr.current(); + } else { + callsite_rec = next_rec; + next_rec = (VMCallsitePointer*)itr.next(); + } + } + return true; } @@ -251,12 +352,8 @@ _number_of_classes = SystemDictionary::number_of_classes(); if (!summary_only && MemTracker::track_callsite() && _baselined) { - ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_pc); - ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_pc); _baselined = baseline_malloc_details(snapshot._alloc_ptrs) && baseline_vm_details(snapshot._vm_ptrs); - ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_addr); - ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_addr); } return _baselined; } @@ -278,7 +375,7 @@ return MemType2NameMap[index]._name; } } - assert(false, "no type"); + assert(false, err_msg("bad type %x", type)); return NULL; } @@ -341,13 +438,6 @@ return UNSIGNED_COMPARE(mp1->addr(), mp2->addr()); } -// sort snapshot mmap'd records in callsite pc order -int MemBaseline::vm_sort_by_pc(const void* p1, const void* p2) { - assert(MemTracker::track_callsite(),"Just check"); - const VMMemRegionEx* mp1 = (const VMMemRegionEx*)p1; - const VMMemRegionEx* mp2 = (const VMMemRegionEx*)p2; - return UNSIGNED_COMPARE(mp1->pc(), mp2->pc()); -} // sort baselined mmap'd records in size (reserved size) order int MemBaseline::bl_vm_sort_by_size(const void* p1, const void* p2) { @@ -376,12 +466,3 @@ return delta; } -// sort snapshot mmap'd records in memory block address order -int MemBaseline::vm_sort_by_addr(const void* p1, const void* p2) { - assert(MemTracker::is_on(), "Just check"); - const VMMemRegion* mp1 = (const VMMemRegion*)p1; - const VMMemRegion* mp2 = (const VMMemRegion*)p2; - int delta = 
UNSIGNED_COMPARE(mp1->addr(), mp2->addr()); - assert(delta != 0, "dup pointer"); - return delta; -}
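The rewritten detail pass builds a virtual memory map (_vm_map) by consolidating records as it walks them in base-address order: reserved regions merge when they are adjacent, of the same memory type, and reserved from the same callsite; committed regions merge on adjacency and callsite alone. A minimal sketch of the reserved-region merge test, with simplified stand-in types:

    #include <cstddef>

    struct Region {
      char*  base;
      size_t size;
      int    type;   // memory type tag
      void*  pc;     // reserving callsite
    };

    bool can_merge(const Region& a, const Region& b) {
      return a.base + a.size == b.base &&   // adjacent
             a.type == b.type &&            // same memory type
             a.pc   == b.pc;                // same callsite
    }

    void merge(Region& a, const Region& b) {
      a.size += b.size;                     // expand_region() equivalent
    }

    int main() {
      char buf[64];
      Region a = { buf,      32, 1, (void*)0x1 };
      Region b = { buf + 32, 32, 1, (void*)0x1 };
      if (can_merge(a, b)) merge(a, b);     // a now covers all 64 bytes
      return a.size == 64 ? 0 : 1;
    }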
--- a/src/share/vm/services/memBaseline.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memBaseline.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -320,6 +320,8 @@ // only available when detail tracking is on. MemPointerArray* _malloc_cs; MemPointerArray* _vm_cs; + // virtual memory map + MemPointerArray* _vm_map; private: static MemType2Name MemType2NameMap[NUMBER_OF_MEMORY_TYPE]; @@ -432,9 +434,6 @@ static int malloc_sort_by_pc(const void* p1, const void* p2); static int malloc_sort_by_addr(const void* p1, const void* p2); - static int vm_sort_by_pc(const void* p1, const void* p2); - static int vm_sort_by_addr(const void* p1, const void* p2); - private: // sorting functions for baselined records static int bl_malloc_sort_by_size(const void* p1, const void* p2);
--- a/src/share/vm/services/memPtr.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memPtr.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -40,35 +40,3 @@ return seq; } - - -bool VMMemRegion::contains(const VMMemRegion* mr) const { - assert(base() != 0, "no base address"); - assert(size() != 0 || committed_size() != 0, - "no range"); - address base_addr = base(); - address end_addr = base_addr + - (is_reserve_record()? reserved_size(): committed_size()); - if (mr->is_reserve_record()) { - if (mr->base() == base_addr && mr->size() == size()) { - // the same range - return true; - } - return false; - } else if (mr->is_commit_record() || mr->is_uncommit_record()) { - assert(mr->base() != 0 && mr->committed_size() > 0, - "bad record"); - return (mr->base() >= base_addr && - (mr->base() + mr->committed_size()) <= end_addr); - } else if (mr->is_type_tagging_record()) { - assert(mr->base() != 0, "no base"); - return mr->base() == base_addr; - } else if (mr->is_release_record()) { - assert(mr->base() != 0 && mr->size() > 0, - "bad record"); - return (mr->base() == base_addr && mr->size() == size()); - } else { - assert(false, "what happened?"); - return false; - } -}
--- a/src/share/vm/services/memPtr.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memPtr.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -165,7 +165,7 @@ return (flags & (otArena | tag_size)) == otArena; } - inline static bool is_arena_size_record(MEMFLAGS flags) { + inline static bool is_arena_memory_record(MEMFLAGS flags) { return (flags & (otArena | tag_size)) == (otArena | tag_size); } @@ -256,8 +256,8 @@ } // if this record records a size information of an arena - inline bool is_arena_size_record() const { - return is_arena_size_record(_flags); + inline bool is_arena_memory_record() const { + return is_arena_memory_record(_flags); } // if this pointer represents an address to an arena object @@ -266,8 +266,8 @@ } // if this record represents a size information of specific arena - inline bool is_size_record_of_arena(const MemPointerRecord* arena_rc) { - assert(is_arena_size_record(), "not size record"); + inline bool is_memory_record_of_arena(const MemPointerRecord* arena_rc) { + assert(is_arena_memory_record(), "not size record"); assert(arena_rc->is_arena_record(), "not arena record"); return (arena_rc->addr() + sizeof(void*)) == addr(); } @@ -291,6 +291,37 @@ inline bool is_type_tagging_record() const { return is_virtual_memory_type_record(_flags); } + + // if the two memory pointer records actually represent the same + // memory block + inline bool is_same_region(const MemPointerRecord* other) const { + return (addr() == other->addr() && size() == other->size()); + } + + // if this memory region fully contains another one + inline bool contains_region(const MemPointerRecord* other) const { + return contains_region(other->addr(), other->size()); + } + + // if this memory region fully contains specified memory range + inline bool contains_region(address add, size_t sz) const { + return (addr() <= add && addr() + size() >= add + sz); + } + + inline bool contains_address(address add) const { + return (addr() <= add && addr() + size() > add); + } + + // if this memory region overlaps another region + inline bool overlaps_region(const MemPointerRecord* other) const { + assert(other != NULL, "Just check"); + assert(size() > 0 && other->size() > 0, "empty range"); + return contains_address(other->addr()) || + contains_address(other->addr() + other->size() - 1) || // exclude end address + other->contains_address(addr()) || + other->contains_address(addr() + size() - 1); // exclude end address + } + }; // MemPointerRecordEx also records callsite pc, from where @@ -321,66 +352,32 @@ } }; -// a virtual memory region +// a virtual memory region. 
The region can represent a reserved +// virtual memory region or a committed memory region class VMMemRegion : public MemPointerRecord { - private: - // committed size - size_t _committed_size; - public: - VMMemRegion(): _committed_size(0) { } + VMMemRegion() { } void init(const MemPointerRecord* mp) { - assert(mp->is_vm_pointer(), "not virtual memory pointer"); + assert(mp->is_vm_pointer(), "Sanity check"); _addr = mp->addr(); - if (mp->is_commit_record() || mp->is_uncommit_record()) { - _committed_size = mp->size(); - set_size(_committed_size); - } else { set_size(mp->size()); - _committed_size = 0; - } set_flags(mp->flags()); } VMMemRegion& operator=(const VMMemRegion& other) { MemPointerRecord::operator=(other); - _committed_size = other.committed_size(); return *this; } - inline bool is_reserve_record() const { - return is_virtual_memory_reserve_record(flags()); - } - - inline bool is_release_record() const { - return is_virtual_memory_release_record(flags()); - } - - // resize reserved VM range - inline void set_reserved_size(size_t new_size) { - assert(new_size >= committed_size(), "resize"); - set_size(new_size); + inline bool is_reserved_region() const { + return is_allocation_record(); } - inline void commit(size_t size) { - _committed_size += size; + inline bool is_committed_region() const { + return is_commit_record(); } - inline void uncommit(size_t size) { - if (_committed_size >= size) { - _committed_size -= size; - } else { - _committed_size = 0; - } - } - - /* - * if this virtual memory range covers whole range of - * the other VMMemRegion - */ - bool contains(const VMMemRegion* mr) const; - /* base address of this virtual memory range */ inline address base() const { return addr(); @@ -391,13 +388,28 @@ set_flags(flags() | (f & mt_masks)); } - // release part of memory range - inline void partial_release(address add, size_t sz) { - assert(add >= addr() && add < addr() + size(), "not valid address"); - // for now, it can partially release from the both ends, - // but not in the middle + // expand this region to also cover specified range. + // The range has to be on either end of the memory region. + void expand_region(address addr, size_t sz) { + if (addr < base()) { + assert(addr + sz == base(), "Sanity check"); + _addr = addr; + set_size(size() + sz); + } else { + assert(base() + size() == addr, "Sanity check"); + set_size(size() + sz); + } + } + + // exclude the specified address range from this region. + // The excluded memory range has to be on either end of this memory + // region. + inline void exclude_region(address add, size_t sz) { + assert(is_reserved_region() || is_committed_region(), "Sanity check"); + assert(addr() != NULL && size() != 0, "Sanity check"); + assert(add >= addr() && add < addr() + size(), "Sanity check"); assert(add == addr() || (add + sz) == (addr() + size()), - "release in the middle"); + "exclude in the middle"); if (add == addr()) { set_addr(add + sz); set_size(size() - sz); @@ -405,16 +417,6 @@ set_size(size() - sz); } } - - // the committed size of the virtual memory block - inline size_t committed_size() const { - return _committed_size; - } - - // the reserved size of the virtual memory block - inline size_t reserved_size() const { - return size(); - } }; class VMMemRegionEx : public VMMemRegion {
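With the committed size gone, a VMMemRegion is either a reserved region or a committed region, and the interval work is done by the new MemPointerRecord predicates plus expand_region()/exclude_region(). The predicates treat a record as the half-open range [addr, addr + size); a self-contained sketch mirroring the end-address-excluded overlap test above:

    #include <cassert>
    #include <cstddef>

    typedef const unsigned char* address;

    struct Range {
      address addr;
      size_t  size;
      bool contains_address(address a) const {
        return addr <= a && a < addr + size;
      }
      bool overlaps(const Range& o) const {
        // test via the last byte so the end address itself is excluded
        return contains_address(o.addr) ||
               contains_address(o.addr + o.size - 1) ||
               o.contains_address(addr) ||
               o.contains_address(addr + size - 1);
      }
    };

    int main() {
      unsigned char buf[16];
      Range a = { buf, 8 };
      Range b = { buf + 8, 8 };   // touches a's end address only
      assert(!a.overlaps(b));     // end address excluded: no overlap
      return 0;
    }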
--- a/src/share/vm/services/memPtrArray.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memPtrArray.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -84,11 +84,7 @@ // implementation class class MemPointerArrayIteratorImpl : public MemPointerArrayIterator { -#ifdef ASSERT protected: -#else - private: -#endif MemPointerArray* _array; int _pos;
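Making _array and _pos unconditionally protected (previously protected only under ASSERT) appears to let the new record walkers, such as the VMRecordIterator that memSnapshot.cpp now returns, subclass the iterator and steer the position directly. A hypothetical, simplified shape of that arrangement:

    // hypothetical simplified iterator, not the HotSpot classes
    class BaseIterator {
     protected:
      const int* _array;   // underlying record array (simplified to int)
      int        _pos;
     public:
      explicit BaseIterator(const int* a) : _array(a), _pos(0) {}
      int current() const { return _array[_pos]; }
    };

    class SkippingIterator : public BaseIterator {
     public:
      explicit SkippingIterator(const int* a) : BaseIterator(a) {}
      // the subclass advances _pos directly, which a private member
      // would forbid
      int next_nonzero(int n) {
        while (_pos < n && _array[_pos] == 0) ++_pos;
        return _pos < n ? _array[_pos] : -1;
      }
    };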
--- a/src/share/vm/services/memRecorder.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memRecorder.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -31,14 +31,19 @@ #include "services/memTracker.hpp" MemPointer* SequencedRecordIterator::next_record() { - MemPointer* itr_cur = _itr.current(); - if (itr_cur == NULL) return NULL; - MemPointer* itr_next = _itr.next(); + MemPointerRecord* itr_cur = (MemPointerRecord*)_itr.current(); + if (itr_cur == NULL) { + return itr_cur; + } + + MemPointerRecord* itr_next = (MemPointerRecord*)_itr.next(); - while (itr_next != NULL && - same_kind((MemPointerRecord*)itr_cur, (MemPointerRecord*)itr_next)) { + // don't collapse virtual memory records + while (itr_next != NULL && !itr_cur->is_vm_pointer() && + !itr_next->is_vm_pointer() && + same_kind(itr_cur, itr_next)) { itr_cur = itr_next; - itr_next = _itr.next(); + itr_next = (MemPointerRecord*)_itr.next(); } return itr_cur;
--- a/src/share/vm/services/memRecorder.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memRecorder.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -188,6 +188,7 @@ // Test if the two records are the same kind: the same memory block and allocation // type. inline bool same_kind(const MemPointerRecord* p1, const MemPointerRecord* p2) const { + assert(!p1->is_vm_pointer() && !p2->is_vm_pointer(), "malloc pointer only"); return (p1->addr() == p2->addr() && (p1->flags() &MemPointerRecord::tag_masks) == (p2->flags() & MemPointerRecord::tag_masks));
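SequencedRecordIterator now collapses only consecutive malloc records for the same block, keeping the latest one, and never collapses virtual memory records; the new assert in same_kind() enforces that precondition. A compact sketch of the collapsing rule with a simplified record type:

    #include <cstddef>

    struct Rec {
      const void* addr;
      bool        is_vm;
    };

    // same memory block, and both are malloc records; mirrors the
    // malloc-only precondition asserted in same_kind() above
    inline bool same_kind(const Rec& a, const Rec& b) {
      return !a.is_vm && !b.is_vm && a.addr == b.addr;
    }

    // return the index of the surviving record, skipping superseded
    // malloc records for the same block; VM records never collapse
    size_t next_record(const Rec* recs, size_t n, size_t i) {
      while (i + 1 < n && same_kind(recs[i], recs[i + 1])) {
        ++i;
      }
      return i;
    }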
--- a/src/share/vm/services/memReporter.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memReporter.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -51,6 +51,7 @@ report_summaries(baseline); if (!summary_only && MemTracker::track_callsite()) { + report_virtual_memory_map(baseline); report_callsites(baseline); } _outputer.done(); @@ -74,6 +75,25 @@ _outputer.done_category_summary(); } +void BaselineReporter::report_virtual_memory_map(const MemBaseline& baseline) { + _outputer.start_virtual_memory_map(); + MemBaseline* pBL = const_cast<MemBaseline*>(&baseline); + MemPointerArrayIteratorImpl itr = MemPointerArrayIteratorImpl(pBL->_vm_map); + VMMemRegionEx* rgn = (VMMemRegionEx*)itr.current(); + while (rgn != NULL) { + if (rgn->is_reserved_region()) { + _outputer.reserved_memory_region(FLAGS_TO_MEMORY_TYPE(rgn->flags()), + rgn->base(), rgn->base() + rgn->size(), amount_in_current_scale(rgn->size()), rgn->pc()); + } else { + _outputer.committed_memory_region(rgn->base(), rgn->base() + rgn->size(), + amount_in_current_scale(rgn->size()), rgn->pc()); + } + rgn = (VMMemRegionEx*)itr.next(); + } + + _outputer.done_virtual_memory_map(); +} + void BaselineReporter::report_callsites(const MemBaseline& baseline) { _outputer.start_callsite(); MemBaseline* pBL = const_cast<MemBaseline*>(&baseline); @@ -324,6 +344,40 @@ _output->print_cr(" "); } + +void BaselineTTYOutputer::start_virtual_memory_map() { + _output->print_cr("Virtual memory map:"); +} + +void BaselineTTYOutputer::reserved_memory_region(MEMFLAGS type, address base, address end, + size_t size, address pc) { + const char* unit = memory_unit(_scale); + char buf[128]; + int offset; + _output->print_cr(" "); + _output->print_cr("[" PTR_FORMAT " - " PTR_FORMAT "] reserved %d%s for %s", base, end, size, unit, + MemBaseline::type2name(type)); + if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) { + _output->print_cr("\t\tfrom [%s+0x%x]", buf, offset); + } +} + +void BaselineTTYOutputer::committed_memory_region(address base, address end, size_t size, address pc) { + const char* unit = memory_unit(_scale); + char buf[128]; + int offset; + _output->print("\t[" PTR_FORMAT " - " PTR_FORMAT "] committed %d%s", base, end, size, unit); + if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) { + _output->print_cr(" from [%s+0x%x]", buf, offset); + } +} + +void BaselineTTYOutputer::done_virtual_memory_map() { + _output->print_cr(" "); +} + + + void BaselineTTYOutputer::start_callsite() { _output->print_cr("Details:"); _output->print_cr(" "); @@ -337,7 +391,7 @@ size_t malloc_count) { if (malloc_amt > 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print("[BOOTSTRAP]%18s", " "); @@ -357,7 +411,7 @@ size_t committed_amt) { if (reserved_amt > 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print("[BOOTSTRAP]%18s", " "); @@ -502,7 +556,7 @@ int malloc_diff, int malloc_count_diff) { if (malloc_diff != 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print_cr("[BOOTSTRAP]%18s", " ");
--- a/src/share/vm/services/memReporter.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memReporter.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -91,6 +91,11 @@ virtual void done_category_summary() = 0; + virtual void start_virtual_memory_map() = 0; + virtual void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc) = 0; + virtual void committed_memory_region(address base, address end, size_t size, address pc) = 0; + virtual void done_virtual_memory_map() = 0; + /* * Report callsite information */ @@ -134,6 +139,7 @@ private: void report_summaries(const MemBaseline& baseline); + void report_virtual_memory_map(const MemBaseline& baseline); void report_callsites(const MemBaseline& baseline); void diff_summaries(const MemBaseline& cur, const MemBaseline& prev); @@ -249,6 +255,13 @@ void done_category_summary(); + // virtual memory map + void start_virtual_memory_map(); + void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc); + void committed_memory_region(address base, address end, size_t size, address pc); + void done_virtual_memory_map(); + + /* * Report callsite information */
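Read together with the BaselineTTYOutputer format strings above, the new virtual memory map section of the report should look roughly like the following (illustrative only; addresses, sizes, and symbol offsets elided):

    Virtual memory map:

    [0x... - 0x...] reserved 1024KB for Thread Stack
                    from [libjvm.so+0x...]
            [0x... - 0x...] committed 1024KB from [libjvm.so+0x...]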
--- a/src/share/vm/services/memSnapshot.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memSnapshot.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -31,148 +31,357 @@ #include "services/memSnapshot.hpp" #include "services/memTracker.hpp" +#ifdef ASSERT -// stagging data groups the data of a VM memory range, so we can consolidate -// them into one record during the walk -bool StagingWalker::consolidate_vm_records(VMMemRegionEx* vm_rec) { - MemPointerRecord* cur = (MemPointerRecord*)_itr.current(); - assert(cur != NULL && cur->is_vm_pointer(), "not a virtual memory pointer"); +void decode_pointer_record(MemPointerRecord* rec) { + tty->print("Pointer: [" PTR_FORMAT " - " PTR_FORMAT "] size = %d bytes", rec->addr(), + rec->addr() + rec->size(), (int)rec->size()); + tty->print(" type = %s", MemBaseline::type2name(FLAGS_TO_MEMORY_TYPE(rec->flags()))); + if (rec->is_vm_pointer()) { + if (rec->is_allocation_record()) { + tty->print_cr(" (reserve)"); + } else if (rec->is_commit_record()) { + tty->print_cr(" (commit)"); + } else if (rec->is_uncommit_record()) { + tty->print_cr(" (uncommit)"); + } else if (rec->is_deallocation_record()) { + tty->print_cr(" (release)"); + } else { + tty->print_cr(" (tag)"); + } + } else { + if (rec->is_arena_memory_record()) { + tty->print_cr(" (arena size)"); + } else if (rec->is_allocation_record()) { + tty->print_cr(" (malloc)"); + } else { + tty->print_cr(" (free)"); + } + } + if (MemTracker::track_callsite()) { + char buf[1024]; + address pc = ((MemPointerRecordEx*)rec)->pc(); + if (pc != NULL && os::dll_address_to_function_name(pc, buf, sizeof(buf), NULL)) { + tty->print_cr("\tfrom %s", buf); + } else { + tty->print_cr("\tcould not decode pc = " PTR_FORMAT "", pc); + } + } +} - jint cur_seq; - jint next_seq; +void decode_vm_region_record(VMMemRegion* rec) { + tty->print("VM Region [" PTR_FORMAT " - " PTR_FORMAT "]", rec->addr(), + rec->addr() + rec->size()); + tty->print(" type = %s", MemBaseline::type2name(FLAGS_TO_MEMORY_TYPE(rec->flags()))); + if (rec->is_allocation_record()) { + tty->print_cr(" (reserved)"); + } else if (rec->is_commit_record()) { + tty->print_cr(" (committed)"); + } else { + ShouldNotReachHere(); + } + if (MemTracker::track_callsite()) { + char buf[1024]; + address pc = ((VMMemRegionEx*)rec)->pc(); + if (pc != NULL && os::dll_address_to_function_name(pc, buf, sizeof(buf), NULL)) { + tty->print_cr("\tfrom %s", buf); + } else { + tty->print_cr("\tcould not decode pc = " PTR_FORMAT "", pc); + } - bool trackCallsite = MemTracker::track_callsite(); + } +} + +#endif + - if (trackCallsite) { - vm_rec->init((MemPointerRecordEx*)cur); - cur_seq = ((SeqMemPointerRecordEx*)cur)->seq(); +bool VMMemPointerIterator::insert_record(MemPointerRecord* rec) { + VMMemRegionEx new_rec; + assert(rec->is_allocation_record() || rec->is_commit_record(), + "Sanity check"); + if (MemTracker::track_callsite()) { + new_rec.init((MemPointerRecordEx*)rec); } else { - vm_rec->init((MemPointerRecord*)cur); - cur_seq = ((SeqMemPointerRecord*)cur)->seq(); + new_rec.init(rec); + } + return insert(&new_rec); +} + +bool VMMemPointerIterator::insert_record_after(MemPointerRecord* rec) { + VMMemRegionEx new_rec; + assert(rec->is_allocation_record() || rec->is_commit_record(), + "Sanity check"); + if (MemTracker::track_callsite()) { + new_rec.init((MemPointerRecordEx*)rec); + } else { + new_rec.init(rec); + } + return insert_after(&new_rec); +} + +// we don't consolidate reserved regions, since they may be categorized +// in different types. 
+bool VMMemPointerIterator::add_reserved_region(MemPointerRecord* rec) { + assert(rec->is_allocation_record(), "Sanity check"); + VMMemRegion* reserved_region = (VMMemRegion*)current(); + + // we don't have anything yet + if (reserved_region == NULL) { + return insert_record(rec); } - // only can consolidate when we have allocation record, - // which contains virtual memory range - if (!cur->is_allocation_record()) { - _itr.next(); + assert(reserved_region->is_reserved_region(), "Sanity check"); + // duplicated records + if (reserved_region->is_same_region(rec)) { + return true; + } + // Overlapping stack regions indicate that a JNI thread failed to + // detach from the VM before exiting. This leaks the JavaThread object. + if (CheckJNICalls) { + guarantee(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) != mtThreadStack || + !reserved_region->overlaps_region(rec), + "Attached JNI thread exited without being detached"); + } + // otherwise, we should not have overlapping reserved regions + assert(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) == mtThreadStack || + reserved_region->base() > rec->addr(), "Just check: locate()"); + assert(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) == mtThreadStack || + !reserved_region->overlaps_region(rec), "overlapping reserved regions"); + + return insert_record(rec); +} + +// we do consolidate committed regions +bool VMMemPointerIterator::add_committed_region(MemPointerRecord* rec) { + assert(rec->is_commit_record(), "Sanity check"); + VMMemRegion* reserved_rgn = (VMMemRegion*)current(); + assert(reserved_rgn->is_reserved_region() && reserved_rgn->contains_region(rec), + "Sanity check"); + + // thread's native stack is always marked as "committed", ignore + // the "commit" operation for creating stack guard pages + if (FLAGS_TO_MEMORY_TYPE(reserved_rgn->flags()) == mtThreadStack && + FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) { return true; } - // allocation range - address base = cur->addr(); - address end = base + cur->size(); - - MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); - // if the memory range is alive - bool live_vm_rec = true; - while (next != NULL && next->is_vm_pointer()) { - if (next->is_allocation_record()) { - assert(next->addr() >= base, "sorting order or overlapping"); - break; + // if the reserved region has any committed regions + VMMemRegion* committed_rgn = (VMMemRegion*)next(); + while (committed_rgn != NULL && committed_rgn->is_committed_region()) { + // duplicated commit records + if(committed_rgn->contains_region(rec)) { + return true; + } else if (committed_rgn->overlaps_region(rec)) { + // overlaps front part + if (rec->addr() < committed_rgn->addr()) { + committed_rgn->expand_region(rec->addr(), + committed_rgn->addr() - rec->addr()); + } else { + // overlaps tail part + address committed_rgn_end = committed_rgn->addr() + + committed_rgn->size(); + assert(committed_rgn_end < rec->addr() + rec->size(), + "overlap tail part"); + committed_rgn->expand_region(committed_rgn_end, + (rec->addr() + rec->size()) - committed_rgn_end); + } + } else if (committed_rgn->base() + committed_rgn->size() == rec->addr()) { + // adjunct each other + committed_rgn->expand_region(rec->addr(), rec->size()); + VMMemRegion* next_reg = (VMMemRegion*)next(); + // see if we can consolidate next committed region + if (next_reg != NULL && next_reg->is_committed_region() && + next_reg->base() == committed_rgn->base() + committed_rgn->size()) { + committed_rgn->expand_region(next_reg->base(), next_reg->size()); + // delete merged 
region + remove(); + } + return true; + } else if (committed_rgn->base() > rec->addr()) { + // found the location, insert this committed region + return insert_record(rec); } + committed_rgn = (VMMemRegion*)next(); + } + return insert_record(rec); +} - if (trackCallsite) { - next_seq = ((SeqMemPointerRecordEx*)next)->seq(); - } else { - next_seq = ((SeqMemPointerRecord*)next)->seq(); - } +bool VMMemPointerIterator::remove_uncommitted_region(MemPointerRecord* rec) { + assert(rec->is_uncommit_record(), "sanity check"); + VMMemRegion* cur; + cur = (VMMemRegion*)current(); + assert(cur->is_reserved_region() && cur->contains_region(rec), + "Sanity check"); + // thread's native stack is always marked as "committed", ignore + // the "commit" operation for creating stack guard pages + if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack && + FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) { + return true; + } - if (next_seq < cur_seq) { - _itr.next(); - next = (MemPointerRecord*)_itr.peek_next(); - continue; - } - - if (next->is_deallocation_record()) { - if (next->addr() == base && next->size() == cur->size()) { - // the virtual memory range has been released - _itr.next(); - live_vm_rec = false; + cur = (VMMemRegion*)next(); + while (cur != NULL && cur->is_committed_region()) { + // region already uncommitted, must be due to duplicated record + if (cur->addr() >= rec->addr() + rec->size()) { + break; + } else if (cur->contains_region(rec)) { + // uncommit whole region + if (cur->is_same_region(rec)) { + remove(); break; - } else if (next->addr() < end) { // partial release - vm_rec->partial_release(next->addr(), next->size()); - _itr.next(); - } else { + } else if (rec->addr() == cur->addr() || + rec->addr() + rec->size() == cur->addr() + cur->size()) { + // uncommitted from either end of current memory region. 
+ cur->exclude_region(rec->addr(), rec->size()); break; - } - } else if (next->is_commit_record()) { - if (next->addr() >= base && next->addr() + next->size() <= end) { - vm_rec->commit(next->size()); - _itr.next(); - } else { - assert(next->addr() >= base, "sorting order or overlapping"); - break; + } else { // split the committed region and release the middle + address high_addr = cur->addr() + cur->size(); + size_t sz = high_addr - rec->addr(); + cur->exclude_region(rec->addr(), sz); + sz = high_addr - (rec->addr() + rec->size()); + if (MemTracker::track_callsite()) { + MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz, + ((VMMemRegionEx*)cur)->pc()); + return insert_record_after(&tmp); + } else { + MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz); + return insert_record_after(&tmp); + } } - } else if (next->is_uncommit_record()) { - if (next->addr() >= base && next->addr() + next->size() <= end) { - vm_rec->uncommit(next->size()); - _itr.next(); - } else { - assert(next->addr() >= end, "sorting order or overlapping"); - break; - } - } else if (next->is_type_tagging_record()) { - if (next->addr() >= base && next->addr() < end ) { - vm_rec->tag(next->flags()); - _itr.next(); - } else { - break; - } - } else { - assert(false, "unknown record type"); } - next = (MemPointerRecord*)_itr.peek_next(); + cur = (VMMemRegion*)next(); } - _itr.next(); - return live_vm_rec; + + // we may not find committed record due to duplicated records + return true; } -MemPointer* StagingWalker::next() { - MemPointerRecord* cur_p = (MemPointerRecord*)_itr.current(); - if (cur_p == NULL) { - _end_of_array = true; - return NULL; +bool VMMemPointerIterator::remove_released_region(MemPointerRecord* rec) { + assert(rec->is_deallocation_record(), "Sanity check"); + VMMemRegion* cur = (VMMemRegion*)current(); + assert(cur->is_reserved_region() && cur->contains_region(rec), + "Sanity check"); + if (rec->is_same_region(cur)) { + // release whole reserved region +#ifdef ASSERT + VMMemRegion* next_region = (VMMemRegion*)peek_next(); + // should not have any committed memory in this reserved region + assert(next_region == NULL || !next_region->is_committed_region(), "Sanity check"); +#endif + remove(); + } else if (rec->addr() == cur->addr() || + rec->addr() + rec->size() == cur->addr() + cur->size()) { + // released region is at either end of this region + cur->exclude_region(rec->addr(), rec->size()); + assert(check_reserved_region(), "Integrity check"); + } else { // split the reserved region and release the middle + address high_addr = cur->addr() + cur->size(); + size_t sz = high_addr - rec->addr(); + cur->exclude_region(rec->addr(), sz); + sz = high_addr - rec->addr() - rec->size(); + if (MemTracker::track_callsite()) { + MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz, + ((VMMemRegionEx*)cur)->pc()); + bool ret = insert_reserved_region(&tmp); + assert(!ret || check_reserved_region(), "Integrity check"); + return ret; + } else { + MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz); + bool ret = insert_reserved_region(&tmp); + assert(!ret || check_reserved_region(), "Integrity check"); + return ret; + } + } + return true; +} + +bool VMMemPointerIterator::insert_reserved_region(MemPointerRecord* rec) { + // skip all 'commit' records associated with previous reserved region + VMMemRegion* p = (VMMemRegion*)next(); + while (p != NULL && p->is_committed_region() && + p->base() + p->size() < rec->addr()) { + p = (VMMemRegion*)next(); + } + return 
insert_record(rec); +} + +bool VMMemPointerIterator::split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size) { + assert(rgn->contains_region(new_rgn_addr, new_rgn_size), "Not fully contained"); + address pc = (MemTracker::track_callsite() ? ((VMMemRegionEx*)rgn)->pc() : NULL); + if (rgn->base() == new_rgn_addr) { // new region is at the beginning of the region + size_t sz = rgn->size() - new_rgn_size; + // the original region becomes 'new' region + rgn->exclude_region(new_rgn_addr + new_rgn_size, sz); + // remaining becomes next region + MemPointerRecordEx next_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), sz, pc); + return insert_reserved_region(&next_rgn); + } else if (rgn->base() + rgn->size() == new_rgn_addr + new_rgn_size) { + rgn->exclude_region(new_rgn_addr, new_rgn_size); + MemPointerRecordEx next_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc); + return insert_reserved_region(&next_rgn); + } else { + // the orginal region will be split into three + address rgn_high_addr = rgn->base() + rgn->size(); + // first region + rgn->exclude_region(new_rgn_addr, (rgn_high_addr - new_rgn_addr)); + // the second region is the new region + MemPointerRecordEx new_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc); + if (!insert_reserved_region(&new_rgn)) return false; + // the remaining region + MemPointerRecordEx rem_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), + rgn_high_addr - (new_rgn_addr + new_rgn_size), pc); + return insert_reserved_region(&rem_rgn); + } +} + +static int sort_in_seq_order(const void* p1, const void* p2) { + assert(p1 != NULL && p2 != NULL, "Sanity check"); + const MemPointerRecord* mp1 = (MemPointerRecord*)p1; + const MemPointerRecord* mp2 = (MemPointerRecord*)p2; + return (mp1->seq() - mp2->seq()); +} + +bool StagingArea::init() { + if (MemTracker::track_callsite()) { + _malloc_data = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecordEx>(); + _vm_data = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecordEx>(); + } else { + _malloc_data = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecord>(); + _vm_data = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecord>(); } - MemPointerRecord* next_p; - if (cur_p->is_vm_pointer()) { - _is_vm_record = true; - if (!consolidate_vm_records(&_vm_record)) { - return next(); - } - } else { // malloc-ed pointer - _is_vm_record = false; - next_p = (MemPointerRecord*)_itr.peek_next(); - if (next_p != NULL && next_p->addr() == cur_p->addr()) { - assert(cur_p->is_allocation_record(), "sorting order"); - assert(!next_p->is_allocation_record(), "sorting order"); - _itr.next(); - if (cur_p->seq() < next_p->seq()) { - cur_p = next_p; - } - } - if (MemTracker::track_callsite()) { - _malloc_record.init((MemPointerRecordEx*)cur_p); - } else { - _malloc_record.init((MemPointerRecord*)cur_p); - } + if (_malloc_data != NULL && _vm_data != NULL && + !_malloc_data->out_of_memory() && + !_vm_data->out_of_memory()) { + return true; + } else { + if (_malloc_data != NULL) delete _malloc_data; + if (_vm_data != NULL) delete _vm_data; + _malloc_data = NULL; + _vm_data = NULL; + return false; + } +} - _itr.next(); - } - return current(); + +VMRecordIterator StagingArea::virtual_memory_record_walker() { + MemPointerArray* arr = vm_data(); + // sort into seq number order + arr->sort((FN_SORT)sort_in_seq_order); + return VMRecordIterator(arr); } + MemSnapshot::MemSnapshot() { if (MemTracker::track_callsite()) { _alloc_ptrs = new (std::nothrow) MemPointerArrayImpl<MemPointerRecordEx>(); _vm_ptrs = new 
(std::nothrow)MemPointerArrayImpl<VMMemRegionEx>(64, true); - _staging_area = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecordEx>(); } else { _alloc_ptrs = new (std::nothrow) MemPointerArrayImpl<MemPointerRecord>(); _vm_ptrs = new (std::nothrow)MemPointerArrayImpl<VMMemRegion>(64, true); - _staging_area = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecord>(); } + _staging_area.init(); _lock = new (std::nothrow) Mutex(Monitor::max_nonleaf - 1, "memSnapshotLock"); NOT_PRODUCT(_untracked_count = 0;) } @@ -181,11 +390,6 @@ assert(MemTracker::shutdown_in_progress(), "native memory tracking still on"); { MutexLockerEx locker(_lock); - if (_staging_area != NULL) { - delete _staging_area; - _staging_area = NULL; - } - if (_alloc_ptrs != NULL) { delete _alloc_ptrs; _alloc_ptrs = NULL; @@ -203,207 +407,226 @@ } } -void MemSnapshot::copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src) { + +void MemSnapshot::copy_seq_pointer(MemPointerRecord* dest, const MemPointerRecord* src) { assert(dest != NULL && src != NULL, "Just check"); assert(dest->addr() == src->addr(), "Just check"); + assert(dest->seq() > 0 && src->seq() > 0, "not sequenced"); - MEMFLAGS flags = dest->flags(); + if (MemTracker::track_callsite()) { + *(SeqMemPointerRecordEx*)dest = *(SeqMemPointerRecordEx*)src; + } else { + *(SeqMemPointerRecord*)dest = *(SeqMemPointerRecord*)src; + } +} + +void MemSnapshot::assign_pointer(MemPointerRecord*dest, const MemPointerRecord* src) { + assert(src != NULL && dest != NULL, "Just check"); + assert(dest->seq() == 0 && src->seq() >0, "cast away sequence"); if (MemTracker::track_callsite()) { *(MemPointerRecordEx*)dest = *(MemPointerRecordEx*)src; } else { - *dest = *src; + *(MemPointerRecord*)dest = *(MemPointerRecord*)src; } } - -// merge a per-thread memory recorder to the staging area +// merge a recorder to the staging area bool MemSnapshot::merge(MemRecorder* rec) { assert(rec != NULL && !rec->out_of_memory(), "Just check"); - // out of memory - if (_staging_area == NULL || _staging_area->out_of_memory()) { - return false; - } - SequencedRecordIterator itr(rec->pointer_itr()); MutexLockerEx lock(_lock, true); - MemPointerIterator staging_itr(_staging_area); - MemPointerRecord *p1, *p2; - p1 = (MemPointerRecord*) itr.current(); - while (p1 != NULL) { - p2 = (MemPointerRecord*)staging_itr.locate(p1->addr()); - // we have not seen this memory block, so just add to staging area - if (p2 == NULL) { - if (!staging_itr.insert(p1)) { + MemPointerIterator malloc_staging_itr(_staging_area.malloc_data()); + MemPointerRecord* incoming_rec = (MemPointerRecord*) itr.current(); + MemPointerRecord* matched_rec; + + while (incoming_rec != NULL) { + if (incoming_rec->is_vm_pointer()) { + // we don't do anything with virtual memory records during merge + if (!_staging_area.vm_data()->append(incoming_rec)) { return false; } - } else if (p1->addr() == p2->addr()) { - MemPointerRecord* staging_next = (MemPointerRecord*)staging_itr.peek_next(); - // a memory block can have many tagging records, find right one to replace or - // right position to insert - while (staging_next != NULL && staging_next->addr() == p1->addr()) { - if ((staging_next->flags() & MemPointerRecord::tag_masks) <= - (p1->flags() & MemPointerRecord::tag_masks)) { - p2 = (MemPointerRecord*)staging_itr.next(); - staging_next = (MemPointerRecord*)staging_itr.peek_next(); - } else { - break; + } else { + // locate matched record and/or also position the iterator to proper + // location for this incoming record. 
+ matched_rec = (MemPointerRecord*)malloc_staging_itr.locate(incoming_rec->addr()); + // we have not seen this memory block in this generation, + // so just add to staging area + if (matched_rec == NULL) { + if (!malloc_staging_itr.insert(incoming_rec)) { + return false; } - } - int df = (p1->flags() & MemPointerRecord::tag_masks) - - (p2->flags() & MemPointerRecord::tag_masks); - if (df == 0) { - assert(p1->seq() > 0, "not sequenced"); - assert(p2->seq() > 0, "not sequenced"); - if (p1->seq() > p2->seq()) { - copy_pointer(p2, p1); + } else if (incoming_rec->addr() == matched_rec->addr()) { + // whoever has higher sequence number wins + if (incoming_rec->seq() > matched_rec->seq()) { + copy_seq_pointer(matched_rec, incoming_rec); } - } else if (df < 0) { - if (!staging_itr.insert(p1)) { + } else if (incoming_rec->addr() < matched_rec->addr()) { + if (!malloc_staging_itr.insert(incoming_rec)) { return false; } } else { - if (!staging_itr.insert_after(p1)) { - return false; - } - } - } else if (p1->addr() < p2->addr()) { - if (!staging_itr.insert(p1)) { - return false; - } - } else { - if (!staging_itr.insert_after(p1)) { - return false; + ShouldNotReachHere(); } } - p1 = (MemPointerRecord*)itr.next(); + incoming_rec = (MemPointerRecord*)itr.next(); } NOT_PRODUCT(void check_staging_data();) return true; } - // promote data to next generation -void MemSnapshot::promote() { - assert(_alloc_ptrs != NULL && _staging_area != NULL && _vm_ptrs != NULL, - "Just check"); +bool MemSnapshot::promote() { + assert(_alloc_ptrs != NULL && _vm_ptrs != NULL, "Just check"); + assert(_staging_area.malloc_data() != NULL && _staging_area.vm_data() != NULL, + "Just check"); MutexLockerEx lock(_lock, true); - StagingWalker walker(_staging_area); - MemPointerIterator malloc_itr(_alloc_ptrs); - VMMemPointerIterator vm_itr(_vm_ptrs); - MemPointer* cur = walker.current(); - while (cur != NULL) { - if (walker.is_vm_record()) { - VMMemRegion* cur_vm = (VMMemRegion*)cur; - VMMemRegion* p = (VMMemRegion*)vm_itr.locate(cur_vm->addr()); - cur_vm = (VMMemRegion*)cur; - if (p != NULL && (p->contains(cur_vm) || p->base() == cur_vm->base())) { - assert(p->is_reserve_record() || - p->is_commit_record(), "wrong vm record type"); - // resize existing reserved range - if (cur_vm->is_reserve_record() && p->base() == cur_vm->base()) { - assert(cur_vm->size() >= p->committed_size(), "incorrect resizing"); - p->set_reserved_size(cur_vm->size()); - } else if (cur_vm->is_commit_record()) { - p->commit(cur_vm->committed_size()); - } else if (cur_vm->is_uncommit_record()) { - p->uncommit(cur_vm->committed_size()); - if (!p->is_reserve_record() && p->committed_size() == 0) { - vm_itr.remove(); - } - } else if (cur_vm->is_type_tagging_record()) { - p->tag(cur_vm->flags()); - } else if (cur_vm->is_release_record()) { - if (cur_vm->base() == p->base() && cur_vm->size() == p->size()) { - // release the whole range - vm_itr.remove(); - } else { - // partial release - p->partial_release(cur_vm->base(), cur_vm->size()); - } + + MallocRecordIterator malloc_itr = _staging_area.malloc_record_walker(); + bool promoted = false; + if (promote_malloc_records(&malloc_itr)) { + VMRecordIterator vm_itr = _staging_area.virtual_memory_record_walker(); + if (promote_virtual_memory_records(&vm_itr)) { + promoted = true; + } + } + + NOT_PRODUCT(check_malloc_pointers();) + _staging_area.clear(); + return promoted; +} + +bool MemSnapshot::promote_malloc_records(MemPointerArrayIterator* itr) { + MemPointerIterator malloc_snapshot_itr(_alloc_ptrs); + 
MemPointerRecord* new_rec = (MemPointerRecord*)itr->current(); + MemPointerRecord* matched_rec; + while (new_rec != NULL) { + matched_rec = (MemPointerRecord*)malloc_snapshot_itr.locate(new_rec->addr()); + // found matching memory block + if (matched_rec != NULL && new_rec->addr() == matched_rec->addr()) { + // snapshot already contains 'live' records + assert(matched_rec->is_allocation_record() || matched_rec->is_arena_memory_record(), + "Sanity check"); + // update block states + if (new_rec->is_allocation_record()) { + assign_pointer(matched_rec, new_rec); + } else if (new_rec->is_arena_memory_record()) { + if (new_rec->size() == 0) { + // remove size record once size drops to 0 + malloc_snapshot_itr.remove(); } else { - // we do see multiple reserver on the same vm range - assert((cur_vm->is_commit_record() || cur_vm->is_reserve_record()) && - cur_vm->base() == p->base() && cur_vm->size() == p->size(), "bad record"); - p->tag(cur_vm->flags()); + assign_pointer(matched_rec, new_rec); } } else { - if(cur_vm->is_reserve_record()) { - if (p == NULL || p->base() > cur_vm->base()) { - vm_itr.insert(cur_vm); + // a deallocation record + assert(new_rec->is_deallocation_record(), "Sanity check"); + // an arena record can be followed by a size record; we need to remove both + if (matched_rec->is_arena_record()) { + MemPointerRecord* next = (MemPointerRecord*)malloc_snapshot_itr.peek_next(); + if (next->is_arena_memory_record() && next->is_memory_record_of_arena(matched_rec)) { + malloc_snapshot_itr.remove(); + } + } + // the memory is deallocated, remove related record(s) + malloc_snapshot_itr.remove(); + } + } else { + // don't insert size 0 record + if (new_rec->is_arena_memory_record() && new_rec->size() == 0) { + new_rec = NULL; + } + + if (new_rec != NULL) { + if (new_rec->is_allocation_record() || new_rec->is_arena_memory_record()) { + if (matched_rec != NULL && new_rec->addr() > matched_rec->addr()) { + if (!malloc_snapshot_itr.insert_after(new_rec)) { + return false; + } } else { - vm_itr.insert_after(cur_vm); + if (!malloc_snapshot_itr.insert(new_rec)) { + return false; + } + } + } +#ifndef PRODUCT + else if (!has_allocation_record(new_rec->addr())) { + // NMT cannot track some startup memory, which is allocated before NMT is on + _untracked_count ++; + } +#endif + } + } + new_rec = (MemPointerRecord*)itr->next(); + } + return true; +} + +bool MemSnapshot::promote_virtual_memory_records(MemPointerArrayIterator* itr) { + VMMemPointerIterator vm_snapshot_itr(_vm_ptrs); + MemPointerRecord* new_rec = (MemPointerRecord*)itr->current(); + VMMemRegion* reserved_rec; + while (new_rec != NULL) { + assert(new_rec->is_vm_pointer(), "Sanity check"); + + // locate a reserved region that contains the specified address, or + // the nearest reserved region whose base address is just above the specified + // address + reserved_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr()); + if (reserved_rec != NULL && reserved_rec->contains_region(new_rec)) { + // snapshot can only have 'live' records + assert(reserved_rec->is_reserved_region(), "Sanity check"); + if (new_rec->is_allocation_record()) { + if (!reserved_rec->is_same_region(new_rec)) { + // only deal with splitting a bigger reserved region into smaller regions. + // So far, CDS is the only use case.
+ if (!vm_snapshot_itr.split_reserved_region(reserved_rec, new_rec->addr(), new_rec->size())) { + return false; + } + } + } else if (new_rec->is_uncommit_record()) { + if (!vm_snapshot_itr.remove_uncommitted_region(new_rec)) { + return false; + } + } else if (new_rec->is_commit_record()) { + // insert or expand existing committed region to cover this + // newly committed region + if (!vm_snapshot_itr.add_committed_region(new_rec)) { + return false; + } + } else if (new_rec->is_deallocation_record()) { + // release part or all of the memory region + if (!vm_snapshot_itr.remove_released_region(new_rec)) { + return false; + } + } else if (new_rec->is_type_tagging_record()) { + // tag this reserved virtual memory range with a memory type. Cannot re-tag a memory range + // to a different type. + assert(FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == mtNone || + FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_rec->flags()), + "Sanity check"); + reserved_rec->tag(new_rec->flags()); + } else { + ShouldNotReachHere(); } } else { - // In theory, we should assert without conditions. However, in case of native - // thread stack, NMT explicitly releases the thread stack in Thread's destructor, - // due to platform dependent behaviors. On some platforms, we see uncommit/release - // native thread stack, but some, we don't. - assert(cur_vm->is_uncommit_record() || cur_vm->is_deallocation_record(), - err_msg("Should not reach here, pointer addr = [" INTPTR_FORMAT "], flags = [%x]", - cur_vm->addr(), cur_vm->flags())); - } - } - } else { - MemPointerRecord* cur_p = (MemPointerRecord*)cur; - MemPointerRecord* p = (MemPointerRecord*)malloc_itr.locate(cur->addr()); - if (p != NULL && cur_p->addr() == p->addr()) { - assert(p->is_allocation_record() || p->is_arena_size_record(), "untracked"); - if (cur_p->is_allocation_record() || cur_p->is_arena_size_record()) { - copy_pointer(p, cur_p); - } else { // deallocation record - assert(cur_p->is_deallocation_record(), "wrong record type"); - - // we are removing an arena record, we also need to remove its 'size' - // record behind it - if (p->is_arena_record()) { - MemPointerRecord* next_p = (MemPointerRecord*)malloc_itr.peek_next(); - if (next_p->is_arena_size_record()) { - assert(next_p->is_size_record_of_arena(p), "arena records dont match"); - malloc_itr.remove(); - } + /* + * An assertion failure here indicates mismatched virtual memory records. The likely + * scenario is that some virtual memory operations do not go through the os::xxxx_memory() + * API and have to be tracked manually (perfMemory is an example).
+ */ + assert(new_rec->is_allocation_record(), "Sanity check"); + if (!vm_snapshot_itr.add_reserved_region(new_rec)) { + return false; } - malloc_itr.remove(); - } - } else { - if (cur_p->is_arena_size_record()) { - MemPointerRecord* prev_p = (MemPointerRecord*)malloc_itr.peek_prev(); - if (prev_p != NULL && - (!prev_p->is_arena_record() || !cur_p->is_size_record_of_arena(prev_p))) { - // arena already deallocated - cur_p = NULL; - } - } - if (cur_p != NULL) { - if (cur_p->is_allocation_record() || cur_p->is_arena_size_record()) { - if (p != NULL && cur_p->addr() > p->addr()) { - malloc_itr.insert_after(cur); - } else { - malloc_itr.insert(cur); - } - } -#ifndef PRODUCT - else if (!has_allocation_record(cur_p->addr())){ - // NMT can not track some startup memory, which allocated before NMT - // is enabled - _untracked_count ++; - } -#endif - } - } - } - - cur = walker.next(); + } + new_rec = (MemPointerRecord*)itr->next(); } - NOT_PRODUCT(check_malloc_pointers();) - _staging_area->shrink(); - _staging_area->clear(); + return true; } - #ifndef PRODUCT void MemSnapshot::print_snapshot_stats(outputStream* st) { st->print_cr("Snapshot:"); @@ -413,8 +636,15 @@ st->print_cr("\tVM: %d/%d [%5.2f%%] %dKB", _vm_ptrs->length(), _vm_ptrs->capacity(), (100.0 * (float)_vm_ptrs->length()) / (float)_vm_ptrs->capacity(), _vm_ptrs->instance_size()/K); - st->print_cr("\tStaging: %d/%d [%5.2f%%] %dKB", _staging_area->length(), _staging_area->capacity(), - (100.0 * (float)_staging_area->length()) / (float)_staging_area->capacity(), _staging_area->instance_size()/K); + st->print_cr("\tMalloc staging Area: %d/%d [%5.2f%%] %dKB", _staging_area.malloc_data()->length(), + _staging_area.malloc_data()->capacity(), + (100.0 * (float)_staging_area.malloc_data()->length()) / (float)_staging_area.malloc_data()->capacity(), + _staging_area.malloc_data()->instance_size()/K); + + st->print_cr("\tVirtual memory staging Area: %d/%d [%5.2f%%] %dKB", _staging_area.vm_data()->length(), + _staging_area.vm_data()->capacity(), + (100.0 * (float)_staging_area.vm_data()->length()) / (float)_staging_area.vm_data()->capacity(), + _staging_area.vm_data()->instance_size()/K); st->print_cr("\tUntracked allocation: %d", _untracked_count); } @@ -433,7 +663,7 @@ } bool MemSnapshot::has_allocation_record(address addr) { - MemPointerArrayIteratorImpl itr(_staging_area); + MemPointerArrayIteratorImpl itr(_staging_area.malloc_data()); MemPointerRecord* cur = (MemPointerRecord*)itr.current(); while (cur != NULL) { if (cur->addr() == addr && cur->is_allocation_record()) { @@ -447,7 +677,7 @@ #ifdef ASSERT void MemSnapshot::check_staging_data() { - MemPointerArrayIteratorImpl itr(_staging_area); + MemPointerArrayIteratorImpl itr(_staging_area.malloc_data()); MemPointerRecord* cur = (MemPointerRecord*)itr.current(); MemPointerRecord* next = (MemPointerRecord*)itr.next(); while (next != NULL) { @@ -458,6 +688,41 @@ cur = next; next = (MemPointerRecord*)itr.next(); } + + MemPointerArrayIteratorImpl vm_itr(_staging_area.vm_data()); + cur = (MemPointerRecord*)vm_itr.current(); + while (cur != NULL) { + assert(cur->is_vm_pointer(), "virtual memory pointer only"); + cur = (MemPointerRecord*)vm_itr.next(); + } +} + +void MemSnapshot::dump_all_vm_pointers() { + MemPointerArrayIteratorImpl itr(_vm_ptrs); + VMMemRegion* ptr = (VMMemRegion*)itr.current(); + tty->print_cr("dump virtual memory pointers:"); + while (ptr != NULL) { + if (ptr->is_committed_region()) { + tty->print("\t"); + } + tty->print("[" PTR_FORMAT " - " PTR_FORMAT "] [%x]", ptr->addr(), + 
(ptr->addr() + ptr->size()), ptr->flags()); + + if (MemTracker::track_callsite()) { + VMMemRegionEx* ex = (VMMemRegionEx*)ptr; + if (ex->pc() != NULL) { + char buf[1024]; + if (os::dll_address_to_function_name(ex->pc(), buf, sizeof(buf), NULL)) { + tty->print_cr("\t%s", buf); + } else { + tty->print_cr(""); + } + } + } + + ptr = (VMMemRegion*)itr.next(); + } + tty->flush(); } #endif // ASSERT
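The merge path above keeps at most one malloc record per address in the staging area and lets the record with the higher sequence number win, while virtual memory records are simply appended for later reconciliation. A minimal standalone sketch of that malloc policy, using a hypothetical simplified Rec type and stage_malloc_record() function in place of the real MemPointerRecord and iterator classes:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // hypothetical stand-in for MemPointerRecord: sorted by addr, stamped with
    // the global sequence number of the originating malloc/free operation
    struct Rec {
      uintptr_t addr;
      unsigned  seq;
      std::size_t size;
    };

    // keep at most one malloc record per address; on a collision the record
    // with the higher sequence number (the later operation) wins
    bool stage_malloc_record(std::vector<Rec>& staging, const Rec& incoming) {
      std::vector<Rec>::iterator it = std::lower_bound(
          staging.begin(), staging.end(), incoming,
          [](const Rec& a, const Rec& b) { return a.addr < b.addr; });
      if (it != staging.end() && it->addr == incoming.addr) {
        if (incoming.seq > it->seq) {
          *it = incoming;          // newer operation on the same block supersedes
        }
        return true;
      }
      staging.insert(it, incoming); // first record for this block: sorted insert
      return true;
    }

This mirrors the three branches of the real merge loop: equal address replaces if newer, smaller address inserts before the located record, and any other outcome is unreachable because locate() returns the first record at or above the address.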
--- a/src/share/vm/services/memSnapshot.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memSnapshot.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -31,7 +31,6 @@ #include "services/memBaseline.hpp" #include "services/memPtrArray.hpp" - // Snapshot pointer array iterator // The pointer array contains malloc-ed pointers @@ -111,40 +110,59 @@ MemPointerIterator(arr) { } - // locate an exiting record that contains specified address, or - // the record, where the record with specified address, should - // be inserted + // locate an existing reserved memory region that contains the specified address, + // or the reserved region just above this address, where the incoming + // reserved region should be inserted. virtual MemPointer* locate(address addr) { - VMMemRegion* cur = (VMMemRegion*)current(); - VMMemRegion* next_p; + reset(); + VMMemRegion* reg = (VMMemRegion*)current(); + while (reg != NULL) { + if (reg->is_reserved_region()) { + if (reg->contains_address(addr) || addr < reg->base()) { + return reg; + } + } + reg = (VMMemRegion*)next(); + } + return NULL; + } + + // the following methods update virtual memory in the context + // of the 'current' position, which is properly positioned by + // callers via the locate method. + bool add_reserved_region(MemPointerRecord* rec); + bool add_committed_region(MemPointerRecord* rec); + bool remove_uncommitted_region(MemPointerRecord* rec); + bool remove_released_region(MemPointerRecord* rec); - while (cur != NULL) { - if (cur->base() > addr) { - return cur; - } else { - // find nearest existing range that has base address <= addr - next_p = (VMMemRegion*)peek_next(); - if (next_p != NULL && next_p->base() <= addr) { - cur = (VMMemRegion*)next(); - continue; - } - } + // split a reserved region to create a new memory region with the specified base and size + bool split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size); + private: + bool insert_record(MemPointerRecord* rec); + bool insert_record_after(MemPointerRecord* rec); + + bool insert_reserved_region(MemPointerRecord* rec); - if (cur->is_reserve_record() && - cur->base() <= addr && - (cur->base() + cur->size() > addr)) { - return cur; - } else if (cur->is_commit_record() && - cur->base() <= addr && - (cur->base() + cur->committed_size() > addr)) { - return cur; + // reset current position + inline void reset() { _pos = 0; } +#ifdef ASSERT + // check integrity of records on the current reserved memory region. + bool check_reserved_region() { + VMMemRegion* reserved_region = (VMMemRegion*)current(); + assert(reserved_region != NULL && reserved_region->is_reserved_region(), + "Sanity check"); + // all committed regions that follow the current reserved region should + // belong to that reserved region.
+ VMMemRegion* next_region = (VMMemRegion*)next(); + for (; next_region != NULL && next_region->is_committed_region(); + next_region = (VMMemRegion*)next() ) { + if (!reserved_region->contains_region(next_region)) { + return false; } - } + return true; } -#ifdef ASSERT virtual bool is_dup_pointer(const MemPointer* ptr1, const MemPointer* ptr2) const { VMMemRegion* p1 = (VMMemRegion*)ptr1; @@ -162,73 +180,168 @@ #endif }; -class StagingWalker : public MemPointerArrayIterator { +class MallocRecordIterator : public MemPointerArrayIterator { private: MemPointerArrayIteratorImpl _itr; - bool _is_vm_record; - bool _end_of_array; - VMMemRegionEx _vm_record; - MemPointerRecordEx _malloc_record; + + public: - StagingWalker(MemPointerArray* arr): _itr(arr) { - _end_of_array = false; - next(); + MallocRecordIterator(MemPointerArray* arr) : _itr(arr) { } - // return the pointer at current position - MemPointer* current() const { - if (_end_of_array) { - return NULL; + virtual MemPointer* current() const { +#ifdef ASSERT + MemPointer* cur_rec = _itr.current(); + if (cur_rec != NULL) { + MemPointer* prev_rec = _itr.peek_prev(); + MemPointer* next_rec = _itr.peek_next(); + assert(prev_rec == NULL || prev_rec->addr() < cur_rec->addr(), "Sorting order"); + assert(next_rec == NULL || next_rec->addr() > cur_rec->addr(), "Sorting order"); } - if (is_vm_record()) { - return (MemPointer*)&_vm_record; - } else { - return (MemPointer*)&_malloc_record; +#endif + return _itr.current(); + } + virtual MemPointer* next() { + MemPointerRecord* next_rec = (MemPointerRecord*)_itr.next(); + // an arena memory record is a special case: we have to compare its + // sequence number against that of its associated arena record. + if (next_rec != NULL && next_rec->is_arena_memory_record()) { + MemPointerRecord* prev_rec = (MemPointerRecord*)_itr.peek_prev(); + // if there is an associated arena record, it has to be the previous + // record because of the sorting order (by address) - NMT generates a pseudo address + // for an arena's size record by offsetting the arena's address, which guarantees + // the ordering of an arena record and its size record. + if (prev_rec != NULL && prev_rec->is_arena_record() && + next_rec->is_memory_record_of_arena(prev_rec)) { + if (prev_rec->seq() > next_rec->seq()) { + // Skip this arena memory record + // Two scenarios: + // - if the arena record is an allocation record, this early + // size record must be left over by a previous arena, + // and the last size record should have size = 0. + // - if the arena record is a deallocation record, this + // size record should be its cleanup record, which should + // also have size = 0. In other words, an arena always resets + // its size before it goes away (see Arena's destructor) + assert(next_rec->size() == 0, "size not reset"); + return _itr.next(); + } else { + assert(prev_rec->is_allocation_record(), + "Arena size record ahead of allocation record"); + } + } + } + return next_rec; + } + + MemPointer* peek_next() const { ShouldNotReachHere(); return NULL; } + MemPointer* peek_prev() const { ShouldNotReachHere(); return NULL; } + void remove() { ShouldNotReachHere(); } + bool insert(MemPointer* ptr) { ShouldNotReachHere(); return false; } + bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; } +}; + +// collapse duplicated records. Eliminating duplicated records here is much +// cheaper than during the promotion phase.
However, it does have a limitation - it + can only eliminate duplicated records within a generation; there is + still a chance of seeing duplicated records during promotion. +// We want to use the record with the higher sequence number, because it has +// a more accurate callsite pc. +class VMRecordIterator : public MemPointerArrayIterator { + private: + MemPointerArrayIteratorImpl _itr; + + public: + VMRecordIterator(MemPointerArray* arr) : _itr(arr) { + MemPointerRecord* cur = (MemPointerRecord*)_itr.current(); + MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); + while (next != NULL) { + assert(cur != NULL, "Sanity check"); + assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(), + "pre-sort order"); + + if (is_duplicated_record(cur, next)) { + _itr.next(); + next = (MemPointerRecord*)_itr.peek_next(); + } else { + break; + } } } - // return the next pointer and advance current position - MemPointer* next(); - - // type of 'current' record - bool is_vm_record() const { - return _is_vm_record; + virtual MemPointer* current() const { + return _itr.current(); } - // return the next poinger without advancing current position - MemPointer* peek_next() const { - assert(false, "not supported"); - return NULL; + // get the next record, skipping duplicated records + virtual MemPointer* next() { + MemPointerRecord* cur = (MemPointerRecord*)_itr.next(); + MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); + while (next != NULL) { + assert(cur != NULL, "Sanity check"); + assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(), + "pre-sort order"); + + if (is_duplicated_record(cur, next)) { + _itr.next(); + cur = next; + next = (MemPointerRecord*)_itr.peek_next(); + } else { + break; + } + } + return cur; } - MemPointer* peek_prev() const { - assert(false, "not supported"); - return NULL; + MemPointer* peek_next() const { ShouldNotReachHere(); return NULL; } + MemPointer* peek_prev() const { ShouldNotReachHere(); return NULL; } + void remove() { ShouldNotReachHere(); } + bool insert(MemPointer* ptr) { ShouldNotReachHere(); return false; } + bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; } + + private: + bool is_duplicated_record(MemPointerRecord* p1, MemPointerRecord* p2) const { + bool ret = (p1->addr() == p2->addr() && p1->size() == p2->size() && p1->flags() == p2->flags()); + assert(!(ret && FLAGS_TO_MEMORY_TYPE(p1->flags()) == mtThreadStack), "dup on stack record"); + return ret; } - // remove the pointer at current position - void remove() { - assert(false, "not supported"); +}; + +class StagingArea : public _ValueObj { + private: + MemPointerArray* _malloc_data; + MemPointerArray* _vm_data; + + public: + StagingArea() : _malloc_data(NULL), _vm_data(NULL) { + init(); } - // insert the pointer at current position - bool insert(MemPointer* ptr) { - assert(false, "not supported"); - return false; + ~StagingArea() { + if (_malloc_data != NULL) delete _malloc_data; + if (_vm_data != NULL) delete _vm_data; + } + + MallocRecordIterator malloc_record_walker() { + return MallocRecordIterator(malloc_data()); } - bool insert_after(MemPointer* ptr) { - assert(false, "not supported"); - return false; + VMRecordIterator virtual_memory_record_walker(); + + bool init(); + void clear() { + assert(_malloc_data != NULL && _vm_data != NULL, "Just check"); + _malloc_data->shrink(); + _malloc_data->clear(); + _vm_data->clear(); } - private: - // consolidate all records referring to this vm region - bool
consolidate_vm_records(VMMemRegionEx* vm_rec); + inline MemPointerArray* malloc_data() { return _malloc_data; } + inline MemPointerArray* vm_data() { return _vm_data; } }; class MemBaseline; - class MemSnapshot : public CHeapObj<mtNMT> { private: // the following two arrays contain records of all known lived memory blocks @@ -237,9 +350,7 @@ // live virtual memory pointers MemPointerArray* _vm_ptrs; - // stagging a generation's data, before - // it can be prompted to snapshot - MemPointerArray* _staging_area; + StagingArea _staging_area; // the lock to protect this snapshot Monitor* _lock; @@ -252,18 +363,19 @@ virtual ~MemSnapshot(); // if we are running out of native memory - bool out_of_memory() const { - return (_alloc_ptrs == NULL || _staging_area == NULL || + bool out_of_memory() { + return (_alloc_ptrs == NULL || + _staging_area.malloc_data() == NULL || + _staging_area.vm_data() == NULL || _vm_ptrs == NULL || _lock == NULL || _alloc_ptrs->out_of_memory() || - _staging_area->out_of_memory() || _vm_ptrs->out_of_memory()); } // merge a per-thread memory recorder into staging area bool merge(MemRecorder* rec); // promote staged data to snapshot - void promote(); + bool promote(); void wait(long timeout) { @@ -276,11 +388,17 @@ NOT_PRODUCT(void check_staging_data();) NOT_PRODUCT(void check_malloc_pointers();) NOT_PRODUCT(bool has_allocation_record(address addr);) + // dump all virtual memory pointers in snapshot + DEBUG_ONLY( void dump_all_vm_pointers();) private: - // copy pointer data from src to dest - void copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src); + // copy sequenced pointer from src to dest + void copy_seq_pointer(MemPointerRecord* dest, const MemPointerRecord* src); + // assign a sequenced pointer to non-sequenced pointer + void assign_pointer(MemPointerRecord*dest, const MemPointerRecord* src); + + bool promote_malloc_records(MemPointerArrayIterator* itr); + bool promote_virtual_memory_records(MemPointerArrayIterator* itr); }; - #endif // SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
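The VMRecordIterator declared above collapses runs of duplicated virtual memory records while walking a pre-sorted array, keeping the last record of each run, which carries the highest sequence number and therefore the most accurate callsite pc. A rough standalone equivalent of that skip-ahead, with a hypothetical VRec type and collapse_duplicates() function standing in for the real record and iterator classes:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // hypothetical simplified record: within a run of duplicates, records
    // arrive pre-sorted by ascending sequence number
    struct VRec {
      uintptr_t   addr;
      std::size_t size;
      unsigned    flags;
      unsigned    seq;
    };

    // return the de-duplicated sequence: for each run of records describing
    // the same region, keep only the last one (highest sequence number)
    std::vector<VRec> collapse_duplicates(const std::vector<VRec>& in) {
      std::vector<VRec> out;
      for (std::size_t i = 0; i < in.size(); ++i) {
        while (i + 1 < in.size() &&
               in[i].addr  == in[i + 1].addr &&
               in[i].size  == in[i + 1].size &&
               in[i].flags == in[i + 1].flags) {
          ++i;  // skip the older duplicate; the next one has a higher seq
        }
        out.push_back(in[i]);
      }
      return out;
    }

As the class comment notes, doing this while records are still generation-local is cheap; duplicates that span generations can still surface and are handled during promotion.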
--- a/src/share/vm/services/memTrackWorker.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memTrackWorker.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -118,7 +118,10 @@ _head = (_head + 1) % MAX_GENERATIONS; } // promote this generation data to snapshot - snapshot->promote(); + if (!snapshot->promote()) { + // failure to promote means we are out of memory + MemTracker::shutdown(MemTracker::NMT_out_of_memory); + } } else { snapshot->wait(1000); ThreadCritical tc;
--- a/src/share/vm/services/memTracker.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memTracker.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -69,10 +69,12 @@ void MemTracker::init_tracking_options(const char* option_line) { _tracking_level = NMT_off; - if (strncmp(option_line, "=summary", 8) == 0) { + if (strcmp(option_line, "=summary") == 0) { _tracking_level = NMT_summary; - } else if (strncmp(option_line, "=detail", 8) == 0) { + } else if (strcmp(option_line, "=detail") == 0) { _tracking_level = NMT_detail; + } else if (strcmp(option_line, "=off") != 0) { + vm_exit_during_initialization("Syntax error, expecting -XX:NativeMemoryTracking=[off|summary|detail]", NULL); } } @@ -341,6 +343,7 @@ */ void MemTracker::create_memory_record(address addr, MEMFLAGS flags, size_t size, address pc, Thread* thread) { + assert(addr != NULL, "Sanity check"); if (!shutdown_in_progress()) { // single thread, we just write records directly to the global recorder, // without any lock @@ -358,7 +361,7 @@ if (thread != NULL) { if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) { - JavaThread* java_thread = static_cast<JavaThread*>(thread); + JavaThread* java_thread = (JavaThread*)thread; JavaThreadState state = java_thread->thread_state(); if (SafepointSynchronize::safepoint_safe(java_thread, state)) { // JavaThreads that are safepoint safe can run through safepoint, @@ -466,6 +469,8 @@ // it should guarantee that NMT is fully sync-ed. ThreadCritical tc; + SequenceGenerator::reset(); + // walk all JavaThreads to collect recorders SyncThreadRecorderClosure stc; Threads::threads_do(&stc); @@ -478,11 +483,12 @@ pending_recorders = _global_recorder; _global_recorder = NULL; } - SequenceGenerator::reset(); // check _worker_thread with lock to avoid race condition if (_worker_thread != NULL) { _worker_thread->at_sync_point(pending_recorders); } + + assert(SequenceGenerator::peek() == 1, "Should not have memory activities during sync-point"); } }
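The option parsing fix above is subtle: with a length argument of 8, strncmp compared only the first eight characters, so any option that merely started with "=summary" was silently accepted (for "=detail", the count covered the terminating NUL, so that comparison was already exact). The switch to strcmp plus the explicit "=off" check now rejects malformed values outright. A standalone illustration:

    #include <cstdio>
    #include <cstring>

    int main() {
      const char* bogus = "=summarygarbage";
      // old check: only the first 8 characters are compared, so the bogus
      // option was silently accepted
      std::printf("strncmp: %s\n",
                  std::strncmp(bogus, "=summary", 8) == 0 ? "accepted" : "rejected");
      // new check: an exact match is required
      std::printf("strcmp:  %s\n",
                  std::strcmp(bogus, "=summary") == 0 ? "accepted" : "rejected");
      return 0;
    }

This prints "strncmp: accepted" followed by "strcmp: rejected"; with the new code the VM additionally exits during initialization for anything other than =off, =summary, or =detail.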
--- a/src/share/vm/services/memTracker.hpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/services/memTracker.hpp Thu Nov 29 22:32:44 2012 -0800 @@ -39,8 +39,10 @@ #include "thread_solaris.inline.hpp" #endif -#ifdef _DEBUG_ - #define DEBUG_CALLER_PC os::get_caller_pc(3) +extern bool NMT_track_callsite; + +#ifdef ASSERT + #define DEBUG_CALLER_PC (NMT_track_callsite ? os::get_caller_pc(2) : 0) #else #define DEBUG_CALLER_PC 0 #endif @@ -85,7 +87,7 @@ NMT_shutdown // shutdown }; - + public: // native memory tracking level enum NMTLevel { NMT_off, // native memory tracking is off @@ -93,7 +95,6 @@ NMT_detail // track callsite also }; - public: enum ShutdownReason { NMT_shutdown_none, // no shutdown requested NMT_shutdown_user, // user requested shutdown @@ -117,6 +118,10 @@ _state >= NMT_bootstrapping_single_thread); } + static inline enum NMTLevel tracking_level() { + return _tracking_level; + } + // user readable reason for shutting down NMT static const char* reason() { switch(_reason) { @@ -184,7 +189,8 @@ // record a 'malloc' call static inline void record_malloc(address addr, size_t size, MEMFLAGS flags, address pc = 0, Thread* thread = NULL) { - if (NMT_CAN_TRACK(flags)) { + if (is_on() && NMT_CAN_TRACK(flags)) { + assert(size > 0, "Sanity check"); create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread); } } @@ -197,19 +203,21 @@ // record a 'realloc' call static inline void record_realloc(address old_addr, address new_addr, size_t size, MEMFLAGS flags, address pc = 0, Thread* thread = NULL) { - if (is_on()) { + if (is_on() && NMT_CAN_TRACK(flags)) { + assert(size > 0, "Sanity check"); record_free(old_addr, flags, thread); record_malloc(new_addr, size, flags, pc, thread); } } - // record arena size + // record arena memory size static inline void record_arena_size(address addr, size_t size) { - // we add a positive offset to arena address, so we can have arena size record + // we add a positive offset to arena address, so we can have arena memory record // sorted after arena record if (is_on() && !UseMallocOnly) { + assert(addr != NULL, "Sanity check"); create_memory_record((addr + sizeof(void*)), MemPointerRecord::arena_size_tag(), size, - 0, NULL); + DEBUG_CALLER_PC, NULL); } } @@ -217,16 +225,39 @@ static inline void record_virtual_memory_reserve(address addr, size_t size, address pc = 0, Thread* thread = NULL) { if (is_on()) { - assert(size > 0, "reserve szero size"); + assert(size > 0, "Sanity check"); create_memory_record(addr, MemPointerRecord::virtual_memory_reserve_tag(), size, pc, thread); } } + static inline void record_thread_stack(address addr, size_t size, Thread* thr, + address pc = 0) { + if (is_on()) { + assert(size > 0 && thr != NULL, "Sanity check"); + create_memory_record(addr, MemPointerRecord::virtual_memory_reserve_tag() | mtThreadStack, + size, pc, thr); + create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag() | mtThreadStack, + size, pc, thr); + } + } + + static inline void release_thread_stack(address addr, size_t size, Thread* thr) { + if (is_on()) { + assert(size > 0 && thr != NULL, "Sanity check"); + assert(!thr->is_Java_thread(), "too early"); + create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag() | mtThreadStack, + size, DEBUG_CALLER_PC, thr); + create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag() | mtThreadStack, + size, DEBUG_CALLER_PC, thr); + } + } + // record a virtual memory 'commit' call static inline void record_virtual_memory_commit(address addr, size_t 
size, - address pc = 0, Thread* thread = NULL) { + address pc, Thread* thread = NULL) { if (is_on()) { + assert(size > 0, "Sanity check"); create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag(), size, pc, thread); } @@ -236,8 +267,9 @@ static inline void record_virtual_memory_uncommit(address addr, size_t size, Thread* thread = NULL) { if (is_on()) { + assert(size > 0, "Sanity check"); create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag(), - size, 0, thread); + size, DEBUG_CALLER_PC, thread); } } @@ -245,8 +277,9 @@ static inline void record_virtual_memory_release(address addr, size_t size, Thread* thread = NULL) { if (is_on()) { + assert(size > 0, "Sanity check"); create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag(), - size, 0, thread); + size, DEBUG_CALLER_PC, thread); } } @@ -257,7 +290,7 @@ assert(base > 0, "wrong base address"); assert((flags & (~mt_masks)) == 0, "memory type only"); create_memory_record(base, (flags | MemPointerRecord::virtual_memory_type_tag()), - 0, 0, thread); + 0, DEBUG_CALLER_PC, thread); } }
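The new record_thread_stack() and release_thread_stack() entry points above fold thread stacks into the virtual memory bookkeeping by emitting paired records: reserve plus commit at creation, uncommit plus release at teardown, all tagged mtThreadStack. A toy sketch of that pairing, with a hypothetical emit_record() standing in for the real create_memory_record() machinery:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    enum VMOp { RESERVE, COMMIT, UNCOMMIT, RELEASE };

    // hypothetical stand-in for MemTracker::create_memory_record()
    static void emit_record(VMOp op, uintptr_t base, std::size_t size) {
      static const char* const names[] = { "reserve", "commit", "uncommit", "release" };
      std::printf("%-8s mtThreadStack [%#lx, %#lx)\n", names[op],
                  (unsigned long)base, (unsigned long)(base + size));
    }

    // mirrors record_thread_stack(): the stack range is reserved and committed
    void track_thread_stack(uintptr_t base, std::size_t size) {
      emit_record(RESERVE, base, size);
      emit_record(COMMIT, base, size);
    }

    // mirrors release_thread_stack(): uncommit first, then release
    void untrack_thread_stack(uintptr_t base, std::size_t size) {
      emit_record(UNCOMMIT, base, size);
      emit_record(RELEASE, base, size);
    }

Emitting both halves of each pair keeps the reserved and committed totals consistent with how ordinary mapped memory is accounted.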
--- a/src/share/vm/utilities/vmError.cpp Thu Nov 29 19:41:00 2012 -0800 +++ b/src/share/vm/utilities/vmError.cpp Thu Nov 29 22:32:44 2012 -0800 @@ -453,7 +453,9 @@ JDK_Version::current().to_string(buf, sizeof(buf)); const char* runtime_name = JDK_Version::runtime_name() != NULL ? JDK_Version::runtime_name() : ""; - st->print_cr("# JRE version: %s (%s)", runtime_name, buf); + const char* runtime_version = JDK_Version::runtime_version() != NULL ? + JDK_Version::runtime_version() : ""; + st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version); st->print_cr("# Java VM: %s (%s %s %s %s)", Abstract_VM_Version::vm_name(), Abstract_VM_Version::vm_release(),
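The vmError.cpp hunk extends the hs_err header with the runtime build string, guarded against a NULL runtime_version the same way runtime_name already was. A small sketch of the resulting format; the version strings here are made up for illustration, the real ones come from JDK_Version:

    #include <cstdio>

    static const char* or_empty(const char* s) { return s != NULL ? s : ""; }

    int main() {
      // illustrative values only
      const char* runtime_name    = "Java(TM) SE Runtime Environment";
      const char* buf             = "7.0";
      const char* runtime_version = "1.7.0-b00";
      std::printf("# JRE version: %s (%s) (build %s)\n",
                  or_empty(runtime_name), or_empty(buf), or_empty(runtime_version));
      return 0;
    }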
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7184394/TestAESBase.java Thu Nov 29 22:32:44 2012 -0800 @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @author Tom Deneau + */ + +import javax.crypto.Cipher; +import javax.crypto.KeyGenerator; +import javax.crypto.SecretKey; +import javax.crypto.spec.IvParameterSpec; +import javax.crypto.spec.SecretKeySpec; +import java.security.AlgorithmParameters; + +import java.util.Random; +import java.util.Arrays; + +abstract public class TestAESBase { + int msgSize = Integer.getInteger("msgSize", 646); + boolean checkOutput = Boolean.getBoolean("checkOutput"); + boolean noReinit = Boolean.getBoolean("noReinit"); + int keySize = Integer.getInteger("keySize", 128); + String algorithm = System.getProperty("algorithm", "AES"); + String mode = System.getProperty("mode", "CBC"); + byte[] input; + byte[] encode; + byte[] expectedEncode; + byte[] decode; + byte[] expectedDecode; + Random random = new Random(0); + Cipher cipher; + Cipher dCipher; + String paddingStr = "PKCS5Padding"; + AlgorithmParameters algParams; + SecretKey key; + int ivLen; + + static int numThreads = 0; + int threadId; + static synchronized int getThreadId() { + int id = numThreads; + numThreads++; + return id; + } + + abstract public void run(); + + public void prepare() { + try { + System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput); + + int keyLenBytes = (keySize == 0 ? 16 : keySize/8); + byte keyBytes[] = new byte[keyLenBytes]; + if (keySize == 128) + keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; + else + random.nextBytes(keyBytes); + + key = new SecretKeySpec(keyBytes, algorithm); + if (threadId == 0) { + System.out.println("Algorithm: " + key.getAlgorithm() + "(" + + key.getEncoded().length * 8 + "bit)"); + } + input = new byte[msgSize]; + for (int i=0; i<input.length; i++) { + input[i] = (byte) (i & 0xff); + } + + cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); + dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); + + ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 
8 : 0); + IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); + + cipher.init(Cipher.ENCRYPT_MODE, key, initVector); + algParams = cipher.getParameters(); + dCipher.init(Cipher.DECRYPT_MODE, key, algParams); + if (threadId == 0) { + childShowCipher(); + } + + // do one encode and decode in preparation + // this will also create the encode buffer and decode buffer + encode = cipher.doFinal(input); + decode = dCipher.doFinal(encode); + if (checkOutput) { + expectedEncode = (byte[]) encode.clone(); + expectedDecode = (byte[]) decode.clone(); + showArray(key.getEncoded() , "key: "); + showArray(input, "input: "); + showArray(encode, "encode: "); + showArray(decode, "decode: "); + } + } + catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + void showArray(byte b[], String name) { + System.out.format("%s [%d]: ", name, b.length); + for (int i=0; i<Math.min(b.length, 32); i++) { + System.out.format("%02x ", b[i] & 0xff); + } + System.out.println(); + } + + void compareArrays(byte b[], byte exp[]) { + if (b.length != exp.length) { + System.out.format("different lengths for actual and expected output arrays\n"); + showArray(b, "test: "); + showArray(exp, "exp : "); + System.exit(1); + } + for (int i=0; i< exp.length; i++) { + if (b[i] != exp[i]) { + System.out.format("output error at index %d: got %02x, expected %02x\n", i, b[i] & 0xff, exp[i] & 0xff); + showArray(b, "test: "); + showArray(exp, "exp : "); + System.exit(1); + } + } + } + + + void showCipher(Cipher c, String kind) { + System.out.println(kind + " cipher provider: " + c.getProvider()); + System.out.println(kind + " cipher algorithm: " + c.getAlgorithm()); + } + + abstract void childShowCipher(); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7184394/TestAESDecode.java Thu Nov 29 22:32:44 2012 -0800 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @author Tom Deneau + */ + +import javax.crypto.Cipher; + +public class TestAESDecode extends TestAESBase { + @Override + public void run() { + try { + if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams); + if (checkOutput) { + // checked version creates new output buffer each time + decode = dCipher.doFinal(encode, 0, encode.length); + compareArrays(decode, expectedDecode); + } else { + // non-checked version outputs to the decode buffer for maximum speed + decode = new byte[dCipher.getOutputSize(encode.length)]; + dCipher.doFinal(encode, 0, encode.length, decode); + } + } + catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + @Override + void childShowCipher() { + showCipher(dCipher, "Decryption"); + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7184394/TestAESEncode.java Thu Nov 29 22:32:44 2012 -0800 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @author Tom Deneau + */ + +import javax.crypto.Cipher; + +public class TestAESEncode extends TestAESBase { + @Override + public void run() { + try { + if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + if (checkOutput) { + // checked version creates new output buffer each time + encode = cipher.doFinal(input, 0, msgSize); + compareArrays(encode, expectedEncode); + } else { + // non-checked version outputs to existing encode buffer for maximum speed + encode = new byte[cipher.getOutputSize(msgSize)]; + cipher.doFinal(input, 0, msgSize, encode); + } + } + catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + @Override + void childShowCipher() { + showCipher(cipher, "Encryption"); + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7184394/TestAESMain.java Thu Nov 29 22:32:44 2012 -0800 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7184394 + * @summary add intrinsics to use AES instructions + * + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain + * + * @author Tom Deneau + */ + +public class TestAESMain { + public static void main(String[] args) { + int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 1000000); + System.out.println(iters + " iterations"); + TestAESEncode etest = new TestAESEncode(); + etest.prepare(); + long start = System.nanoTime(); + for (int i=0; i<iters; i++) { + etest.run(); + } + long end = System.nanoTime(); + System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms"); + + TestAESDecode dtest = new TestAESDecode(); + dtest.prepare(); + start = System.nanoTime(); + for (int i=0; i<iters; i++) { + dtest.run(); + } + end = System.nanoTime(); + System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms"); + } +}