Mercurial > hg > openjdk > bsd-port > hotspot

--- a/.hgtags	Fri Sep 08 09:32:12 2017 -0700
+++ b/.hgtags	Sat Feb 03 14:01:55 2018 -0800
@@ -848,3 +848,5 @@
 56ad25be7d88c2c2da562fe1e8879c8723d01da1 jdk7u141-b02
 75662a7ec1719b3133636d09bd078968579a55ab jdk7u151-b00
 d0c7cea0660f7a8188a7b8c1f6d1a6c8d6388fb0 jdk7u151-b01
+fc789043683d3cf424f97176bd77cf7abe5bd01a jdk7u161-b00
+2965926dc5176c075e7a68c5d82bfd0ffa91cd5e jdk7u161-b01
--- a/THIRD_PARTY_README	Fri Sep 08 09:32:12 2017 -0700
+++ b/THIRD_PARTY_README	Sat Feb 03 14:01:55 2018 -0800
@@ -3134,14 +3134,14 @@

 -------------------------------------------------------------------------------

-%% This notice is provided with respect to zlib v1.2.3, which is included
+%% This notice is provided with respect to zlib v1.2.11, which may be included
 with JRE 7, JDK 7, and OpenJDK 7

 --- begin of LICENSE ---

-  version 1.2.3, July 18th, 2005
-
-  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
+  version 1.2.11, January 15th, 2017
+
+  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler

   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
--- a/src/cpu/ppc/vm/ppc.ad	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/ppc/vm/ppc.ad	Sat Feb 03 14:01:55 2018 -0800
@@ -2445,6 +2445,11 @@
   return false;
 }

+// PPC AES support not yet implemented
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
 // RETURNS: whether this branch offset is short enough that a short
 // branch can be used.
 //
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -2093,6 +2093,10 @@
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();

+    if (UseAESIntrinsics) {
+      guarantee(!UseAESIntrinsics, "not yet implemented.");
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -630,6 +630,7 @@
     orncc_op3    = 0x16,
     xnorcc_op3   = 0x17,
     addccc_op3   = 0x18,
+    aes4_op3     = 0x19,
     umulcc_op3   = 0x1a,
     smulcc_op3   = 0x1b,
     subccc_op3   = 0x1c,
@@ -663,6 +664,8 @@
     fpop1_op3    = 0x34,
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
+    aes3_op3     = 0x36,
+    flog3_op3    = 0x36,
     impdep2_op3  = 0x37,
     jmpl_op3     = 0x38,
     rett_op3     = 0x39,
@@ -724,41 +727,56 @@

   enum opfs {
     // selected opfs
-    fmovs_opf   = 0x01,
-    fmovd_opf   = 0x02,
-
-    fnegs_opf   = 0x05,
-    fnegd_opf   = 0x06,
-
-    fadds_opf   = 0x41,
-    faddd_opf   = 0x42,
-    fsubs_opf   = 0x45,
-    fsubd_opf   = 0x46,
-
-    fmuls_opf   = 0x49,
-    fmuld_opf   = 0x4a,
-    fdivs_opf   = 0x4d,
-    fdivd_opf   = 0x4e,
-
-    fcmps_opf   = 0x51,
-    fcmpd_opf   = 0x52,
-
-    fstox_opf   = 0x81,
-    fdtox_opf   = 0x82,
-    fxtos_opf   = 0x84,
-    fxtod_opf   = 0x88,
-    fitos_opf   = 0xc4,
-    fdtos_opf   = 0xc6,
-    fitod_opf   = 0xc8,
-    fstod_opf   = 0xc9,
-    fstoi_opf   = 0xd1,
-    fdtoi_opf   = 0xd2,
-
-    mdtox_opf   = 0x110,
-    mstouw_opf  = 0x111,
-    mstosw_opf  = 0x113,
-    mxtod_opf   = 0x118,
-    mwtos_opf   = 0x119
+    fmovs_opf          = 0x01,
+    fmovd_opf          = 0x02,
+
+    fnegs_opf          = 0x05,
+    fnegd_opf          = 0x06,
+
+    fadds_opf          = 0x41,
+    faddd_opf          = 0x42,
+    fsubs_opf          = 0x45,
+    fsubd_opf          = 0x46,
+
+    fmuls_opf          = 0x49,
+    fmuld_opf          = 0x4a,
+    fdivs_opf          = 0x4d,
+    fdivd_opf          = 0x4e,
+
+    fcmps_opf          = 0x51,
+    fcmpd_opf          = 0x52,
+
+    fstox_opf          = 0x81,
+    fdtox_opf          = 0x82,
+    fxtos_opf          = 0x84,
+    fxtod_opf          = 0x88,
+    fitos_opf          = 0xc4,
+    fdtos_opf          = 0xc6,
+    fitod_opf          = 0xc8,
+    fstod_opf          = 0xc9,
+    fstoi_opf          = 0xd1,
+    fdtoi_opf          = 0xd2,
+
+    mdtox_opf          = 0x110,
+    mstouw_opf         = 0x111,
+    mstosw_opf         = 0x113,
+    mxtod_opf          = 0x118,
+    mwtos_opf          = 0x119,
+
+    aes_kexpand0_opf   = 0x130,
+    aes_kexpand2_opf   = 0x131
+  };
+
+  enum op5s {
+    aes_eround01_op5     = 0x00,
+    aes_eround23_op5     = 0x01,
+    aes_dround01_op5     = 0x02,
+    aes_dround23_op5     = 0x03,
+    aes_eround01_l_op5   = 0x04,
+    aes_eround23_l_op5   = 0x05,
+    aes_dround01_l_op5   = 0x06,
+    aes_dround23_l_op5   = 0x07,
+    aes_kexpand1_op5     = 0x08
   };

   enum RCondition {  rc_z = 1,  rc_lez = 2,  rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez  };
@@ -979,6 +997,7 @@
   static int immed(    bool        i)  { return  u_field(i ? 1 : 0,     13, 13); }
   static int opf_low6( int         w)  { return  u_field(w,             10,  5); }
   static int opf_low5( int         w)  { return  u_field(w,              9,  5); }
+  static int op5(      int         x)  { return  u_field(x,              8,  5); }
   static int trapcc(   CC         cc)  { return  u_field(cc,            12, 11); }
   static int sx(       int         i)  { return  u_field(i,             12, 12); } // shift x=1 means 64-bit
   static int opf(      int         x)  { return  u_field(x,             13,  5); }
@@ -1003,6 +1022,7 @@
   static int fd( FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); };
   static int fs1(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); };
   static int fs2(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa),  4,  0); };
+  static int fs3(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13,  9); };

   // some float instructions use this encoding on the op3 field
   static int alt_op3(int op, FloatRegisterImpl::Width w) {
@@ -1111,6 +1131,12 @@
     return x & ((1 << 10) - 1);
   }

+  // AES crypto instructions supported only on certain processors
+  static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
+
+  // instruction only in VIS1
+  static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
+
   // instruction only in VIS3
   static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }

@@ -1243,6 +1269,24 @@
   void addccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }


+  // 4-operand AES instructions
+
+  void aes_eround01(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround23(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround01(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround23(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround01_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround23_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround01_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround23_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_kexpand1(  FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+
+
+  // 3-operand AES instructions
+
+  void aes_kexpand0(  FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_kexpand2(  FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+
   // pp 136

   inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
@@ -1360,6 +1404,10 @@
   void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw,  FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
   void fdiv( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x4c + w)         | fs2(s2, w)); }

+  // FXORs/FXORd instructions
+
+  void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); }
+
   // pp 164

   void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
--- a/src/cpu/sparc/vm/sparc.ad	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Sat Feb 03 14:01:55 2018 -0800
@@ -1911,6 +1911,12 @@
   return false;
 }

+// Current (2013) SPARC platforms need to read original key
+// to construct decryption expanded key
+const bool Matcher::pass_original_key_for_aes() {
+  return true;
+}
+
 // USII supports fxtof through the whole range of number, USIII doesn't
 const bool Matcher::convL2FSupported(void) {
   return VM_Version::has_fast_fxtof();
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -3375,6 +3375,775 @@
     }
   }

+  address generate_aescrypt_encryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
+    Label L_doLast128bit, L_storeOutput;
+    address start = __ pc();
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    const Register keylen = O4; //reg for storing expanded key array length
+
+    // read expanded key length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load input into F54-F56; F30-F31 used as temp
+    __ ldf(FloatRegisterImpl::S, from, 0, F30);
+    __ ldf(FloatRegisterImpl::S, from, 4, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F54);
+    __ ldf(FloatRegisterImpl::S, from, 8, F30);
+    __ ldf(FloatRegisterImpl::S, from, 12, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F56);
+
+    // load expanded key
+    for ( int i = 0;  i <= 38; i += 2 ) {
+      __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
+    }
+
+    // perform cipher transformation
+    __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F2, F56, F56);
+    // rounds 1 through 8
+    for ( int i = 4;  i <= 28; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
+      __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
+      __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
+      __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
+    }
+    __ aes_eround01(F36, F54, F56, F58); //round 9
+    __ aes_eround23(F38, F54, F56, F60);
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
+
+    for ( int i = 40;  i <= 50; i += 2 ) {
+      __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
+    }
+    __ aes_eround01(F40, F58, F60, F54); //round 10
+    __ aes_eround23(F42, F58, F60, F56);
+    __ aes_eround01(F44, F54, F56, F58); //round 11
+    __ aes_eround23(F46, F54, F56, F60);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
+
+    __ ldf(FloatRegisterImpl::D, key, 208, F52);
+    __ aes_eround01(F48, F58, F60, F54); //round 12
+    __ aes_eround23(F50, F58, F60, F56);
+    __ ldf(FloatRegisterImpl::D, key, 216, F46);
+    __ ldf(FloatRegisterImpl::D, key, 224, F48);
+    __ ldf(FloatRegisterImpl::D, key, 232, F50);
+    __ aes_eround01(F52, F54, F56, F58); //round 13
+    __ aes_eround23(F46, F54, F56, F60);
+    __ br(Assembler::always, false, Assembler::pt, L_storeOutput);
+    __ delayed()->nop();
+
+    __ BIND(L_doLast128bit);
+    __ ldf(FloatRegisterImpl::D, key, 160, F48);
+    __ ldf(FloatRegisterImpl::D, key, 168, F50);
+
+    __ BIND(L_storeOutput);
+    // perform last round of encryption common for all key sizes
+    __ aes_eround01_l(F48, F58, F60, F54); //last round
+    __ aes_eround23_l(F50, F58, F60, F56);
+
+    // store output into the destination array, F0-F1 used as temp
+    __ fmov(FloatRegisterImpl::D, F54, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 0);
+    __ stf(FloatRegisterImpl::S, F1, to, 4);
+    __ fmov(FloatRegisterImpl::D, F56, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 8);
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+    return start;
+  }
+
+  address generate_aescrypt_decryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
+    address start = __ pc();
+    Label L_expand192bit, L_expand256bit, L_common_transform;
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    Register original_key = O3;  // original key array only required during decryption
+    const Register keylen = O4;  // reg for storing expanded key array length
+
+    // read expanded key array length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load input into F52-F54; F30,F31 used as temp
+    __ ldf(FloatRegisterImpl::S, from, 0, F30);
+    __ ldf(FloatRegisterImpl::S, from, 4, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F52);
+    __ ldf(FloatRegisterImpl::S, from, 8, F30);
+    __ ldf(FloatRegisterImpl::S, from, 12, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F54);
+
+    // load original key from SunJCE expanded decryption key
+    for ( int i = 0;  i <= 3; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+    // 128-bit original key size
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 4 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+    }
+
+    // perform 128-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F42, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F40, F52, F52);
+    __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+    __ delayed()->nop();
+
+    __ BIND(L_expand192bit);
+
+    // start loading rest of the 192-bit key
+    __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+    __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 6 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+    }
+    __ aes_kexpand1(F42, F46, 7, F48);
+    __ aes_kexpand2(F44, F48, F50);
+
+    // perform 192-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F50, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F48, F52, F52);
+    __ aes_dround23(F46, F52, F54, F58);
+    __ aes_dround01(F44, F52, F54, F56);
+    __ aes_dround23(F42, F56, F58, F54);
+    __ aes_dround01(F40, F56, F58, F52);
+    __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+    __ delayed()->nop();
+
+    __ BIND(L_expand256bit);
+
+    // load rest of the 256-bit key
+    for ( int i = 4;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+      __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+      __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+    }
+    __ aes_kexpand1(F48, F54, 6, F56);
+    __ aes_kexpand2(F50, F56, F58);
+
+    for ( int i = 0;  i <= 6; i += 2 ) {
+      __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
+    }
+
+    // load input into F52-F54
+    __ ldf(FloatRegisterImpl::D, from, 0, F52);
+    __ ldf(FloatRegisterImpl::D, from, 8, F54);
+
+    // perform 256-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F2, F52, F52);
+    __ aes_dround23(F4, F52, F54, F58);
+    __ aes_dround01(F6, F52, F54, F56);
+    __ aes_dround23(F50, F56, F58, F54);
+    __ aes_dround01(F48, F56, F58, F52);
+    __ aes_dround23(F46, F52, F54, F58);
+    __ aes_dround01(F44, F52, F54, F56);
+    __ aes_dround23(F42, F56, F58, F54);
+    __ aes_dround01(F40, F56, F58, F52);
+
+    for ( int i = 0;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform inverse cipher transformations common for all key sizes
+    __ BIND(L_common_transform);
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
+      if ( i != 6) {
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
+      }
+    }
+
+    // store output to destination array, F0-F1 used as temp
+    __ fmov(FloatRegisterImpl::D, F52, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 0);
+    __ stf(FloatRegisterImpl::S, F1, to, 4);
+    __ fmov(FloatRegisterImpl::D, F54, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 8);
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+    return start;
+  }
+
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
+    address start = __ pc();
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    Register rvec = O3; // init vector
+    const Register len_reg = O4; // cipher length
+    const Register keylen = O5;  // reg for storing expanded key array length
+
+    // save cipher len to return in the end
+    __ mov(len_reg, L1);
+
+    // read expanded key length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load init vector
+    __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
+    __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
+    __ ldx(key,0,G1);
+    __ ldx(key,8,G2);
+
+    // start loading expanded key
+    for ( int i = 0, j = 16;  i <= 38; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
+
+    for ( int i = 40, j = 176;  i <= 46; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
+
+    for ( int i = 48, j = 208;  i <= 54; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 256-bit original key size
+    __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc128);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // TEN_EROUNDS
+    for ( int i = 0;  i <= 32; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 32 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ add(to, 16, to);
+    __ subcc(len_reg, 16, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc192);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // TWELEVE_EROUNDS
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 40 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ subcc(len_reg, 16, len_reg);
+    __ add(to, 16, to);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc256);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // FOURTEEN_EROUNDS
+    for ( int i = 0;  i <= 48; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 48 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ subcc(len_reg, 16, len_reg);
+    __ add(to, 16, to);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    return start;
+  }
+
+  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
+    Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
+    address start = __ pc();
+    Register from = I0; // source byte array
+    Register to = I1;   // destination byte array
+    Register key = I2;  // expanded key array
+    Register rvec = I3; // init vector
+    const Register len_reg = I4; // cipher length
+    const Register original_key = I5;  // original key array only required during decryption
+    const Register keylen = L6;  // reg for storing expanded key array length
+
+    // save cipher len before save_frame, to return in the end
+    __ mov(O4, L0);
+    __ save_frame(0); //args are read from I* registers since we save the frame in the beginning
+
+    // load original key from SunJCE expanded decryption key
+    for ( int i = 0;  i <= 3; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // load initial vector
+    __ ldx(rvec,0,L0);
+    __ ldx(rvec,8,L1);
+
+    // read expanded key array length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+    // 128-bit original key size
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 4 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+    }
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F40,L2);
+    __ movdtox(F42,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
+    __ delayed()->nop();
+
+    __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+    __ delayed()->nop();
+
+    __ BIND(L_expand192bit);
+    // load rest of the 192-bit key
+    __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+    __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 6 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+    }
+    __ aes_kexpand1(F42, F46, 7, F48);
+    __ aes_kexpand2(F44, F48, F50);
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F48,L2);
+    __ movdtox(F50,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
+    __ delayed()->nop();
+
+    __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+    __ delayed()->nop();
+
+    __ BIND(L_expand256bit);
+    // load rest of the 256-bit key
+    for ( int i = 4;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+      __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+      __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+    }
+    __ aes_kexpand1(F48, F54, 6, F56);
+    __ aes_kexpand2(F50, F56, F58);
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F56,L2);
+    __ movdtox(F58,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
+    __ delayed()->nop();
+
+    __ BIND(L_dec_first_block_start);
+    __ ldx(from,0,L4);
+    __ ldx(from,8,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
+
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23(F50, F56, F58, F62);
+    __ aes_dround01(F48, F56, F58, F60);
+
+    __ BIND(L_dec_first_block192);
+    __ aes_dround23(F46, F60, F62, F58);
+    __ aes_dround01(F44, F60, F62, F56);
+    __ aes_dround23(F42, F56, F58, F62);
+    __ aes_dround01(F40, F56, F58, F60);
+
+    __ BIND(L_dec_first_block128);
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if ( i != 6) {
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F56);
+    __ movxtod(L1,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+
+    __ add(from, 16, from);
+    __ add(to, 16, to);
+    __ subcc(len_reg, 16, len_reg);
+    __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks128);
+    __ nop();
+
+    // F40:F42 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F40);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F42);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
+      __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if (i != 6 ) {
+        __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
+        __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
+        __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F46);
+    __ movxtod(L1,F44);
+    __ fxor(FloatRegisterImpl::D, F46, F40, F40);
+    __ fxor(FloatRegisterImpl::D, F44, F42, F42);
+
+    __ stf(FloatRegisterImpl::D, F40, to, 0);
+    __ stf(FloatRegisterImpl::D, F42, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
+    __ delayed()->nop();
+    __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks192);
+    __ nop();
+
+    // F48:F50 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F48);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F50);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    for ( int i = 46;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
+      __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if (i != 6 ) {
+        __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
+        __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
+        __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F54);
+    __ movxtod(L1,F52);
+    __ fxor(FloatRegisterImpl::D, F54, F48, F48);
+    __ fxor(FloatRegisterImpl::D, F52, F50, F50);
+
+    __ stf(FloatRegisterImpl::D, F48, to, 0);
+    __ stf(FloatRegisterImpl::D, F50, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
+    __ delayed()->nop();
+    __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks256);
+    __ nop();
+
+    // F0:F2 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F0);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F2);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    __ aes_dround23(F54, F0, F2, F4);
+    __ aes_dround01(F52, F0, F2, F6);
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23(F50, F6, F4, F2);
+    __ aes_dround01(F48, F6, F4, F0);
+    __ aes_dround23(F50, F56, F58, F62);
+    __ aes_dround01(F48, F56, F58, F60);
+    // save F48:F54 in temp registers
+    __ movdtox(F54,G2);
+    __ movdtox(F52,G3);
+    __ movdtox(F50,G6);
+    __ movdtox(F48,G1);
+    for ( int i = 46;  i >= 14; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
+      __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
+      __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
+      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+    }
+    // init F48:F54 with F0:F6 values (original key)
+    __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
+    __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
+    __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
+    __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
+    __ aes_dround23(F54, F0, F2, F4);
+    __ aes_dround01(F52, F0, F2, F6);
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23_l(F50, F6, F4, F2);
+    __ aes_dround01_l(F48, F6, F4, F0);
+    __ aes_dround23_l(F50, F56, F58, F62);
+    __ aes_dround01_l(F48, F56, F58, F60);
+    // re-init F48:F54 with their original values
+    __ movxtod(G2,F54);
+    __ movxtod(G3,F52);
+    __ movxtod(G6,F50);
+    __ movxtod(G1,F48);
+
+    __ movxtod(L0,F6);
+    __ movxtod(L1,F4);
+    __ fxor(FloatRegisterImpl::D, F6, F0, F0);
+    __ fxor(FloatRegisterImpl::D, F4, F2, F2);
+
+    __ stf(FloatRegisterImpl::D, F0, to, 0);
+    __ stf(FloatRegisterImpl::D, F2, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
+    __ delayed()->nop();
+
+    __ BIND(L_cbcdec_end);
+    __ stx(L0, rvec, 0);
+    __ stx(L1, rvec, 8);
+    __ restore();
+    __ mov(L0, O0);
+    __ retl();
+    __ delayed()->nop();
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points

@@ -3431,6 +4200,14 @@

     // Don't initialize the platform math functions since sparc
     // doesn't have intrinsics for these operations.
+
+    // generate AES intrinsics code
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+    }
   }
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -242,7 +242,7 @@
   assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");

   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", vis1" : ""),
@@ -250,6 +250,7 @@
                (has_vis3() ? ", vis3" : ""),
                (has_blk_init() ? ", blk_init" : ""),
                (has_cbcond() ? ", cbcond" : ""),
+               (has_aes() ? ", aes" : ""),
                (is_ultra3() ? ", ultra3" : ""),
                (is_sun4v() ? ", sun4v" : ""),
                (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
@@ -273,6 +274,41 @@
   if (!has_vis1()) // Drop to 0 if no VIS1 support
     UseVIS = 0;

+  // T2 and above should have support for AES instructions
+  if (has_aes()) {
+    if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1
+      if (FLAG_IS_DEFAULT(UseAES)) {
+        FLAG_SET_DEFAULT(UseAES, true);
+      }
+      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+      }
+      // we disable both the AES flags if either of them is disabled on the command line
+      if (!UseAES || !UseAESIntrinsics) {
+        FLAG_SET_DEFAULT(UseAES, false);
+        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+      }
+    } else {
+        if (UseAES || UseAESIntrinsics) {
+          warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+          if (UseAES) {
+            FLAG_SET_DEFAULT(UseAES, false);
+          }
+          if (UseAESIntrinsics) {
+            FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+          }
+        }
+    }
+  } else if (UseAES || UseAESIntrinsics) {
+    warning("AES instructions are not available on this CPU");
+    if (UseAES) {
+      FLAG_SET_DEFAULT(UseAES, false);
+    }
+    if (UseAESIntrinsics) {
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    }
+  }
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print("Allocation");
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -49,7 +49,8 @@
     M_family             = 15,
     T_family             = 16,
     T1_model             = 17,
-    sparc5_instructions  = 18
+    sparc5_instructions  = 18,
+    aes_instructions     = 19
   };

   enum Feature_Flag_Set {
@@ -75,6 +76,7 @@
     T_family_m              = 1 << T_family,
     T1_model_m              = 1 << T1_model,
     sparc5_instructions_m   = 1 << sparc5_instructions,
+    aes_instructions_m      = 1 << aes_instructions,

     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -126,6 +128,7 @@
   static bool has_blk_init()            { return (_features & blk_init_instructions_m) != 0; }
   static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
   static bool has_sparc5_instr()        { return (_features & sparc5_instructions_m) != 0; }
+  static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }

   static bool supports_compare_and_exchange()
                                         { return has_v9(); }
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -2433,6 +2433,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
   address generate_cipherBlockChaining_encryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
@@ -2513,7 +2516,7 @@
     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object

     handleSOERegisters(false /*restoring*/);
-    __ movl(rax, 0);                             // return 0 (why?)
+    __ movptr(rax, len_param); // return length
     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
     __ ret(0);

@@ -2587,6 +2590,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //

   address generate_cipherBlockChaining_decryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
@@ -2680,7 +2686,7 @@
     __ movptr(rvec , rvec_param);                                     // restore this since used in loop
     __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
     handleSOERegisters(false /*restoring*/);
-    __ movl(rax, 0);                                                  // return 0 (why?)
+    __ movptr(rax, len_param); // return length
     __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -3246,6 +3246,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
   address generate_cipherBlockChaining_encryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
@@ -3261,7 +3264,7 @@
 #ifndef _WIN64
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
-    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
     const Register len_reg     = r10;      // pick the first volatile windows register
 #endif
     const Register pos         = rax;
@@ -3288,6 +3291,8 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
     }
+#else
+    __ push(len_reg); // Save
 #endif

     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
@@ -3330,8 +3335,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(as_XMMRegister(i), xmm_save(i));
     }
+    __ movl(rax, len_mem);
+#else
+    __ pop(rax); // return length
 #endif
-    __ movl(rax, 0); // return 0 (why?)
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);

@@ -3400,6 +3407,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //

   address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
@@ -3418,7 +3428,7 @@
 #ifndef _WIN64
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
-    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
     const Register len_reg     = r10;      // pick the first volatile windows register
 #endif
     const Register pos         = rax;
@@ -3439,7 +3449,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
     }
+#else
+    __ push(len_reg); // Save
 #endif
+
     // the java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
@@ -3545,8 +3558,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(as_XMMRegister(i), xmm_save(i));
     }
+    __ movl(rax, len_mem);
+#else
+    __ pop(rax); // return length
 #endif
-    __ movl(rax, 0); // return 0 (why?)
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
--- a/src/cpu/x86/vm/x86.ad	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/cpu/x86/vm/x86.ad	Sat Feb 03 14:01:55 2018 -0800
@@ -592,6 +592,12 @@
   return !AlignVector; // can be changed by flag
 }

+// x86 AES instructions are compatible with SunJCE expanded
+// keys, hence we do not need to pass the original key to stubs
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
 // Helper methods for MachSpillCopyNode::implementation().
 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -132,6 +132,11 @@
 #endif
     if (av & AV_SPARC_CBCOND)       features |= cbcond_instructions_m;

+#ifndef AV_SPARC_AES
+#define AV_SPARC_AES 0x00020000  /* aes instrs supported */
+#endif
+    if (av & AV_SPARC_AES)       features |= aes_instructions_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT
--- a/src/share/vm/classfile/vmSymbols.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -739,7 +739,7 @@
    do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
    do_name(     encrypt_name,                                      "encrypt")                                           \
    do_name(     decrypt_name,                                      "decrypt")                                           \
-   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)V")                                        \
+   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
--- a/src/share/vm/interpreter/linkResolver.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/interpreter/linkResolver.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -415,6 +415,40 @@
   }
 }

+void LinkResolver::check_method_loader_constraints(methodHandle& resolved_method,
+                                                   KlassHandle resolved_klass,
+                                                   Symbol* method_name,
+                                                   Symbol* method_signature,
+                                                   KlassHandle current_klass,
+                                                   const char* method_type, TRAPS) {
+  Handle loader (THREAD, instanceKlass::cast(current_klass())->class_loader());
+  Handle class_loader (THREAD, instanceKlass::cast(resolved_method->method_holder())->class_loader());
+  {
+    ResourceMark rm(THREAD);
+    char* failed_type_name =
+      SystemDictionary::check_signature_loaders(method_signature, loader,
+                                                class_loader, true, CHECK);
+    if (failed_type_name != NULL) {
+      const char* msg = "loader constraint violation: when resolving %s"
+        " \"%s\" the class loader (instance of %s) of the current class, %s,"
+        " and the class loader (instance of %s) for the method's defining class, %s, have"
+        " different Class objects for the type %s used in the signature";
+      char* sig = methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()), method_name, method_signature);
+      const char* loader1 = SystemDictionary::loader_name(loader());
+      char* current = instanceKlass::cast(current_klass())->name()->as_C_string();
+      const char* loader2 = SystemDictionary::loader_name(class_loader());
+      char* resolved = instanceKlass::cast(resolved_klass())->name()->as_C_string();
+      size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
+        strlen(current) + strlen(loader2) + strlen(resolved) +
+        strlen(failed_type_name) + strlen(method_type) + 1;
+      char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
+      jio_snprintf(buf, buflen, msg, method_type, sig, loader1, current, loader2,
+                   resolved, failed_type_name);
+      THROW_MSG(vmSymbols::java_lang_LinkageError(), buf);
+    }
+  }
+}
+
 void LinkResolver::resolve_method(methodHandle& resolved_method, KlassHandle resolved_klass,
                                   Symbol* method_name, Symbol* method_signature,
                                   KlassHandle current_klass, bool check_access, TRAPS) {
@@ -478,32 +512,8 @@
                                CHECK);

     // check loader constraints
-    Handle loader (THREAD, instanceKlass::cast(current_klass())->class_loader());
-    Handle class_loader (THREAD, instanceKlass::cast(resolved_method->method_holder())->class_loader());
-    {
-      ResourceMark rm(THREAD);
-      char* failed_type_name =
-        SystemDictionary::check_signature_loaders(method_signature, loader,
-                                                  class_loader, true, CHECK);
-      if (failed_type_name != NULL) {
-        const char* msg = "loader constraint violation: when resolving method"
-          " \"%s\" the class loader (instance of %s) of the current class, %s,"
-          " and the class loader (instance of %s) for resolved class, %s, have"
-          " different Class objects for the type %s used in the signature";
-        char* sig = methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),method_name,method_signature);
-        const char* loader1 = SystemDictionary::loader_name(loader());
-        char* current = instanceKlass::cast(current_klass())->name()->as_C_string();
-        const char* loader2 = SystemDictionary::loader_name(class_loader());
-        char* resolved = instanceKlass::cast(resolved_klass())->name()->as_C_string();
-        size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-          strlen(current) + strlen(loader2) + strlen(resolved) +
-          strlen(failed_type_name);
-        char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-        jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                     resolved, failed_type_name);
-        THROW_MSG(vmSymbols::java_lang_LinkageError(), buf);
-      }
-    }
+    check_method_loader_constraints(resolved_method, resolved_klass, method_name,
+                                    method_signature, current_klass, "method", CHECK);
   }
 }

@@ -540,34 +550,8 @@
   }

   if (check_access) {
-    HandleMark hm(THREAD);
-    Handle loader (THREAD, instanceKlass::cast(current_klass())->class_loader());
-    Handle class_loader (THREAD, instanceKlass::cast(resolved_method->method_holder())->class_loader());
-    {
-      ResourceMark rm(THREAD);
-      char* failed_type_name =
-        SystemDictionary::check_signature_loaders(method_signature, loader,
-                                                  class_loader, true, CHECK);
-      if (failed_type_name != NULL) {
-        const char* msg = "loader constraint violation: when resolving "
-          "interface method \"%s\" the class loader (instance of %s) of the "
-          "current class, %s, and the class loader (instance of %s) for "
-          "resolved class, %s, have different Class objects for the type %s "
-          "used in the signature";
-        char* sig = methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),method_name,method_signature);
-        const char* loader1 = SystemDictionary::loader_name(loader());
-        char* current = instanceKlass::cast(current_klass())->name()->as_C_string();
-        const char* loader2 = SystemDictionary::loader_name(class_loader());
-        char* resolved = instanceKlass::cast(resolved_klass())->name()->as_C_string();
-        size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-          strlen(current) + strlen(loader2) + strlen(resolved) +
-          strlen(failed_type_name);
-        char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-        jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                     resolved, failed_type_name);
-        THROW_MSG(vmSymbols::java_lang_LinkageError(), buf);
-      }
-    }
+    check_method_loader_constraints(resolved_method, resolved_klass, method_name,
+                                    method_signature, current_klass, "interface method", CHECK);
   }
 }

@@ -819,6 +803,10 @@
                   methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                             resolved_method->name(),
                                             resolved_method->signature()));
+      } else if (sel_method() != resolved_method()) {
+        check_method_loader_constraints(sel_method, resolved_klass,
+                                        sel_method->name(), sel_method->signature(),
+                                        current_klass, "method", CHECK);
       }
     }
   }
--- a/src/share/vm/interpreter/linkResolver.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/interpreter/linkResolver.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,9 @@
   static void resolve_pool  (KlassHandle& resolved_klass, Symbol*& method_name, Symbol*& method_signature, KlassHandle& current_klass, constantPoolHandle pool, int index, TRAPS);

   static void resolve_interface_method(methodHandle& resolved_method, KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, TRAPS);
+  static void check_method_loader_constraints(methodHandle& resolved_method, KlassHandle resolved_klass,
+                                              Symbol* method_name, Symbol* method_signature,
+                                              KlassHandle current_klass, const char* method_type, TRAPS);
   static void resolve_method          (methodHandle& resolved_method, KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, TRAPS);

   static void linktime_resolve_static_method    (methodHandle& resolved_method, KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, TRAPS);
--- a/src/share/vm/memory/filemap.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/memory/filemap.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -185,7 +185,12 @@
     tty->print_cr("   %s", _full_path);
   }

-  // Remove the existing file in case another process has it open.
+#ifdef _WINDOWS  // On Windows, need WRITE permission to remove the file.
+  chmod(_full_path, _S_IREAD | _S_IWRITE);
+#endif
+
+  // Use remove() to delete the existing file because, on Unix, this will
+  // allow processes that have it open continued access to the file.
   remove(_full_path);
   int fd = open(_full_path, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0444);
   if (fd < 0) {
--- a/src/share/vm/opto/chaitin.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/chaitin.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -445,6 +445,9 @@
   // Peephole remove copies
   post_allocate_copy_removal();

+  // Merge multidefs if multiple defs representing the same value are used in a single block.
+  merge_multidefs();
+
 #ifdef ASSERT
   // Veify the graph after RA.
   verify(&live_arena);
--- a/src/share/vm/opto/chaitin.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/chaitin.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -511,6 +511,32 @@
   // Extend the node to LRG mapping
   void add_reference( const Node *node, const Node *old_node);

+  // Record the first use of a def in the block for a register.
+  class RegDefUse {
+    Node* _def;
+    Node* _first_use;
+  public:
+    RegDefUse() : _def(NULL), _first_use(NULL) { }
+    Node* def() const       { return _def;       }
+    Node* first_use() const { return _first_use; }
+
+    void update(Node* def, Node* use) {
+      if (_def != def) {
+        _def = def;
+        _first_use = use;
+      }
+    }
+    void clear() {
+      _def = NULL;
+      _first_use = NULL;
+    }
+  };
+  typedef GrowableArray<RegDefUse> RegToDefUseMap;
+  int possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse);
+
+  // Merge nodes that are a part of a multidef lrg and produce the same value within a block.
+  void merge_multidefs();
+
 private:

   static int _final_loads, _final_stores, _final_copies, _final_memoves;
--- a/src/share/vm/opto/library_call.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/library_call.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -290,6 +290,7 @@
   bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
+  Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
 };


@@ -5503,10 +5504,22 @@
   Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
   if (k_start == NULL) return false;

-  // Call the stub.
-  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
-                    stubAddr, stubName, TypePtr::BOTTOM,
-                    src_start, dest_start, k_start);
+  if (Matcher::pass_original_key_for_aes()) {
+    // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
+    // compatibility issues between Java key expansion and SPARC crypto instructions
+    Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
+    if (original_k_start == NULL) return false;
+
+    // Call the stub.
+    make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      src_start, dest_start, k_start, original_k_start);
+  } else {
+    // Call the stub.
+    make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      src_start, dest_start, k_start);
+  }

   return true;
 }
@@ -5584,14 +5597,29 @@
   if (objRvec == NULL) return false;
   Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);

-  // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
-  make_runtime_call(RC_LEAF|RC_NO_FP,
-                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
-                    stubAddr, stubName, TypePtr::BOTTOM,
-                    src_start, dest_start, k_start, r_start, len);
-
-  // return is void so no result needs to be pushed
-
+  Node* cbcCrypt;
+  if (Matcher::pass_original_key_for_aes()) {
+    // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
+    // compatibility issues between Java key expansion and SPARC crypto instructions
+    Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
+    if (original_k_start == NULL) return false;
+
+    // Call the stub, passing src_start, dest_start, k_start, r_start, src_len and original_k_start
+    cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, dest_start, k_start, r_start, len, original_k_start);
+  } else {
+    // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
+    cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, dest_start, k_start, r_start, len);
+  }
+
+  // return cipher length (int)
+  Node* retvalue = _gvn.transform(new (C) ProjNode(cbcCrypt, TypeFunc::Parms));
+  set_result(retvalue);
   return true;
 }

@@ -5606,6 +5634,17 @@
   return k_start;
 }

+//------------------------------get_original_key_start_from_aescrypt_object-----------------------
+Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescrypt_object) {
+  Node* objAESCryptKey = load_field_from_object(aescrypt_object, "lastKey", "[B", /*is_exact*/ false);
+  assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+  if (objAESCryptKey == NULL) return (Node *) NULL;
+
+  // now have the array, need to get the start address of the lastKey array
+  Node* original_k_start = array_element_address(objAESCryptKey, intcon(0), T_BYTE);
+  return original_k_start;
+}
+
 //----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
 // Return node representing slow path of predicate check.
 // the pseudo code we want to emulate with this predicate is:
--- a/src/share/vm/opto/machnode.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/machnode.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -539,6 +539,29 @@
 #endif
 };

+// MachMergeNode is similar to a PhiNode in a sense it merges multiple values,
+// however it doesn't have a control input and is more like a MergeMem.
+// It is inserted after the register allocation is done to ensure that nodes use single
+// definition of a multidef lrg in a block.
+class MachMergeNode : public MachIdealNode {
+public:
+  MachMergeNode(Node *n1) {
+    init_class_id(Class_MachMerge);
+    add_req(NULL);
+    add_req(n1);
+  }
+  virtual const RegMask &out_RegMask() const { return in(1)->out_RegMask(); }
+  virtual const RegMask &in_RegMask(uint idx) const { return in(1)->in_RegMask(idx); }
+  virtual const class Type *bottom_type() const { return in(1)->bottom_type(); }
+  virtual uint ideal_reg() const { return Matcher::base2reg[bottom_type()->base()]; }
+  virtual uint oper_input_base() const { return 1; }
+  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+  virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
+#ifndef PRODUCT
+  virtual const char *Name() const { return "MachMerge"; }
+#endif
+};
+
 //------------------------------MachBranchNode--------------------------------
 // Abstract machine branch Node
 class MachBranchNode : public MachIdealNode {
--- a/src/share/vm/opto/matcher.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/matcher.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -267,6 +267,9 @@
   // CPU supports misaligned vectors store/load.
   static const bool misaligned_vectors_ok();

+  // Should original key array reference be passed to AES stubs
+  static const bool pass_original_key_for_aes();
+
   // Used to determine a "low complexity" 64-bit constant.  (Zero is simple.)
   // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
   // Depends on the details of 64-bit constant generation on the CPU.
--- a/src/share/vm/opto/node.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/node.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -95,6 +95,7 @@
 class MachSafePointNode;
 class MachSpillCopyNode;
 class MachTempNode;
+class MachMergeNode;
 class Matcher;
 class MemBarNode;
 class MemBarStoreStoreNode;
@@ -586,6 +587,7 @@
       DEFINE_CLASS_ID(MachTemp,         Mach, 3)
       DEFINE_CLASS_ID(MachConstantBase, Mach, 4)
       DEFINE_CLASS_ID(MachConstant,     Mach, 5)
+      DEFINE_CLASS_ID(MachMerge,        Mach, 6)

     DEFINE_CLASS_ID(Type,  Node, 2)
       DEFINE_CLASS_ID(Phi,   Type, 0)
@@ -747,6 +749,7 @@
   DEFINE_CLASS_QUERY(MachSafePoint)
   DEFINE_CLASS_QUERY(MachSpillCopy)
   DEFINE_CLASS_QUERY(MachTemp)
+  DEFINE_CLASS_QUERY(MachMerge)
   DEFINE_CLASS_QUERY(Mem)
   DEFINE_CLASS_QUERY(MemBar)
   DEFINE_CLASS_QUERY(MemBarStoreStore)
--- a/src/share/vm/opto/phase.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/phase.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -71,6 +71,7 @@
 elapsedTimer   Phase::_t_computeLive;
 elapsedTimer   Phase::_t_regAllocSplit;
 elapsedTimer   Phase::_t_postAllocCopyRemoval;
+elapsedTimer   Phase::_t_mergeMultidefs;
 elapsedTimer   Phase::_t_fixupSpills;

 // Subtimers for _t_output
@@ -132,11 +133,12 @@
     tty->print_cr ("      computeLive    : %3.3f sec", Phase::_t_computeLive.seconds());
     tty->print_cr ("      regAllocSplit  : %3.3f sec", Phase::_t_regAllocSplit.seconds());
     tty->print_cr ("      postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
+    tty->print_cr ("      mergeMultidefs: %3.3f sec", Phase::_t_mergeMultidefs.seconds());
     tty->print_cr ("      fixupSpills    : %3.3f sec", Phase::_t_fixupSpills.seconds());
     double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
       Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
       Phase::_t_regAllocSplit.seconds()    + Phase::_t_fixupSpills.seconds() +
-      Phase::_t_postAllocCopyRemoval.seconds();
+      Phase::_t_postAllocCopyRemoval.seconds() + Phase::_t_mergeMultidefs.seconds();
     double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
     tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
   }
--- a/src/share/vm/opto/phase.hpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/phase.hpp	Sat Feb 03 14:01:55 2018 -0800
@@ -107,6 +107,7 @@
   static elapsedTimer   _t_computeLive;
   static elapsedTimer   _t_regAllocSplit;
   static elapsedTimer   _t_postAllocCopyRemoval;
+  static elapsedTimer   _t_mergeMultidefs;
   static elapsedTimer   _t_fixupSpills;

 // Subtimers for _t_output
--- a/src/share/vm/opto/postaloc.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/postaloc.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -256,20 +256,6 @@
   // intermediate copies might be illegal, i.e., value is stored down to stack
   // then reloaded BUT survives in a register the whole way.
   Node *val = skip_copies(n->in(k));
-
-  if (val == x && nk_idx != 0 &&
-      regnd[nk_reg] != NULL && regnd[nk_reg] != x &&
-      n2lidx(x) == n2lidx(regnd[nk_reg])) {
-    // When rematerialzing nodes and stretching lifetimes, the
-    // allocator will reuse the original def for multidef LRG instead
-    // of the current reaching def because it can't know it's safe to
-    // do so.  After allocation completes if they are in the same LRG
-    // then it should use the current reaching def instead.
-    n->set_req(k, regnd[nk_reg]);
-    blk_adjust += yank_if_dead(val, current_block, &value, &regnd);
-    val = skip_copies(n->in(k));
-  }
-
   if (val == x) return blk_adjust; // No progress?

   int n_regs = RegMask::num_registers(val->ideal_reg());
@@ -375,6 +361,94 @@
   return false;
 }

+// The algorithms works as follows:
+// We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
+// of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
+// seen a definition (coming as an input) and add that fact to the reg2defuse array. The array maps registers to their
+// current reaching definitions (we track only multidefs though). With each definition we also associate the first
+// instruction we saw use it. If we encounter the situation when we observe an def (an input) that is a part of the
+// same lrg but is different from the previous seen def we merge the two with a MachMerge node and substitute
+// all the uses that we've seen so far to use the merge. After that we keep replacing the new defs in the same lrg
+// as they get encountered with the merge node and keep adding these defs to the merge inputs.
+void PhaseChaitin::merge_multidefs() {
+  NOT_PRODUCT( Compile::TracePhase t3("mergeMultidefs", &_t_mergeMultidefs, TimeCompiler); )
+  ResourceMark rm;
+  // Keep track of the defs seen in registers and collect their uses in the block.
+  RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
+  for (uint i = 0; i < _cfg._num_blocks; i++) {
+    Block* block = _cfg._blocks[i];
+    for (uint j = 1; j < block->_nodes.size(); j++) {
+      Node* n = block->_nodes[j];
+      if (n->is_Phi()) continue;
+      for (uint k = 1; k < n->req(); k++) {
+        j += possibly_merge_multidef(n, k, block, reg2defuse);
+      }
+      // Null out the value produced by the instruction itself, since we're only interested in defs
+      // implicitly defined by the uses. We are actually interested in tracking only redefinitions
+      // of the multidef lrgs in the same register. For that matter it's enough to track changes in
+      // the base register only and ignore other effects of multi-register lrgs and fat projections.
+      // It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
+      // those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
+      uint lrg = n2lidx(n);
+      if (lrg > 0 && lrgs(lrg).is_multidef()) {
+        OptoReg::Name reg = lrgs(lrg).reg();
+        reg2defuse.at(reg).clear();
+      }
+    }
+    // Clear reg->def->use tracking for the next block
+    for (int j = 0; j < reg2defuse.length(); j++) {
+      reg2defuse.at(j).clear();
+    }
+  }
+}
+
+int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
+  int blk_adjust = 0;
+
+  uint lrg = n2lidx(n->in(k));
+  if (lrg > 0 && lrgs(lrg).is_multidef()) {
+    OptoReg::Name reg = lrgs(lrg).reg();
+
+    Node* def = reg2defuse.at(reg).def();
+    if (def != NULL && lrg == n2lidx(def) && def != n->in(k)) {
+      // Same lrg but different node, we have to merge.
+      MachMergeNode* merge;
+      if (def->is_MachMerge()) { // is it already a merge?
+        merge = def->as_MachMerge();
+      } else {
+        merge = new (C) MachMergeNode(def);
+
+        // Insert the merge node into the block before the first use.
+        uint use_index = block->find_node(reg2defuse.at(reg).first_use());
+        block->_nodes.insert(use_index++, merge);
+
+        // Let the allocator know about the new node, use the same lrg
+        _names.extend(merge->_idx, lrg);
+        blk_adjust++;
+
+        // Fixup all the uses (there is at least one) that happened between the first
+        // use and before the current one.
+        for (; use_index < block->_nodes.size(); use_index++) {
+          Node* use = block->_nodes[use_index];
+          if (use == n) {
+            break;
+          }
+          use->replace_edge(def, merge);
+        }
+      }
+      if (merge->find_edge(n->in(k)) == -1) {
+        merge->add_req(n->in(k));
+      }
+      n->set_req(k, merge);
+    }
+
+    // update the uses
+    reg2defuse.at(reg).update(n->in(k), n);
+  }
+
+  return blk_adjust;
+}
+

 //------------------------------post_allocate_copy_removal---------------------
 // Post-Allocation peephole copy removal.  We do this in 1 pass over the
--- a/src/share/vm/opto/runtime.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/opto/runtime.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -827,12 +827,18 @@
 const TypeFunc* OptoRuntime::aescrypt_block_Type() {
   // create input type (domain)
   int num_args      = 3;
+  if (Matcher::pass_original_key_for_aes()) {
+    num_args = 4;
+  }
   int argcnt = num_args;
   const Type** fields = TypeTuple::fields(argcnt);
   int argp = TypeFunc::Parms;
   fields[argp++] = TypePtr::NOTNULL;    // src
   fields[argp++] = TypePtr::NOTNULL;    // dest
   fields[argp++] = TypePtr::NOTNULL;    // k array
+  if (Matcher::pass_original_key_for_aes()) {
+    fields[argp++] = TypePtr::NOTNULL;    // original k array
+  }
   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);

@@ -847,6 +853,9 @@
 const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
   // create input type (domain)
   int num_args      = 5;
+  if (Matcher::pass_original_key_for_aes()) {
+    num_args = 6;
+  }
   int argcnt = num_args;
   const Type** fields = TypeTuple::fields(argcnt);
   int argp = TypeFunc::Parms;
@@ -855,13 +864,16 @@
   fields[argp++] = TypePtr::NOTNULL;    // k array
   fields[argp++] = TypePtr::NOTNULL;    // r array
   fields[argp++] = TypeInt::INT;        // src len
+  if (Matcher::pass_original_key_for_aes()) {
+    fields[argp++] = TypePtr::NOTNULL;    // original k array
+  }
   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);

-  // no result type needed
+  // returning cipher len (int)
   fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms+0] = NULL; // void
-  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  fields[TypeFunc::Parms+0] = TypeInt::INT;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
   return TypeFunc::make(domain, range);
 }
--- a/src/share/vm/runtime/arguments.cpp	Fri Sep 08 09:32:12 2017 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Sat Feb 03 14:01:55 2018 -0800
@@ -2772,8 +2772,6 @@

       // Enable parallel GC and adaptive generation sizing
       FLAG_SET_CMDLINE(bool, UseParallelGC, true);
-      FLAG_SET_DEFAULT(ParallelGCThreads,
-                       Abstract_VM_Version::parallel_worker_threads());

       // Encourage steady state memory management
       FLAG_SET_CMDLINE(uintx, ThresholdTolerance, 100);
--- a/test/compiler/7184394/TestAESMain.java	Fri Sep 08 09:32:12 2017 -0700
+++ b/test/compiler/7184394/TestAESMain.java	Sat Feb 03 14:01:55 2018 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,20 +39,32 @@
     System.out.println(iters + " iterations");
     TestAESEncode etest = new TestAESEncode();
     etest.prepare();
+    // warm-up for 20K iterations
+    System.out.println("Starting encryption warm-up");
+    for (int i=0; i<20000; i++) {
+      etest.run();
+    }
+    System.out.println("Finished encryption warm-up");
     long start = System.nanoTime();
     for (int i=0; i<iters; i++) {
       etest.run();
     }
     long end = System.nanoTime();
-    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");

     TestAESDecode dtest = new TestAESDecode();
     dtest.prepare();
+    // warm-up for 20K iterations
+    System.out.println("Starting decryption warm-up");
+    for (int i=0; i<20000; i++) {
+      dtest.run();
+    }
+    System.out.println("Finished decryption warm-up");
     start = System.nanoTime();
     for (int i=0; i<iters; i++) {
       dtest.run();
     }
     end = System.nanoTime();
-    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
   }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/arguments/TestAggressiveHeap.java	Sat Feb 03 14:01:55 2018 -0800
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestAggressiveHeap
+ * @key gc
+ * @bug 8179084
+ * @summary Test argument processing for -XX:+AggressiveHeap.
+ * @library /testlibrary
+ * @run driver TestAggressiveHeap
+ */
+
+import java.lang.management.ManagementFactory;
+import javax.management.MBeanServer;
+import javax.management.ObjectName;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+public class TestAggressiveHeap {
+
+    public static void main(String args[]) throws Exception {
+        if (canUseAggressiveHeapOption()) {
+            testFlag();
+        }
+    }
+
+    // Note: Not a normal boolean flag; -XX:-AggressiveHeap is invalid.
+    private static final String option = "-XX:+AggressiveHeap";
+
+    // Option requires at least 256M, else error during option processing.
+    private static final long minMemory = 256 * 1024 * 1024;
+
+    // bool UseParallelGC := true {product}
+    private static final String parallelGCPattern =
+        " *bool +UseParallelGC *:= *true +\\{product\\}";
+
+    private static void testFlag() throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            option, "-XX:+PrintFlagsFinal", "-version");
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+        output.shouldHaveExitValue(0);
+
+        String value = output.firstMatch(parallelGCPattern);
+        if (value == null) {
+            throw new RuntimeException(
+                option + " didn't set UseParallelGC");
+        }
+    }
+
+    private static boolean haveRequiredMemory() throws Exception {
+        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
+        ObjectName os = new ObjectName("java.lang", "type", "OperatingSystem");
+        Object attr = server.getAttribute(os, "TotalPhysicalMemorySize");
+        String value = attr.toString();
+        long memory = Long.parseLong(value);
+        return memory >= minMemory;
+    }
+
+    private static boolean canUseAggressiveHeapOption() throws Exception {
+        if (!haveRequiredMemory()) {
+            System.out.println(
+                "Skipping test of " + option + " : insufficient memory");
+            return false;
+        }
+        return true;
+    }
+}
+