changeset 10564:20258ba5a788 jdk8u242-b04

Merge
author andrew
date Wed, 04 Dec 2019 16:23:46 +0000
parents ff1018e27c6e (current diff) e55d4d896e30 (diff)
children 371da86379cf
files
diffstat 33 files changed, 1004 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -194,6 +194,11 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  if (UseGHASHIntrinsics) {
+    warning("GHASH intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   if (has_vshasig()) {
     if (FLAG_IS_DEFAULT(UseSHA)) {
       UseSHA = true;
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,6 +129,7 @@
     flog3_op3    = 0x36,
     edge_op3     = 0x36,
     fsrc_op3     = 0x36,
+    xmulx_op3    = 0x36,
     impdep2_op3  = 0x37,
     stpartialf_op3 = 0x37,
     jmpl_op3     = 0x38,
@@ -220,6 +221,8 @@
     mdtox_opf          = 0x110,
     mstouw_opf         = 0x111,
     mstosw_opf         = 0x113,
+    xmulx_opf          = 0x115,
+    xmulxhi_opf        = 0x116,
     mxtod_opf          = 0x118,
     mwtos_opf          = 0x119,
 
@@ -1212,6 +1215,9 @@
   void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
   void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
+  void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); } // VIS3-only; presumably the low 64 bits of the XOR (carry-less) product of s1 and s2 -- confirm against the SPARC spec
+  void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); } // VIS3-only; presumably the high 64 bits of the XOR (carry-less) product of s1 and s2 -- confirm against the SPARC spec
+
   // Crypto SHA instructions
 
   void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -4788,6 +4788,130 @@
     return start;
   }
 
+  /* Single and multi-block ghash operations: folds 16-byte blocks of 'data' into 'state' using 'subkeyH' */
+  address generate_ghash_processBlocks() {
+      __ align(CodeEntryAlignment);
+      Label L_ghash_loop, L_aligned, L_main;
+      StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+      address start = __ pc();
+
+      Register state = I0;    // 16 bytes, read on entry and written back on exit
+      Register subkeyH = I1;  // 16 bytes, reloaded at L_main on every iteration
+      Register data = I2;     // input pointer, advanced by 16 per iteration
+      Register len = I3;      // block counter, decremented once per iteration
+
+      __ save_frame(0);
+
+      __ ldx(state, 0, O0);   // O0:O1 carry the running state across iterations
+      __ ldx(state, 8, O1);
+
+      // Loop label for multiblock operations
+      __ BIND(L_ghash_loop);
+
+      // Check if 'data' is unaligned (G1 = data & 7, the byte offset within a doubleword)
+      __ andcc(data, 7, G1);
+      __ br(Assembler::zero, false, Assembler::pt, L_aligned);
+      __ delayed()->nop();
+
+      Register left_shift = L1;
+      Register right_shift = L2;
+      Register data_ptr = L3;
+
+      // Get left and right shift values in bits: right_shift = 64 - left_shift
+      __ sll(G1, LogBitsPerByte, left_shift);
+      __ mov(64, right_shift);
+      __ sub(right_shift, left_shift, right_shift);
+
+      // Round 'data' down by the misalignment so the ldx loads below use an aligned base
+      __ sub(data, G1, data_ptr);
+
+      // Assemble the first 8 bytes of 'data' in O4 from two adjacent aligned doublewords
+      __ ldx(data_ptr, 0, O4);
+      __ sllx(O4, left_shift, O4);
+      __ ldx(data_ptr, 8, O5);
+      __ srlx(O5, right_shift, G4);
+      __ bset(G4, O4);
+
+      // Assemble the second 8 bytes of 'data' in O5 the same way
+      __ sllx(O5, left_shift, O5);
+      __ ldx(data_ptr, 16, G4);
+      __ srlx(G4, right_shift, G4);
+      __ ba(L_main);
+      __ delayed()->bset(G4, O5);   // delay slot: completes O5 before control reaches L_main
+
+      // If 'data' is aligned, load normally
+      __ BIND(L_aligned);
+      __ ldx(data, 0, O4);
+      __ ldx(data, 8, O5);
+
+      __ BIND(L_main);
+      __ ldx(subkeyH, 0, O2);
+      __ ldx(subkeyH, 8, O3);
+
+      __ xor3(O0, O4, O0);          // state ^= data block
+      __ xor3(O1, O5, O1);
+
+      __ xmulxhi(O0, O3, G3);       // eight xmulx/xmulxhi products of state (O0,O1) with subkeyH (O2,O3)
+      __ xmulx(O0, O2, O5);
+      __ xmulxhi(O1, O2, G4);
+      __ xmulxhi(O1, O3, G5);
+      __ xmulx(O0, O3, G1);
+      __ xmulx(O1, O3, G2);
+      __ xmulx(O1, O2, O3);
+      __ xmulxhi(O0, O2, O4);
+
+      __ mov(0xE1, O0);             // O0 = 0xE1 << 56, the multiplier for the four xmulx/xmulxhi steps below
+      __ sllx(O0, 56, O0);
+
+      __ xor3(O5, G3, O5);
+      __ xor3(O5, G4, O5);
+      __ xor3(G5, G1, G1);
+      __ xor3(G1, O3, G1);
+      __ srlx(G2, 63, O1);
+      __ srlx(G1, 63, G3);
+      __ sllx(G2, 63, O3);
+      __ sllx(G2, 58, O2);
+      __ xor3(O3, O2, O2);
+
+      __ sllx(G1, 1, G1);
+      __ or3(G1, O1, G1);
+
+      __ xor3(G1, O2, G1);
+
+      __ sllx(G2, 1, G2);
+
+      __ xmulxhi(G1, O0, O1);
+      __ xmulx(G1, O0, O2);
+      __ xmulxhi(G2, O0, O3);
+      __ xmulx(G2, O0, G1);
+
+      __ xor3(O4, O1, O4);
+      __ xor3(O5, O2, O5);
+      __ xor3(O5, O3, O5);
+
+      __ sllx(O4, 1, O2);
+      __ srlx(O5, 63, O3);
+
+      __ or3(O2, O3, O0);           // O0 = new first state word (stored at state+0 on exit)
+
+      __ sllx(O5, 1, O1);
+      __ srlx(G1, 63, O2);
+      __ or3(O1, O2, O1);
+      __ xor3(O1, G3, O1);          // O1 = new second state word (stored at state+8 on exit)
+
+      __ deccc(len);                // NOTE(review): tested only after a block is processed, so len >= 1 is assumed -- confirm callers
+      __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
+      __ delayed()->add(data, 16, data);   // annul bit set: 'data' advances only when the branch is taken
+
+      __ stx(O0, I0, 0);            // I0 is 'state'
+      __ stx(O1, I0, 8);
+
+      __ ret();
+      __ delayed()->restore();
+
+      return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -4860,6 +4984,10 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
+    // generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
 
     // generate SHA1/SHA256/SHA512 intrinsics code
     if (UseSHA1Intrinsics) {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -319,6 +319,17 @@
     }
   }
 
+  // GHASH/GCM intrinsics: on by default when VIS3 is present and UseVIS > 2; otherwise forced off
+  if (has_vis3() && (UseVIS > 2)) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      UseGHASHIntrinsics = true;
+    }
+  } else if (UseGHASHIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))  // warn only when the flag was set explicitly
+      warning("GHASH intrinsics require VIS3 instruction support. Intrinsics will be disabled");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
   if (has_sha1() || has_sha256() || has_sha512()) {
     if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -2575,6 +2575,15 @@
   emit_int8(shift);
 }
 
+void Assembler::pslldq(XMMRegister dst, int shift) {
+  // Emits PSLLDQ dst, imm8: shift the 128-bit value in dst left by 'shift' bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);  // xmm7 is not an operand; presumably it supplies the /7 opcode extension (66 0F 73 /7 ib) -- confirm against the Intel SDM
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 selects the register-direct ModRM form
+  emit_int8(shift);                           // immediate byte count
+}
+
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -1527,6 +1527,8 @@
 
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
+  // Shift Left by bytes Logical DoubleQuadword Immediate
+  void pslldq(XMMRegister dst, int shift);
 
   // Logical Compare 128bit
   void ptest(XMMRegister dst, XMMRegister src);
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -2719,6 +2719,169 @@
     return start;
   }
 
+  // 16-byte pshufb mask selecting source bytes 8..15 then 0..7, i.e. it exchanges the two 64-bit halves
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0b0a0908, relocInfo::none, 0);  // mask bytes 0..3  -> source bytes 8..11
+    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);  // mask bytes 4..7  -> source bytes 12..15
+    __ emit_data(0x03020100, relocInfo::none, 0);  // mask bytes 8..11 -> source bytes 0..3
+    __ emit_data(0x07060504, relocInfo::none, 0);  // mask bytes 12..15 -> source bytes 4..7
+
+  return start;
+  }
+
+  // 16-byte pshufb mask selecting source bytes 15 down to 0, i.e. it reverses all 16 bytes
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);  // mask bytes 0..3  -> source bytes 15..12
+    __ emit_data(0x08090a0b, relocInfo::none, 0);  // mask bytes 4..7  -> source bytes 11..8
+    __ emit_data(0x04050607, relocInfo::none, 0);  // mask bytes 8..11 -> source bytes 7..4
+    __ emit_data(0x00010203, relocInfo::none, 0);  // mask bytes 12..15 -> source bytes 3..0
+  return start;
+  }
+
+  /* Single and multi-block ghash operations: folds 16-byte blocks of 'data' into 'state' using 'subkeyH' */
+  address generate_ghash_processBlocks() {
+    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = rdi;
+    const Register subkeyH      = rsi;
+    const Register data         = rdx;
+    const Register blocks       = rcx;
+
+    const Address  state_param(rbp, 8+0);     // the four arguments are read from the stack, relative to rbp after enter()
+    const Address  subkeyH_param(rbp, 8+4);
+    const Address  data_param(rbp, 8+8);
+    const Address  blocks_param(rbp, 8+12);
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+
+    __ enter();
+    handleSOERegisters(true);  // Save registers
+
+    __ movptr(state, state_param);
+    __ movptr(subkeyH, subkeyH_param);
+    __ movptr(data, data_param);
+    __ movptr(blocks, blocks_param);
+
+    __ movdqu(xmm_temp0, Address(state, 0));      // xmm0 = running state, 64-bit halves exchanged by the mask
+    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));    // xmm1 = subkeyH, loaded once; not written inside the loop
+    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));       // xmm2 = next 16-byte block, shuffled with the byte swap mask
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ pxor(xmm_temp0, xmm_temp2);                // state ^= data block
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 8 bytes (64 bits)
+    __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 8 bytes (64 bits)
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to compensate for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp6);
+    __ pslld (xmm_temp3, 1);
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);
+    __ psrld(xmm_temp4, 31);
+    __ movdqu(xmm_temp5, xmm_temp7);
+    __ pslldq(xmm_temp4, 4);
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp5, 12);
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp4);
+    __ por(xmm_temp6, xmm_temp5);
+
+    //
+    // First phase of the reduction
+    //
+    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed left shift of each dword: << 31
+    __ pslld(xmm_temp4, 30);    // packed left shift of each dword: << 30
+    __ pslld(xmm_temp5, 25);    // packed left shift of each dword: << 25
+    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp5);
+    __ movdqu(xmm_temp4, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp4, 4);
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed right shift of each dword: >> 1
+    __ psrld(xmm_temp7, 2);     // packed right shift of each dword: >> 2
+    __ psrld(xmm_temp5, 7);     // packed right shift of each dword: >> 7
+    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp4);
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);               // NOTE(review): tested only after a block is processed, so blocks >= 1 is assumed -- confirm callers
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);    // result becomes the running state for the next block
+    __ addptr(data, 16);
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+       // Apply the long swap mask (64-bit half exchange) to the 16-byte result, undoing the shuffle applied on load
+    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+    handleSOERegisters(false);  // restore registers
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -3018,6 +3181,13 @@
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -3639,6 +3639,175 @@
     return start;
   }
 
+
+  // 16-byte pshufb mask selecting source bytes 8..15 then 0..7, i.e. it exchanges the two 64-bit halves
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();
+    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );  // mask bytes 0..7  -> source bytes 8..15
+    __ emit_data64(0x0706050403020100, relocInfo::none );  // mask bytes 8..15 -> source bytes 0..7
+  return start;
+  }
+
+  // 16-byte pshufb mask selecting source bytes 15 down to 0, i.e. it reverses all 16 bytes
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();
+    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );  // mask bytes 0..7  -> source bytes 15..8
+    __ emit_data64(0x0001020304050607, relocInfo::none );  // mask bytes 8..15 -> source bytes 7..0
+  return start;
+  }
+
+  /* Single and multi-block ghash operations: folds 16-byte blocks of 'data' into 'state' using 'subkeyH' */
+  address generate_ghash_processBlocks() {
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = c_rarg0;
+    const Register subkeyH      = c_rarg1;
+    const Register data         = c_rarg2;
+    const Register blocks       = c_rarg3;
+
+#ifdef _WIN64
+    const int XMM_REG_LAST  = 10;
+#endif
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+    const XMMRegister xmm_temp8 = xmm8;
+    const XMMRegister xmm_temp9 = xmm9;
+    const XMMRegister xmm_temp10 = xmm10;
+
+    __ enter();
+
+#ifdef _WIN64
+    // save xmm6..xmm10 (XMM_REG_LAST), which this stub overwrites and must preserve
+    __ subptr(rsp, -rsp_after_call_off * wordSize);  // presumably reserves the area addressed by xmm_save(i) -- confirm
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+
+    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));  // xmm10 keeps the long swap mask for the whole stub
+
+    __ movdqu(xmm_temp0, Address(state, 0));   // xmm0 = running state, 64-bit halves exchanged by the mask
+    __ pshufb(xmm_temp0, xmm_temp10);
+
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));    // xmm2 = next 16-byte block, shuffled with the byte swap mask
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0)); // NOTE(review): reloaded every iteration although xmm1 is never written in the loop; the 32-bit stub loads it once before the loop
+    __ pshufb(xmm_temp1, xmm_temp10);
+
+    __ pxor(xmm_temp0, xmm_temp2);             // state ^= data block
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 8 bytes (64 bits)
+    __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 8 bytes (64 bits)
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to compensate for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp6);
+    __ pslld(xmm_temp3, 1);
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);
+    __ psrld(xmm_temp8, 31);
+    __ movdqu(xmm_temp9, xmm_temp7);
+    __ pslldq(xmm_temp8, 4);
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp9, 12);
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp8);
+    __ por(xmm_temp6, xmm_temp9);
+
+    //
+    // First phase of the reduction
+    //
+    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp3);
+    __ movdqu(xmm_temp9, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed left shift of each dword: << 31
+    __ pslld(xmm_temp8, 30);    // packed left shift of each dword: << 30
+    __ pslld(xmm_temp9, 25);    // packed left shift of each dword: << 25
+    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp9);
+    __ movdqu(xmm_temp8, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp8, 4);
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed right shift of each dword: >> 1
+    __ psrld(xmm_temp4, 2);     // packed right shift of each dword: >> 2
+    __ psrld(xmm_temp5, 7);     // packed right shift of each dword: >> 7
+    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp8);
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);               // NOTE(review): tested only after a block is processed, so blocks >= 1 is assumed -- confirm callers
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);    // result becomes the running state for the next block
+    __ addptr(data, 16);
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+    __ pshufb(xmm_temp6, xmm_temp10);          // apply the long swap mask (64-bit half exchange) to the result
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -4077,6 +4246,13 @@
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,8 @@
 
 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
+address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
 
 uint64_t StubRoutines::x86::_crc_by128_masks[] =
 {
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,10 +36,15 @@
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
+  // swap mask for ghash
+  static address _ghash_long_swap_mask_addr;
+  static address _ghash_byte_swap_mask_addr;
 
  public:
   static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
   static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
+  static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
+  static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
 
 #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -594,6 +594,17 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  // GHASH/GCM intrinsics. NOTE(review): the gate is UseSSE > 2 but the warning text below says SSE2 -- confirm which is intended
+  if (UseCLMUL && (UseSSE > 2)) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      UseGHASHIntrinsics = true;
+    }
+  } else if (UseGHASHIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))  // warn only when the flag was set explicitly
+      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   if (UseSHA) {
     warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -863,6 +863,12 @@
    do_name(     implCompressMB_name,                               "implCompressMultiBlock0")                           \
    do_signature(implCompressMB_signature,                          "([BII)I")                                           \
                                                                                                                         \
+  /* support for com.sun.crypto.provider.GHASH */                                                                       \
+  do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH")                                              \
+  do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \
+   do_name(processBlocks_name, "processBlocks")                                                                         \
+   do_signature(ghash_processBlocks_signature, "([BII[J[J)V")                                                           \
+                                                                                                                        \
   /* support for java.util.zip */                                                                                       \
   do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
   do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -2520,6 +2520,12 @@
           }
         }
       }
+    } else if (GC_locker::should_discard(cause, gc_count_before)) {
+      // Return to be consistent with VMOp failure due to another
+      // collection slipping in after our gc_count but before our
+      // request is processed.  _gc_locker collections upgraded by
+      // GCLockerInvokesConcurrent are handled above and never discarded.
+      return;
     } else {
       if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc
           DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -530,6 +530,10 @@
     full_gc_count = Universe::heap()->total_full_collections();
   }
 
+  if (GC_locker::should_discard(cause, gc_count)) {
+    return;
+  }
+
   VM_ParallelGCSystemGC op(gc_count, full_gc_count, cause);
   VMThread::execute(&op);
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -52,11 +52,16 @@
   }
 }
 
+static bool is_cause_full(GCCause::Cause cause) {  // false only for the causes enumerated below
+  return !((cause == GCCause::_gc_locker) || (cause == GCCause::_wb_young_gc)
+           DEBUG_ONLY(|| (cause == GCCause::_scavenge_alot)));
+}
+
 // Only used for System.gc() calls
 VM_ParallelGCSystemGC::VM_ParallelGCSystemGC(uint gc_count,
                                              uint full_gc_count,
                                              GCCause::Cause gc_cause) :
-  VM_GC_Operation(gc_count, gc_cause, full_gc_count, true /* full */)
+  VM_GC_Operation(gc_count, gc_cause, full_gc_count, is_cause_full(gc_cause))
 {
 }
 
@@ -68,8 +73,7 @@
     "must be a ParallelScavengeHeap");
 
   GCCauseSetter gccs(heap, _gc_cause);
-  if (_gc_cause == GCCause::_gc_locker || _gc_cause == GCCause::_wb_young_gc
-      DEBUG_ONLY(|| _gc_cause == GCCause::_scavenge_alot)) {
+  if (!_full) {
     // If (and only if) the scavenge fails, this will invoke a full gc.
     heap->invoke_scavenge();
   } else {
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -201,6 +201,19 @@
   }
 }
 
+static bool is_full_gc(int max_level) {
+  const int last_gen_index = GenCollectedHeap::heap()->n_gens() - 1;  // index of the final generation
+  return max_level == last_gen_index;                                 // true when max_level covers all generations
+}
+
+VM_GenCollectFull::VM_GenCollectFull(uint gc_count_before,
+                                     uint full_gc_count_before,
+                                     GCCause::Cause gc_cause,
+                                     int max_level)
+  : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before,
+                    is_full_gc(max_level) /* full only when max_level is all generations */),
+    _max_level(max_level) { }
+
 void VM_GenCollectFull::doit() {
   SvcGCMarker sgcm(SvcGCMarker::FULL);
 
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -201,9 +201,7 @@
   VM_GenCollectFull(uint gc_count_before,
                     uint full_gc_count_before,
                     GCCause::Cause gc_cause,
-                    int max_level)
-    : VM_GC_Operation(gc_count_before, gc_cause, full_gc_count_before, true /* full */),
-      _max_level(max_level) { }
+                    int max_level);
   ~VM_GenCollectFull() {}
   virtual VMOp_Type type() const { return VMOp_GenCollectFull; }
   virtual void doit();
--- a/src/share/vm/memory/gcLocker.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/memory/gcLocker.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -31,6 +31,7 @@
 volatile jint GC_locker::_jni_lock_count = 0;
 volatile bool GC_locker::_needs_gc       = false;
 volatile bool GC_locker::_doing_gc       = false;
+unsigned int  GC_locker::_total_collections = 0;
 
 #ifdef ASSERT
 volatile jint GC_locker::_debug_jni_lock_count = 0;
@@ -94,6 +95,11 @@
   }
 }
 
+bool GC_locker::should_discard(GCCause::Cause cause, uint total_collections) {
+  const bool is_locker_request = (cause == GCCause::_gc_locker);  // any other cause is never discarded here
+  return is_locker_request && (total_collections != _total_collections);
+}
+
 void GC_locker::jni_lock(JavaThread* thread) {
   assert(!thread->in_critical(), "shouldn't currently be in a critical region");
   MutexLocker mu(JNICritical_lock);
@@ -117,7 +123,13 @@
   decrement_debug_jni_lock_count();
   thread->exit_critical();
   if (needs_gc() && !is_active_internal()) {
-    // We're the last thread out. Cause a GC to occur.
+    // We're the last thread out. Request a GC.
+    // Capture the current total collections, to allow detection of
+    // other collections that make this one unnecessary.  The value of
+    // total_collections() is only changed at a safepoint, so there
+    // must not be a safepoint between the lock becoming inactive and
+    // getting the count, else there may be unnecessary GCLocker GCs.
+    _total_collections = Universe::heap()->total_collections();
     _doing_gc = true;
     {
       // Must give up the lock while at a safepoint
--- a/src/share/vm/memory/gcLocker.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/memory/gcLocker.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -26,6 +26,7 @@
 #define SHARE_VM_MEMORY_GCLOCKER_HPP
 
 #include "gc_interface/collectedHeap.hpp"
+#include "gc_interface/gcCause.hpp"
 #include "memory/genCollectedHeap.hpp"
 #include "memory/universe.hpp"
 #include "oops/oop.hpp"
@@ -57,6 +58,7 @@
   static volatile bool _needs_gc;        // heap is filling, we need a GC
                                          // note: bool is typedef'd as jint
   static volatile bool _doing_gc;        // unlock_critical() is doing a GC
+  static uint _total_collections;        // value for _gc_locker collection
 
 #ifdef ASSERT
   // This lock count is updated for all operations and is used to
@@ -116,6 +118,12 @@
   // Sets _needs_gc if is_active() is true. Returns is_active().
   static bool check_active_before_gc();
 
+  // Return true if the designated collection is a GCLocker request
+  // that should be discarded.  Returns true if cause == GCCause::_gc_locker
+  // and the given total collection value indicates a collection has been
+  // done since the GCLocker request was made.
+  static bool should_discard(GCCause::Cause cause, uint total_collections);
+
   // Stalls the caller (who should not be in a jni critical section)
   // until needs_gc() clears. Note however that needs_gc() may be
   // set at a subsequent safepoint and/or cleared under the
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -796,8 +796,11 @@
 #else  // INCLUDE_ALL_GCS
     ShouldNotReachHere();
 #endif // INCLUDE_ALL_GCS
-  } else if (cause == GCCause::_wb_young_gc) {
-    // minor collection for WhiteBox API
+  } else if ((cause == GCCause::_wb_young_gc) ||
+             (cause == GCCause::_gc_locker)) {
+    // minor collection for WhiteBox or GCLocker.
+    // _gc_locker collections upgraded by GCLockerInvokesConcurrent
+    // are handled above and never discarded.
     collect(cause, 0);
   } else {
 #ifdef ASSERT
@@ -835,6 +838,11 @@
   // Read the GC count while holding the Heap_lock
   unsigned int gc_count_before      = total_collections();
   unsigned int full_gc_count_before = total_full_collections();
+
+  if (GC_locker::should_discard(cause, gc_count_before)) {
+    return;
+  }
+
   {
     MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
     VM_GenCollectFull op(gc_count_before, full_gc_count_before,
@@ -887,24 +895,16 @@
 
 void GenCollectedHeap::do_full_collection(bool clear_all_soft_refs,
                                           int max_level) {
-  int local_max_level;
-  if (!incremental_collection_will_fail(false /* don't consult_young */) &&
-      gc_cause() == GCCause::_gc_locker) {
-    local_max_level = 0;
-  } else {
-    local_max_level = max_level;
-  }
 
   do_collection(true                 /* full */,
                 clear_all_soft_refs  /* clear_all_soft_refs */,
                 0                    /* size */,
                 false                /* is_tlab */,
-                local_max_level      /* max_level */);
+                max_level            /* max_level */);
   // Hack XXX FIX ME !!!
   // A scavenge may not have been attempted, or may have
   // been attempted and failed, because the old gen was too full
-  if (local_max_level == 0 && gc_cause() == GCCause::_gc_locker &&
-      incremental_collection_will_fail(false /* don't consult_young */)) {
+  if (gc_cause() == GCCause::_gc_locker && incremental_collection_failed()) {
     if (PrintGCDetails) {
       gclog_or_tty->print_cr("GC locker: Trying a full collection "
                              "because scavenge failed");
--- a/src/share/vm/opto/escape.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/opto/escape.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -952,6 +952,7 @@
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
--- a/src/share/vm/opto/library_call.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/opto/library_call.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -311,6 +311,7 @@
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
+  bool inline_ghash_processBlocks();
   bool inline_sha_implCompress(vmIntrinsics::ID id);
   bool inline_digestBase_implCompressMB(int predicate);
   bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
@@ -570,6 +571,10 @@
     predicates = 3;
     break;
 
+  case vmIntrinsics::_ghash_processBlocks:
+    if (!UseGHASHIntrinsics) return NULL;
+    break;
+
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
@@ -957,6 +962,9 @@
   case vmIntrinsics::_montgomerySquare:
     return inline_montgomerySquare();
 
+  case vmIntrinsics::_ghash_processBlocks:
+    return inline_ghash_processBlocks();
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
@@ -6599,6 +6607,35 @@
   return _gvn.transform(region);
 }
 
+//------------------------------inline_ghash_processBlocks--------------------
+// Emits a leaf call to the GHASH stub with (state, subkeyH, data + offset, len).
+bool LibraryCallKit::inline_ghash_processBlocks() {
+  assert(UseGHASHIntrinsics, "need GHASH intrinsics support");
+  // No stub generated (address still NULL): decline before touching the graph.
+  address stubAddr = StubRoutines::ghash_processBlocks();
+  if (stubAddr == NULL) return false;
+  const char* stubName = "ghash_processBlocks";
+
+  Node* data           = argument(0);
+  Node* offset         = argument(1);
+  Node* len            = argument(2);
+  Node* state          = argument(3);
+  Node* subkeyH        = argument(4);
+
+  Node* state_start  = array_element_address(state, intcon(0), T_LONG);
+  assert(state_start, "state is NULL");
+  Node* subkeyH_start  = array_element_address(subkeyH, intcon(0), T_LONG);
+  assert(subkeyH_start, "subkeyH is NULL");
+  Node* data_start  = array_element_address(data, offset, T_BYTE);
+  assert(data_start, "data is NULL");
+
+  // The stub's type has a void result, so the call node is not kept.
+  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::ghash_processBlocks_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    state_start, subkeyH_start, data_start, len);
+  return true;
+}
+
 //------------------------------inline_sha_implCompress-----------------------
 //
 // Calculate SHA (i.e., SHA-1) for single-block byte[] array.
--- a/src/share/vm/opto/runtime.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/opto/runtime.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -92,7 +92,25 @@
 // At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
 
 
+// GHASH block processing: call signature (state, subkeyH, data, blocks) -> void
+const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
+    int argcnt = 4;
 
+    const Type** fields = TypeTuple::fields(argcnt);
+    int argp = TypeFunc::Parms;
+    fields[argp++] = TypePtr::NOTNULL;    // state
+    fields[argp++] = TypePtr::NOTNULL;    // subkeyH
+    fields[argp++] = TypePtr::NOTNULL;    // data
+    fields[argp++] = TypeInt::INT;        // blocks
+    assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+    const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+    // result type needed: the range has no value slot, i.e. the call returns void
+    fields = TypeTuple::fields(1);
+    fields[TypeFunc::Parms+0] = NULL; // void
+    const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+    return TypeFunc::make(domain, range);
+}
 
 // Compiled code entry points
 address OptoRuntime::_new_instance_Java                           = NULL;
--- a/src/share/vm/opto/runtime.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/opto/runtime.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -311,6 +311,8 @@
   static const TypeFunc* montgomeryMultiply_Type();
   static const TypeFunc* montgomerySquare_Type();
 
+  static const TypeFunc* ghash_processBlocks_Type();
+
   static const TypeFunc* updateBytesCRC32_Type();
 
   // leaf on stack replacement interpreter accessor types
--- a/src/share/vm/runtime/globals.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/runtime/globals.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -602,6 +602,9 @@
   product(bool, UseSHA, false,                                              \
           "Control whether SHA instructions can be used on SPARC")          \
                                                                             \
+  product(bool, UseGHASHIntrinsics, false,                                  \
+          "Use intrinsics for GHASH versions of crypto")                    \
+                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size)")             \
                                                                             \
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/runtime/stubRoutines.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -124,6 +124,7 @@
 address StubRoutines::_aescrypt_decryptBlock               = NULL;
 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
+address StubRoutines::_ghash_processBlocks                 = NULL;
 
 address StubRoutines::_sha1_implCompress     = NULL;
 address StubRoutines::_sha1_implCompressMB   = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Dec 04 16:23:46 2019 +0000
@@ -197,6 +197,7 @@
   static address _aescrypt_decryptBlock;
   static address _cipherBlockChaining_encryptAESCrypt;
   static address _cipherBlockChaining_decryptAESCrypt;
+  static address _ghash_processBlocks;
 
   static address _sha1_implCompress;
   static address _sha1_implCompressMB;
@@ -359,6 +360,7 @@
   static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
+  static address ghash_processBlocks() { return _ghash_processBlocks; }
 
   static address sha1_implCompress()     { return _sha1_implCompress; }
   static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Nov 27 05:33:18 2019 +0000
+++ b/src/share/vm/runtime/vmStructs.cpp	Wed Dec 04 16:23:46 2019 +0000
@@ -810,6 +810,7 @@
      static_field(StubRoutines,                _aescrypt_decryptBlock,                        address)                               \
      static_field(StubRoutines,                _cipherBlockChaining_encryptAESCrypt,          address)                               \
      static_field(StubRoutines,                _cipherBlockChaining_decryptAESCrypt,          address)                               \
+     static_field(StubRoutines,                _ghash_processBlocks,                          address)                               \
      static_field(StubRoutines,                _updateBytesCRC32,                             address)                               \
      static_field(StubRoutines,                _crc_table_adr,                                address)                               \
      static_field(StubRoutines,                _multiplyToLen,                                address)                               \
--- a/test/compiler/7184394/TestAESBase.java	Wed Nov 27 05:33:18 2019 +0000
+++ b/test/compiler/7184394/TestAESBase.java	Wed Dec 04 16:23:46 2019 +0000
@@ -29,6 +29,7 @@
 import javax.crypto.Cipher;
 import javax.crypto.KeyGenerator;
 import javax.crypto.SecretKey;
+import javax.crypto.spec.GCMParameterSpec;
 import javax.crypto.spec.IvParameterSpec;
 import javax.crypto.spec.SecretKeySpec;
 import java.security.AlgorithmParameters;
@@ -62,8 +63,12 @@
   Random random = new Random(0);
   Cipher cipher;
   Cipher dCipher;
-  AlgorithmParameters algParams;
+  AlgorithmParameters algParams = null;
   SecretKey key;
+  GCMParameterSpec gcm_spec;
+  byte[] aad = { 0x11, 0x22, 0x33, 0x44, 0x55 };
+  int tlen = 12;
+  byte[] iv = new byte[16];
 
   static int numThreads = 0;
   int  threadId;
@@ -77,7 +82,10 @@
 
   public void prepare() {
     try {
-    System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize );
+      System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr +
+              ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit +
+              ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" +
+              encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize );
 
       if (encInputOffset % ALIGN != 0 || encOutputOffset % ALIGN != 0 || decOutputOffset % ALIGN !=0 )
         testingMisalignment = true;
@@ -98,16 +106,24 @@
       cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
       dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
 
+      // CBC init
       if (mode.equals("CBC")) {
-        int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
-        IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
+        IvParameterSpec initVector = new IvParameterSpec(iv);
         cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+        algParams = cipher.getParameters();
+        dCipher.init(Cipher.DECRYPT_MODE, key, initVector);
+
+      // GCM init
+      } else if (mode.equals("GCM")) {
+        gcm_init(true);
+        gcm_init(false);
+
+      // ECB init
       } else {
-        algParams = cipher.getParameters();
         cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+        dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
       }
-      algParams = cipher.getParameters();
-      dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+
       if (threadId == 0) {
         childShowCipher();
       }
@@ -188,4 +204,19 @@
   }
 
   abstract void childShowCipher();
+
+  // Sets up the encrypt (true) or decrypt (false) cipher for GCM using the
+  // shared key, IV and AAD; GCMParameterSpec takes the tag length in bits.
+  void gcm_init(boolean encrypt) throws Exception {
+    gcm_spec = new GCMParameterSpec(tlen * 8, iv);
+    if (encrypt) {
+      // Get a new instance every time because of IV reuse restrictions
+      cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+      cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec);
+      cipher.updateAAD(aad);
+    } else {
+      dCipher.init(Cipher.DECRYPT_MODE, key, gcm_spec);
+      dCipher.updateAAD(aad);
+    }
+  }
 }
--- a/test/compiler/7184394/TestAESDecode.java	Wed Nov 27 05:33:18 2019 +0000
+++ b/test/compiler/7184394/TestAESDecode.java	Wed Dec 04 16:23:46 2019 +0000
@@ -32,7 +32,11 @@
   @Override
   public void run() {
     try {
-      if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      if (mode.equals("GCM")) {
+        gcm_init(false);
+      } else if (!noReinit) {
+        dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      }
       decode = new byte[decodeLength];
       if (testingMisalignment) {
         int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset);
--- a/test/compiler/7184394/TestAESEncode.java	Wed Nov 27 05:33:18 2019 +0000
+++ b/test/compiler/7184394/TestAESEncode.java	Wed Dec 04 16:23:46 2019 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,11 @@
   @Override
   public void run() {
     try {
-      if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      if (mode.equals("GCM")) {
+        gcm_init(true);
+      } else if (!noReinit) {
+        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      }
       encode = new byte[encodeLength];
       if (testingMisalignment) {
         int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
--- a/test/compiler/7184394/TestAESMain.java	Wed Nov 27 05:33:18 2019 +0000
+++ b/test/compiler/7184394/TestAESMain.java	Wed Dec 04 16:23:46 2019 +0000
@@ -41,6 +41,13 @@
  * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
  * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
  * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
  *
  * @author Tom Deneau
  */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/stress/gclocker/TestExcessGCLockerCollections.java	Wed Dec 04 16:23:46 2019 +0000
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package gc.stress.gclocker;
+
+// Based on Kim Barrett's test for JDK-8048556
+
+/*
+ * @test TestExcessGCLockerCollections
+ * @key gc
+ * @bug 8048556
+ * @summary Check for GC Locker initiated GCs that immediately follow another
+ * GC and so have very little needing to be collected.
+ * @library /testlibrary
+ * @run driver/timeout=1000 gc.stress.gclocker.TestExcessGCLockerCollections 300 4 2
+ */
+
+import java.util.HashMap;
+import java.util.Map;
+
+import java.util.zip.Deflater;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import javax.management.MBeanServer;
+import javax.management.Notification;
+import javax.management.NotificationListener;
+import javax.management.openmbean.CompositeData;
+import java.lang.management.ManagementFactory;
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.List;
+import com.sun.management.GarbageCollectionNotificationInfo;
+import com.sun.management.GcInfo;
+
+import com.oracle.java.testlibrary.Asserts;
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+class TestExcessGCLockerCollectionsStringConstants {
+    // Some constant strings used in both GC logging and error detection
+    static public final String GCLOCKER_CAUSE = "GCLocker Initiated GC";  // compared against the notification's GC cause
+    static public final String USED_TOO_LOW = "TOO LOW";                  // printed when pool usage is below the threshold
+    static public final String USED_OK = "OK";                            // printed otherwise
+}
+
+class TestExcessGCLockerCollectionsAux {
+    static private final int LARGE_MAP_SIZE = 64 * 1024;
+
+    static private final int MAP_ARRAY_LENGTH = 4;
+    static private final int MAP_SIZE = 1024;
+
+    static private final int BYTE_ARRAY_LENGTH = 128 * 1024;
+
+    static private void println(String str) { System.out.println(str); }
+    static private void println()           { System.out.println();    }
+
+    static private volatile boolean keepRunning = true;
+
+    static Map<Integer,String> populateMap(int size) {
+        Map<Integer,String> map = new HashMap<Integer,String>();
+        for (int i = 0; i < size; i += 1) {
+            Integer keyInt = Integer.valueOf(i);
+            String valStr = "value is [" + i + "]";
+            map.put(keyInt,valStr);
+        }
+        return map;
+    }
+
+    static private class AllocatingWorker implements Runnable {
+        private final Object[] array = new Object[MAP_ARRAY_LENGTH];
+        private int arrayIndex = 0;
+
+        private void doStep() {
+            Map<Integer,String> map = populateMap(MAP_SIZE);
+            array[arrayIndex] = map;
+            arrayIndex = (arrayIndex + 1) % MAP_ARRAY_LENGTH;
+        }
+
+        public void run() {
+            while (keepRunning) {
+                doStep();
+            }
+        }
+    }
+
+    static private class JNICriticalWorker implements Runnable {
+        private int count;
+
+        private void doStep() {  // one compression pass over a freshly filled buffer
+            byte[] inputArray = new byte[BYTE_ARRAY_LENGTH];
+            for (int i = 0; i < inputArray.length; i += 1) {
+                inputArray[i] = (byte) (count + i);
+            }
+
+            Deflater deflater = new Deflater();
+            deflater.setInput(inputArray);
+            deflater.finish();
+
+            byte[] outputArray = new byte[2 * inputArray.length];
+            deflater.deflate(outputArray);
+            deflater.end();  // release the compressor now instead of leaving it to finalization
+            count += 1;
+        }
+
+        public void run() {
+            while (keepRunning) {
+                doStep();
+            }
+        }
+    }
+
+    static class GCNotificationListener implements NotificationListener {
+        static private final double MIN_USED_PERCENT = 40.0;  // pre-GC pool usage below this percentage is reported as "TOO LOW"
+
+        static private final List<String> newGenPoolNames = Arrays.asList(
+                "G1 Eden Space",           // OpenJDK G1GC: -XX:+UseG1GC
+                "PS Eden Space",           // OpenJDK ParallelGC: -XX:+UseParallelGC
+                "Par Eden Space",          // OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC
+                "Eden Space"               // OpenJDK SerialGC: -XX:+UseSerialGC
+                                           // OpenJDK ConcMarkSweepGC: -XX:+UseConcMarkSweepGC -XX:-UseParNewGC
+        );
+
+        @Override
+        public void handleNotification(Notification notification, Object handback) {
+            try {
+                if (notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) {
+                    GarbageCollectionNotificationInfo info =
+                            GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData());
+
+                    String gc_cause = info.getGcCause();
+
+                    if (gc_cause.equals(TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE)) {
+                        Map<String, MemoryUsage> memory_before_gc = info.getGcInfo().getMemoryUsageBeforeGc();
+
+                        for (String newGenPoolName : newGenPoolNames) {
+                            MemoryUsage usage = memory_before_gc.get(newGenPoolName);
+                            if (usage == null) continue;  // no pool of this name in the notification
+
+                            double startTime = ((double) info.getGcInfo().getStartTime()) / 1000.0;
+                            long used = usage.getUsed();
+                            long committed = usage.getCommitted();
+                            long max = usage.getMax();
+                            double used_percent = (((double) used) / Math.max(committed, max)) * 100.0;
+
+                            System.out.printf("%6.3f: (%s) %d/%d/%d, %8.4f%% (%s)\n",
+                                              startTime, gc_cause, used, committed, max, used_percent,
+                                              ((used_percent < MIN_USED_PERCENT) ? TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW
+                                                                                 : TestExcessGCLockerCollectionsStringConstants.USED_OK));
+                        }
+                    }
+                }
+            } catch (RuntimeException ex) {
+                System.err.println("Exception during notification processing:" + ex);
+                ex.printStackTrace();
+            }
+        }
+
+        public static boolean register() {
+            try {
+                MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
+
+                // Get the list of garbage collector MXBeans
+                List<GarbageCollectorMXBean> gc_mxbeans = ManagementFactory.getGarbageCollectorMXBeans();
+
+                // Create the notification listener
+                GCNotificationListener gcNotificationListener = new GCNotificationListener();
+
+                for (GarbageCollectorMXBean gcbean : gc_mxbeans) {
+                  // Add notification listener for the MXBean
+                  mbeanServer.addNotificationListener(gcbean.getObjectName(), gcNotificationListener, null, null);
+                }
+            } catch (Exception ex) {
+                System.err.println("Exception during mbean registration:" + ex);
+                ex.printStackTrace();
+                // We've failed to set up, terminate
+                return false;
+            }
+
+            return true;
+        }
+    }
+
+    static public Map<Integer,String> largeMap;
+
+    static public void main(String args[]) {
+        long durationSec = Long.parseLong(args[0]);
+        int allocThreadNum = Integer.parseInt(args[1]);
+        int jniCriticalThreadNum = Integer.parseInt(args[2]);
+
+        println("Running for " + durationSec + " secs");
+
+        if (!GCNotificationListener.register()) {
+          println("failed to register GC notification listener");
+          System.exit(-1);
+        }
+
+        largeMap = populateMap(LARGE_MAP_SIZE);
+
+        println("Starting " + allocThreadNum + " allocating threads");
+        for (int i = 0; i < allocThreadNum; i += 1) {
+            new Thread(new AllocatingWorker()).start();
+        }
+
+        println("Starting " + jniCriticalThreadNum + " jni critical threads");
+        for (int i = 0; i < jniCriticalThreadNum; i += 1) {
+            new Thread(new JNICriticalWorker()).start();
+        }
+
+        long durationMS = (long) (1000 * durationSec);
+        long start = System.currentTimeMillis();
+        long now = start;
+        long soFar = now - start;
+        while (soFar < durationMS) {
+            try {
+                Thread.sleep(durationMS - soFar);
+            } catch (Exception e) {
+            }
+            now = System.currentTimeMillis();
+            soFar = now - start;
+        }
+        println("Done.");
+        keepRunning = false;
+    }
+}
+
+public class TestExcessGCLockerCollections {  // driver: runs the Aux workload in a child JVM and checks its output
+    private static final String USED_OK_LINE =
+        "\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)"
+              + " .* " +
+        "\\(" + TestExcessGCLockerCollectionsStringConstants.USED_OK + "\\)";
+    private static final String USED_TOO_LOW_LINE =
+        "\\(" + TestExcessGCLockerCollectionsStringConstants.GCLOCKER_CAUSE + "\\)"
+              + " .* " +
+        "\\(" + TestExcessGCLockerCollectionsStringConstants.USED_TOO_LOW + "\\)";
+
+    private static final String[] COMMON_OPTIONS = new String[] {
+        "-Xmx1G", "-Xms1G", "-Xmn256M" };
+
+    public static void main(String args[]) throws Exception {
+        if (args.length < 3) {
+            System.out.println("usage: TestExcessGCLockerCollections" +
+                               " <duration sec> <alloc threads>" +
+                               " <jni critical threads>");
+            throw new RuntimeException("Invalid arguments");
+        }
+
+        ArrayList<String> finalArgs = new ArrayList<String>();
+        finalArgs.addAll(Arrays.asList(COMMON_OPTIONS));
+        finalArgs.add(TestExcessGCLockerCollectionsAux.class.getName());
+        finalArgs.addAll(Arrays.asList(args));
+
+        // GC and other options obtained from test framework.
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            true, finalArgs.toArray(new String[0]));
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        output.shouldHaveExitValue(0);
+        //System.out.println("------------- begin stdout ----------------");
+        //System.out.println(output.getStdout());
+        //System.out.println("------------- end stdout ----------------");
+        output.stdoutShouldMatch(USED_OK_LINE);          // at least one GCLocker GC was reported with adequate pool usage
+        output.stdoutShouldNotMatch(USED_TOO_LOW_LINE);  // and none was reported with usage below the threshold
+    }
+}