# HG changeset patch # User lana # Date 1495126104 0 # Node ID 6427ba02ae4b1a68619d89f39aa0224919e315c2 # Parent b3ee8ab233ed2e60f0266fb90be6b7cd88567604# Parent 507f8a7678b4b8dd38fb2e6bb2d6d8f8e2abc292 Merge diff -r b3ee8ab233ed -r 6427ba02ae4b src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu May 18 14:54:53 2017 +0000 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Thu May 18 16:48:24 2017 +0000 @@ -4134,28 +4134,33 @@ if ((dst_enc < 16) && (nds_enc < 16)) { vandps(dst, nds, negate_field, vector_len); } else if ((src_enc < 16) && (dst_enc < 16)) { - movss(src, nds); + evmovdqul(src, nds, Assembler::AVX_512bit); vandps(dst, src, negate_field, vector_len); } else if (src_enc < 16) { - movss(src, nds); + evmovdqul(src, nds, Assembler::AVX_512bit); vandps(src, src, negate_field, vector_len); - movss(dst, src); + evmovdqul(dst, src, Assembler::AVX_512bit); } else if (dst_enc < 16) { - movdqu(src, xmm0); - movss(xmm0, nds); + evmovdqul(src, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); vandps(dst, xmm0, negate_field, vector_len); - movdqu(xmm0, src); - } else if (nds_enc < 16) { - movdqu(src, xmm0); - vandps(xmm0, nds, negate_field, vector_len); - movss(dst, xmm0); - movdqu(xmm0, src); - } else { - movdqu(src, xmm0); - movss(xmm0, nds); - vandps(xmm0, xmm0, negate_field, vector_len); - movss(dst, xmm0); - movdqu(xmm0, src); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + } else { + if (src_enc != dst_enc) { + evmovdqul(src, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + vandps(xmm0, xmm0, negate_field, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + vandps(xmm0, xmm0, negate_field, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } } } @@ -4166,28 +4171,33 @@ if ((dst_enc < 16) && (nds_enc < 16)) { vandpd(dst, nds, negate_field, vector_len); } else if ((src_enc < 16) && (dst_enc < 16)) { - movsd(src, nds); + evmovdqul(src, nds, Assembler::AVX_512bit); vandpd(dst, src, negate_field, vector_len); } else if (src_enc < 16) { - movsd(src, nds); + evmovdqul(src, nds, Assembler::AVX_512bit); vandpd(src, src, negate_field, vector_len); - movsd(dst, src); + evmovdqul(dst, src, Assembler::AVX_512bit); } else if (dst_enc < 16) { - movdqu(src, xmm0); - movsd(xmm0, nds); + evmovdqul(src, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); vandpd(dst, xmm0, negate_field, vector_len); - movdqu(xmm0, src); - } else if (nds_enc < 16) { - movdqu(src, xmm0); - vandpd(xmm0, nds, negate_field, vector_len); - movsd(dst, xmm0); - movdqu(xmm0, src); - } else { - movdqu(src, xmm0); - movsd(xmm0, nds); - vandpd(xmm0, xmm0, negate_field, vector_len); - movsd(dst, xmm0); - movdqu(xmm0, src); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + } else { + if (src_enc != dst_enc) { + evmovdqul(src, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + vandpd(xmm0, xmm0, negate_field, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, src, Assembler::AVX_512bit); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, nds, Assembler::AVX_512bit); + vandpd(xmm0, xmm0, negate_field, vector_len); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } } } @@ -4934,6 +4944,24 @@ } } +void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) { + if (VM_Version::supports_avx512vl()) { + Assembler::pshufd(dst, src, mode); + } else { + int dst_enc = dst->encoding(); + if (dst_enc < 16) { + Assembler::pshufd(dst, src, mode); + } else { + subptr(rsp, 64); + evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit); + Assembler::pshufd(xmm0, src, mode); + evmovdqul(dst, xmm0, Assembler::AVX_512bit); + evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit); + addptr(rsp, 64); + } + } +} + // This instruction exists within macros, ergo we cannot control its input // when emitted through those patterns. void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { diff -r b3ee8ab233ed -r 6427ba02ae4b src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Thu May 18 14:54:53 2017 +0000 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Thu May 18 16:48:24 2017 +0000 @@ -1232,6 +1232,9 @@ void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } + void pshufd(XMMRegister dst, Address src, int mode); + void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); } + void pshuflw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }