icedtea7-forest-aarch64/hotspot: changeset 5686:8e3cc52cbcef
Add support for AES Intrinsics
Backport from jdk8; also incorporates 3 subsequent updates.
author    adinn
date      Mon, 24 Nov 2014 13:37:15 +0000
parents   4868ef1912f1
children  f7326d2a6cda
files     src/cpu/aarch64/vm/assembler_aarch64.cpp
          src/cpu/aarch64/vm/assembler_aarch64.hpp
          src/cpu/aarch64/vm/icache_aarch64.cpp
          src/cpu/aarch64/vm/icache_aarch64.hpp
          src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
          src/cpu/aarch64/vm/vm_version_aarch64.cpp
diffstat  6 files changed, 682 insertions(+), 245 deletions(-)
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Mon Nov 24 13:37:15 2014 +0000
@@ -1194,18 +1194,11 @@
 }
 
 #ifndef PRODUCT
-  {
-    address PC = __ pc();
-    __ bl(__ pc()+(1<<27)-4);
-    NativeCall* call = nativeCall_at(PC);
-    ptrdiff_t offset = call->destination()-PC;
-    assert(offset == (1<<27)-4, "broken branch coding");
-    PC = __ pc();
-    __ bl(__ pc()-(1<<27));
-    call = nativeCall_at(PC);
-    offset = call->destination()-PC;
-    assert(offset == -(1<<27), "broken branch coding");
-  }
+
+  address PC = __ pc();
+  __ ld1(v0, __ T16B, Address(r16)); // No offset
+  __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index
+  __ ld1(v0, __ T16B, Address(r16, r17)); //
 
 #endif // PRODUCT
@@ -3805,131 +3798,131 @@
   if (UseNeon) {
       cmp(len, 64);
       br(Assembler::LT, L_by16);
-      v_eor(v16, T16B, v16, v16);
+      eor(v16, T16B, v16, v16);
       Label L_fold;
 
       add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
 
-      v_ld1(v0, v1, T2D, buf, 32);
-      v_ld1r(v4, T2D, tmp, 8);
-      v_ld1r(v5, T2D, tmp, 8);
-      v_ld1r(v6, T2D, tmp, 8);
-      v_ld1r(v7, T2D, tmp, 8);
-      v_mov(v16, T4S, 0, crc);
-
-      v_eor(v0, T16B, v0, v16);
+      ld1(v0, v1, T2D, post(buf, 32));
+      ld1r(v4, T2D, post(tmp, 8));
+      ld1r(v5, T2D, post(tmp, 8));
+      ld1r(v6, T2D, post(tmp, 8));
+      ld1r(v7, T2D, post(tmp, 8));
+      mov(v16, T4S, 0, crc);
+
+      eor(v0, T16B, v0, v16);
       sub(len, len, 64);
 
     BIND(L_fold);
-      v_pmull(v22, T8H, v0, v5, T8B);
-      v_pmull(v20, T8H, v0, v7, T8B);
-      v_pmull(v23, T8H, v0, v4, T8B);
-      v_pmull(v21, T8H, v0, v6, T8B);
+      pmull(v22, T8H, v0, v5, T8B);
+      pmull(v20, T8H, v0, v7, T8B);
+      pmull(v23, T8H, v0, v4, T8B);
+      pmull(v21, T8H, v0, v6, T8B);
 
-      v_pmull2(v18, T8H, v0, v5, T16B);
-      v_pmull2(v16, T8H, v0, v7, T16B);
-      v_pmull2(v19, T8H, v0, v4, T16B);
-      v_pmull2(v17, T8H, v0, v6, T16B);
+      pmull2(v18, T8H, v0, v5, T16B);
+      pmull2(v16, T8H, v0, v7, T16B);
+      pmull2(v19, T8H, v0, v4, T16B);
+      pmull2(v17, T8H, v0, v6, T16B);
 
-      v_uzp1(v24, v20, v22, T8H);
-      v_uzp2(v25, v20, v22, T8H);
-      v_eor(v20, T16B, v24, v25);
+      uzp1(v24, v20, v22, T8H);
+      uzp2(v25, v20, v22, T8H);
+      eor(v20, T16B, v24, v25);
 
-      v_uzp1(v26, v16, v18, T8H);
-      v_uzp2(v27, v16, v18, T8H);
-      v_eor(v16, T16B, v26, v27);
+      uzp1(v26, v16, v18, T8H);
+      uzp2(v27, v16, v18, T8H);
+      eor(v16, T16B, v26, v27);
 
-      v_ushll2(v22, T4S, v20, T8H, 8);
-      v_ushll(v20, T4S, v20, T4H, 8);
+      ushll2(v22, T4S, v20, T8H, 8);
+      ushll(v20, T4S, v20, T4H, 8);
 
-      v_ushll2(v18, T4S, v16, T8H, 8);
-      v_ushll(v16, T4S, v16, T4H, 8);
+      ushll2(v18, T4S, v16, T8H, 8);
+      ushll(v16, T4S, v16, T4H, 8);
 
-      v_eor(v22, T16B, v23, v22);
-      v_eor(v18, T16B, v19, v18);
-      v_eor(v20, T16B, v21, v20);
-      v_eor(v16, T16B, v17, v16);
+      eor(v22, T16B, v23, v22);
+      eor(v18, T16B, v19, v18);
+      eor(v20, T16B, v21, v20);
+      eor(v16, T16B, v17, v16);
 
-      v_uzp1(v17, v16, v20, T2D);
-      v_uzp2(v21, v16, v20, T2D);
-      v_eor(v17, T16B, v17, v21);
+      uzp1(v17, v16, v20, T2D);
+      uzp2(v21, v16, v20, T2D);
+      eor(v17, T16B, v17, v21);
 
-      v_ushll2(v20, T2D, v17, T4S, 16);
-      v_ushll(v16, T2D, v17, T2S, 16);
+      ushll2(v20, T2D, v17, T4S, 16);
+      ushll(v16, T2D, v17, T2S, 16);
 
-      v_eor(v20, T16B, v20, v22);
-      v_eor(v16, T16B, v16, v18);
+      eor(v20, T16B, v20, v22);
+      eor(v16, T16B, v16, v18);
 
-      v_uzp1(v17, v20, v16, T2D);
-      v_uzp2(v21, v20, v16, T2D);
-      v_eor(v28, T16B, v17, v21);
+      uzp1(v17, v20, v16, T2D);
+      uzp2(v21, v20, v16, T2D);
+      eor(v28, T16B, v17, v21);
 
-      v_pmull(v22, T8H, v1, v5, T8B);
-      v_pmull(v20, T8H, v1, v7, T8B);
-      v_pmull(v23, T8H, v1, v4, T8B);
-      v_pmull(v21, T8H, v1, v6, T8B);
+      pmull(v22, T8H, v1, v5, T8B);
+      pmull(v20, T8H, v1, v7, T8B);
+      pmull(v23, T8H, v1, v4, T8B);
+      pmull(v21, T8H, v1, v6, T8B);
 
-      v_pmull2(v18, T8H, v1, v5, T16B);
-      v_pmull2(v16, T8H, v1, v7, T16B);
-      v_pmull2(v19, T8H, v1, v4, T16B);
-      v_pmull2(v17, T8H, v1, v6, T16B);
+      pmull2(v18, T8H, v1, v5, T16B);
+      pmull2(v16, T8H, v1, v7, T16B);
+      pmull2(v19, T8H, v1, v4, T16B);
+      pmull2(v17, T8H, v1, v6, T16B);
 
-      v_ld1(v0, v1, T2D, buf, 32);
+      ld1(v0, v1, T2D, post(buf, 32));
 
-      v_uzp1(v24, v20, v22, T8H);
-      v_uzp2(v25, v20, v22, T8H);
-      v_eor(v20, T16B, v24, v25);
+      uzp1(v24, v20, v22, T8H);
+      uzp2(v25, v20, v22, T8H);
+      eor(v20, T16B, v24, v25);
 
-      v_uzp1(v26, v16, v18, T8H);
-      v_uzp2(v27, v16, v18, T8H);
-      v_eor(v16, T16B, v26, v27);
+      uzp1(v26, v16, v18, T8H);
+      uzp2(v27, v16, v18, T8H);
+      eor(v16, T16B, v26, v27);
 
-      v_ushll2(v22, T4S, v20, T8H, 8);
-      v_ushll(v20, T4S, v20, T4H, 8);
+      ushll2(v22, T4S, v20, T8H, 8);
+      ushll(v20, T4S, v20, T4H, 8);
 
-      v_ushll2(v18, T4S, v16, T8H, 8);
-      v_ushll(v16, T4S, v16, T4H, 8);
+      ushll2(v18, T4S, v16, T8H, 8);
+      ushll(v16, T4S, v16, T4H, 8);
 
-      v_eor(v22, T16B, v23, v22);
-      v_eor(v18, T16B, v19, v18);
-      v_eor(v20, T16B, v21, v20);
-      v_eor(v16, T16B, v17, v16);
+      eor(v22, T16B, v23, v22);
+      eor(v18, T16B, v19, v18);
+      eor(v20, T16B, v21, v20);
+      eor(v16, T16B, v17, v16);
 
-      v_uzp1(v17, v16, v20, T2D);
-      v_uzp2(v21, v16, v20, T2D);
-      v_eor(v16, T16B, v17, v21);
+      uzp1(v17, v16, v20, T2D);
+      uzp2(v21, v16, v20, T2D);
+      eor(v16, T16B, v17, v21);
 
-      v_ushll2(v20, T2D, v16, T4S, 16);
-      v_ushll(v16, T2D, v16, T2S, 16);
+      ushll2(v20, T2D, v16, T4S, 16);
+      ushll(v16, T2D, v16, T2S, 16);
 
-      v_eor(v20, T16B, v22, v20);
-      v_eor(v16, T16B, v16, v18);
+      eor(v20, T16B, v22, v20);
+      eor(v16, T16B, v16, v18);
 
-      v_uzp1(v17, v20, v16, T2D);
-      v_uzp2(v21, v20, v16, T2D);
-      v_eor(v20, T16B, v17, v21);
+      uzp1(v17, v20, v16, T2D);
+      uzp2(v21, v20, v16, T2D);
+      eor(v20, T16B, v17, v21);
 
-      v_shl(v16, v28, T2D, 1);
-      v_shl(v17, v20, T2D, 1);
+      shl(v16, v28, T2D, 1);
+      shl(v17, v20, T2D, 1);
 
-      v_eor(v0, T16B, v0, v16);
-      v_eor(v1, T16B, v1, v17);
+      eor(v0, T16B, v0, v16);
+      eor(v1, T16B, v1, v17);
 
       subs(len, len, 32);
       br(Assembler::GE, L_fold);
 
       mov(crc, 0);
-      v_mov(tmp, v0, T1D, 0);
+      mov(tmp, v0, T1D, 0);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
-      v_mov(tmp, v0, T1D, 1);
+      mov(tmp, v0, T1D, 1);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
-      v_mov(tmp, v1, T1D, 0);
+      mov(tmp, v1, T1D, 0);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
-      v_mov(tmp, v1, T1D, 1);
+      mov(tmp, v1, T1D, 1);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
       update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
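Editor's note: the PRODUCT-only smoke test above exercises the three Address forms the reworked ld1 front end accepts (no offset, post-index, base plus register). The standalone sketch below is not part of the changeset; it reproduces the field layout used by the new ld_st() encoder for the one-register case, with the op1/op2 constants taken from the INSN1(ld1, 0b001100010, 0b0111) entry added to assembler_aarch64.hpp further down, and checks the result against the architecturally defined encoding of "ld1 {v0.16b}, [x16]".

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal sketch (not HotSpot code): assemble "ld1 {Vt.16b}, [Xn]" the way
    // the new ld_st() helper lays out its fields. The low bit of the
    // SIMD_Arrangement enum is the Q bit; the remaining bits are the size field.
    uint32_t ld1_16b(unsigned vt, unsigned xn) {
      const uint32_t op1  = 0b001100010; // bits 29..21, from INSN1(ld1, ...)
      const uint32_t op2  = 0b0111;      // bits 15..12, one-register LD1
      const uint32_t Q    = 1;           // T16B -> enum value 1 -> Q = 1
      const uint32_t size = 0;           // T16B -> size = 0
      return (Q << 30) | (op1 << 21) | (op2 << 12) | (size << 10) | (xn << 5) | vt;
    }

    int main() {
      // 0x4c407200 is the ARMv8 encoding of "ld1 {v0.16b}, [x16]".
      assert(ld1_16b(0, 16) == 0x4c407200);
      printf("ld1 {v0.16b}, [x16] = 0x%08x\n", ld1_16b(0, 16));
    }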
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Nov 24 13:37:15 2014 +0000
@@ -442,15 +442,16 @@
     }
   }
 
-  Register base() {
-    guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg),
+  Register base() const {
+    guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg
+               | _mode == post),
               "wrong mode");
     return _base;
   }
-  long offset() {
+  long offset() const {
     return _offset;
   }
-  Register index() {
+  Register index() const {
     return _index;
   }
   mode getMode() const {
@@ -1872,7 +1873,7 @@
  * We just use FloatRegister in the following. They are exactly the same
  * as SIMD registers.
  */
-public:
+ public:
 
   enum SIMD_Arrangement {
        T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
@@ -1882,7 +1883,136 @@
        S32, D64, Q128
   };
 
-  void v_shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
+ private:
+
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             int imm, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             Register Xm, int op1, int op2) {
+    starti;
+    f(0,31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
+    switch (a.getMode()) {
+    case Address::base_plus_offset:
+      guarantee(a.offset() == 0, "no offset allowed here");
+      ld_st(Vt, T, a.base(), op1, op2);
+      break;
+    case Address::post:
+      ld_st(Vt, T, a.base(), a.offset(), op1, op2);
+      break;
+    case Address::base_plus_offset_reg:
+      ld_st(Vt, T, a.base(), a.index(), op1, op2);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+ public:
+
+#define INSN1(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN2(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
+    assert(Vt->successor() == Vt2, "Registers must be ordered");        \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN3(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            SIMD_Arrangement T, const Address &a) {                     \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
+           "Registers must be ordered");                                \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN4(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
+           Vt3->successor() == Vt4, "Registers must be ordered");       \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+  INSN1(ld1, 0b001100010, 0b0111);
+  INSN2(ld1, 0b001100010, 0b1010);
+  INSN3(ld1, 0b001100010, 0b0110);
+  INSN4(ld1, 0b001100010, 0b0010);
+
+  INSN2(ld2, 0b001100010, 0b1000);
+  INSN3(ld3, 0b001100010, 0b0100);
+  INSN4(ld4, 0b001100010, 0b0000);
+
+  INSN1(st1, 0b001100000, 0b0111);
+  INSN2(st1, 0b001100000, 0b1010);
+  INSN3(st1, 0b001100000, 0b0110);
+  INSN4(st1, 0b001100000, 0b0010);
+
+  INSN2(st2, 0b001100000, 0b1000);
+  INSN3(st3, 0b001100000, 0b0100);
+  INSN4(st4, 0b001100000, 0b0000);
+
+  INSN1(ld1r, 0b001101010, 0b1100);
+  INSN2(ld2r, 0b001101011, 0b1100);
+  INSN3(ld3r, 0b001101010, 0b1110);
+  INSN4(ld4r, 0b001101011, 0b1110);
+
+#undef INSN1
+#undef INSN2
+#undef INSN3
+#undef INSN4
+
+#define INSN(NAME, opc)                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                             \
+    assert(T == T8B || T == T16B, "must be T8B or T16B");               \
+    f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                        \
+    rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);              \
+  }
+
+  INSN(eor,  0b101110001);
+  INSN(orr,  0b001110101);
+  INSN(andr, 0b001110001);
+  INSN(bic,  0b001110011);
+  INSN(bif,  0b101110111);
+  INSN(bit,  0b101110101);
+  INSN(bsl,  0b101110011);
+  INSN(orn,  0b001110111);
+
+#undef INSN
+
+#define INSN(NAME, opc)                           \
+  void NAME(FloatRegister Vd, FloatRegister Vn) { \
+    starti;                                       \
+    f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
+  }
+
+  INSN(aese,   0b0100111000101000010010);
+  INSN(aesd,   0b0100111000101000010110);
+  INSN(aesmc,  0b0100111000101000011010);
+  INSN(aesimc, 0b0100111000101000011110);
+
+#undef INSN
+
+  void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
     starti;
     /* The encodings for the immh:immb fields (bits 22:16) are
      *   0001 xxx       8B/16B, shift = xxx
@@ -1895,7 +2025,7 @@
     f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0);
   }
 
-  void v_ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+  void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    starti;
    /* The encodings for the immh:immb fields (bits 22:16) are
     *   0001 xxx       8H, 8B/16b shift = xxx
@@ -1908,22 +2038,22 @@
     f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
     f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
   }
-  void v_ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
-    v_ushll(Vd, Ta, Vn, Tb, shift);
+  void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+    ushll(Vd, Ta, Vn, Tb, shift);
   }
 
-  void v_uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){
+  void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){
     starti;
     f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21);
     rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0);
   }
-  void v_uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){
-    v_uzp1(Vd, Vn, Vm, T, 1);
+  void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){
+    uzp1(Vd, Vn, Vm, T, 1);
   }
 
   // Move from general purpose register
   //   mov Vd.T[index], Rn
-  void v_mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
+  void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
     starti;
     f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
     f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0);
@@ -1931,7 +2061,7 @@
 
   // Move to general purpose register
   //   mov Rd, Vn.T[index]
-  void v_mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
+  void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
     starti;
     f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
@@ -1939,149 +2069,23 @@
   }
 
   // We do not handle the 1Q arrangement.
-  void v_pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+  void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
     starti;
     assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
     f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
     rf(Vn, 5), rf(Vd, 0);
   }
-  void v_pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
-    v_pmull(Vd, Ta, Vn, Vm, Tb);
-  }
-
-  void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn) {
-    starti;
-    f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b0111, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b1010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b0110, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4");
-    f(0,31), f((int)T & 1, 30), f(0b00110001000000, 29, 16), f(0b0010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    pmull(Vd, Ta, Vn, Vm, Tb);
   }
-  void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) {
-    starti;
-    assert((8 << ((int)T & 1)) == imm, "size/imm mismatch");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0111, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) {
-    starti;
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0111, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, int imm) {
-    starti;
-    assert((16 << ((int)T & 1)) == imm, "size/imm mismatch");
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b1010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, Register Xm) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b1010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn, int imm) {
-    starti;
-    assert((24 << ((int)T & 1)) == imm, "size/imm mismatch");
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0110, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn, Register Xm) {
+
+  void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
+  {
     starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0110, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn, int imm) {
-    starti;
-    assert((32 << ((int)T & 1)) == imm, "size/imm mismatch");
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), f(0b11111, 20, 16), f(0b0010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn, Register Xm) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4");
-    f(0, 31), f((int)T & 1, 30), f(0b001100110, 29, 21), rf(Xm, 16), f(0b0010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-
-  void v_st1(FloatRegister Vt, SIMD_Arrangement T, Register Xn) {
-    starti;
-    f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0111, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_st1(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b1010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_st1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0110, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_st1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, FloatRegister Vt4, SIMD_Arrangement T, Register Xn) {
-    starti;
-    assert((Vt2->encoding_nocheck()) == ((Vt->encoding_nocheck() + 1) % 32), "Invalid Vt2");
-    assert((Vt3->encoding_nocheck()) == ((Vt->encoding_nocheck() + 2) % 32), "Invalid Vt3");
-    assert((Vt4->encoding_nocheck()) == ((Vt->encoding_nocheck() + 3) % 32), "Invalid Vt4");
-    f(0, 31), f((int)T & 1, 30), f(0b00110000000000, 29, 16), f(0b0010, 15, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-
-  void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn) {
-    starti;
-    f(0, 31), f((int)T & 1, 30), f(0b001101010000001100, 29, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) {
-    starti;
-    f(0, 31), f((int)T & 1, 30), f(0b001101110, 29, 21), rf(Xm, 16);
-    f(0b1100, 15, 12), f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-  void v_ld1r(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) {
-    starti;
-    assert((1 << ((int)T & 3)) == imm, "size/imm mismatch");
-    f(0, 31), f((int)T & 1, 30), f(0b001101110111111100, 29, 12);
-    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-  }
-
-  void v_eor(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {
-    starti;
-    assert(T == T8B || T == T16B, "must be T8B or T16B");
-    f(0, 31), f((int)T & 1, 30), f(0b101110001, 29, 21);
-    rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);
+    assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
+    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24);
+    f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10);
+    rf(Vn, 5), rf(Vd, 0);
   }
 
   // CRC32 instructions
@@ -2276,6 +2280,8 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
 
+  using Assembler::mov;
+
  protected:
 
   // Support for VM calls
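Editor's note: the last hunk above matters because the patch renames v_mov to mov, so Assembler now declares vector overloads of a name MacroAssembler also defines, and C++ name lookup hides every base-class mov as soon as the derived class declares its own. The added "using Assembler::mov;" pulls the base-class overloads back into scope. A minimal, self-contained illustration of the rule (hypothetical types, not HotSpot code):

    #include <iostream>

    struct Assembler {
      // Stand-in for the renamed SIMD mov overloads.
      void mov(double vd, int lane) { std::cout << "SIMD mov overload\n"; }
    };

    struct MacroAssembler : Assembler {
      using Assembler::mov;  // without this, the declaration below hides ALL base-class movs
      void mov(int rd, long imm) { std::cout << "scalar macro mov\n"; }
    };

    int main() {
      MacroAssembler masm;
      masm.mov(0, 42L);   // MacroAssembler's own overload
      masm.mov(1.0, 0);   // picks the SIMD overload; without the using-declaration
                          // it would silently convert and call the scalar one
    }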
--- a/src/cpu/aarch64/vm/icache_aarch64.cpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.cpp	Mon Nov 24 13:37:15 2014 +0000
@@ -32,7 +32,10 @@
 
 void ICacheStubGenerator::generate_icache_flush(
 		ICache::flush_icache_stub_t* flush_icache_stub) {
-  aarch64TestHook();
   // Give anyone who calls this a surprise
   *flush_icache_stub = (ICache::flush_icache_stub_t)NULL;
 }
+
+void ICache::initialize() {
+  aarch64TestHook();
+}
--- a/src/cpu/aarch64/vm/icache_aarch64.hpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.hpp	Mon Nov 24 13:37:15 2014 +0000
@@ -33,7 +33,7 @@
 
 class ICache : public AbstractICache {
  public:
-  static void initialize() {}
+  static void initialize();
   static void invalidate_word(address addr) {
     __clear_cache((char *)addr, (char *)(addr + 3));
   }
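Editor's note: with this change the aarch64TestHook self-test runs once at VM startup, when ICache::initialize() is called, instead of when the (unused) flush stub is generated. For context, invalidate_word above rests on the compiler-provided cache-flush primitive; GCC and Clang expose it as __builtin___clear_cache, which libgcc also ships as __clear_cache. An illustrative patch-then-flush sequence (a sketch, not part of the changeset):

    #include <stdint.h>

    // Illustrative only: rewrite one AArch64 instruction word, then flush the
    // range so the instruction cache observes the store.
    void patch_and_flush(uint32_t* insn, uint32_t new_insn) {
      *insn = new_insn;
      char* begin = reinterpret_cast<char*>(insn);
      __builtin___clear_cache(begin, begin + sizeof(uint32_t));
    }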
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Nov 24 13:37:15 2014 +0000
@@ -1676,6 +1676,414 @@
                                                     /*dest_uninitialized*/true);
   }
 
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+
+    Label L_doLast;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter();
+
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v0, __ T16B, from); // get 16 bytes of input
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+    __ aese(v0, v3);
+    __ aesmc(v0, v0);
+    __ aese(v0, v4);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+    __ aese(v0, v3);
+    __ aesmc(v0, v0);
+    __ aese(v0, v4);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 44);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+    __ aesmc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ BIND(L_doLast);
+
+    __ aese(v0, v1);
+    __ aesmc(v0, v0);
+    __ aese(v0, v2);
+
+    __ ld1(v1, __ T16B, key);
+    __ rev32(v1, __ T16B, v1);
+    __ eor(v0, __ T16B, v0, v1);
+
+    __ st1(v0, __ T16B, to);
+
+    __ mov(r0, 0);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v0, __ T16B, from); // get 16 bytes of input
+
+    __ ld1(v5, __ T16B, __ post(key, 16));
+    __ rev32(v5, __ T16B, v5);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v3);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v4);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+    __ rev32(v3, __ T16B, v3);
+    __ rev32(v4, __ T16B, v4);
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v3);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v4);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 44);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::EQ, L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+    __ aesimc(v0, v0);
+
+    __ ld1(v1, v2, __ T16B, __ post(key, 32));
+    __ rev32(v1, __ T16B, v1);
+    __ rev32(v2, __ T16B, v2);
+
+    __ BIND(L_doLast);
+
+    __ aesd(v0, v1);
+    __ aesimc(v0, v0);
+    __ aesd(v0, v2);
+
+    __ eor(v0, __ T16B, v0, v5);
+
+    __ st1(v0, __ T16B, to);
+
+    __ mov(r0, 0);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   x0        - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+
+    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter();
+
+    __ mov(rscratch1, len_reg);
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v0, __ T16B, rvec);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::CC, L_loadkeys_44);
+    __ br(Assembler::EQ, L_loadkeys_52);
+
+    __ ld1(v17, v18, __ T16B, __ post(key, 32));
+    __ rev32(v17, __ T16B, v17);
+    __ rev32(v18, __ T16B, v18);
+    __ BIND(L_loadkeys_52);
+    __ ld1(v19, v20, __ T16B, __ post(key, 32));
+    __ rev32(v19, __ T16B, v19);
+    __ rev32(v20, __ T16B, v20);
+    __ BIND(L_loadkeys_44);
+    __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
+    __ rev32(v21, __ T16B, v21);
+    __ rev32(v22, __ T16B, v22);
+    __ rev32(v23, __ T16B, v23);
+    __ rev32(v24, __ T16B, v24);
+    __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
+    __ rev32(v25, __ T16B, v25);
+    __ rev32(v26, __ T16B, v26);
+    __ rev32(v27, __ T16B, v27);
+    __ rev32(v28, __ T16B, v28);
+    __ ld1(v29, v30, v31, __ T16B, key);
+    __ rev32(v29, __ T16B, v29);
+    __ rev32(v30, __ T16B, v30);
+    __ rev32(v31, __ T16B, v31);
+
+    __ BIND(L_aes_loop);
+    __ ld1(v1, __ T16B, __ post(from, 16));
+    __ eor(v0, __ T16B, v0, v1);
+
+    __ br(Assembler::CC, L_rounds_44);
+    __ br(Assembler::EQ, L_rounds_52);
+
+    __ aese(v0, v17); __ aesmc(v0, v0);
+    __ aese(v0, v18); __ aesmc(v0, v0);
+    __ BIND(L_rounds_52);
+    __ aese(v0, v19); __ aesmc(v0, v0);
+    __ aese(v0, v20); __ aesmc(v0, v0);
+    __ BIND(L_rounds_44);
+    __ aese(v0, v21); __ aesmc(v0, v0);
+    __ aese(v0, v22); __ aesmc(v0, v0);
+    __ aese(v0, v23); __ aesmc(v0, v0);
+    __ aese(v0, v24); __ aesmc(v0, v0);
+    __ aese(v0, v25); __ aesmc(v0, v0);
+    __ aese(v0, v26); __ aesmc(v0, v0);
+    __ aese(v0, v27); __ aesmc(v0, v0);
+    __ aese(v0, v28); __ aesmc(v0, v0);
+    __ aese(v0, v29); __ aesmc(v0, v0);
+    __ aese(v0, v30);
+    __ eor(v0, __ T16B, v0, v31);
+
+    __ st1(v0, __ T16B, __ post(to, 16));
+    __ sub(len_reg, len_reg, 16);
+    __ cbnz(len_reg, L_aes_loop);
+
+    __ st1(v0, __ T16B, rvec);
+
+    __ mov(r0, rscratch2);
+
+    __ leave();
+    __ ret(lr);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   rax       - input length
+  //
+  address generate_cipherBlockChaining_decryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+
+    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+    const Register keylen      = rscratch1;
+
+    address start = __ pc();
+    __ enter();
+
+    __ mov(rscratch2, len_reg);
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ ld1(v2, __ T16B, rvec);
+
+    __ ld1(v31, __ T16B, __ post(key, 16));
+    __ rev32(v31, __ T16B, v31);
+
+    __ cmpw(keylen, 52);
+    __ br(Assembler::CC, L_loadkeys_44);
+    __ br(Assembler::EQ, L_loadkeys_52);
+
+    __ ld1(v17, v18, __ T16B, __ post(key, 32));
+    __ rev32(v17, __ T16B, v17);
+    __ rev32(v18, __ T16B, v18);
+    __ BIND(L_loadkeys_52);
+    __ ld1(v19, v20, __ T16B, __ post(key, 32));
+    __ rev32(v19, __ T16B, v19);
+    __ rev32(v20, __ T16B, v20);
+    __ BIND(L_loadkeys_44);
+    __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
+    __ rev32(v21, __ T16B, v21);
+    __ rev32(v22, __ T16B, v22);
+    __ rev32(v23, __ T16B, v23);
+    __ rev32(v24, __ T16B, v24);
+    __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
+    __ rev32(v25, __ T16B, v25);
+    __ rev32(v26, __ T16B, v26);
+    __ rev32(v27, __ T16B, v27);
+    __ rev32(v28, __ T16B, v28);
+    __ ld1(v29, v30, __ T16B, key);
+    __ rev32(v29, __ T16B, v29);
+    __ rev32(v30, __ T16B, v30);
+
+    __ BIND(L_aes_loop);
+    __ ld1(v0, __ T16B, __ post(from, 16));
+    __ orr(v1, __ T16B, v0, v0);
+
+    __ br(Assembler::CC, L_rounds_44);
+    __ br(Assembler::EQ, L_rounds_52);
+
+    __ aesd(v0, v17); __ aesimc(v0, v0);
+    __ aesd(v0, v17); __ aesimc(v0, v0);
+    __ BIND(L_rounds_52);
+    __ aesd(v0, v19); __ aesimc(v0, v0);
+    __ aesd(v0, v20); __ aesimc(v0, v0);
+    __ BIND(L_rounds_44);
+    __ aesd(v0, v21); __ aesimc(v0, v0);
+    __ aesd(v0, v22); __ aesimc(v0, v0);
+    __ aesd(v0, v23); __ aesimc(v0, v0);
+    __ aesd(v0, v24); __ aesimc(v0, v0);
+    __ aesd(v0, v25); __ aesimc(v0, v0);
+    __ aesd(v0, v26); __ aesimc(v0, v0);
+    __ aesd(v0, v27); __ aesimc(v0, v0);
+    __ aesd(v0, v28); __ aesimc(v0, v0);
+    __ aesd(v0, v29); __ aesimc(v0, v0);
+    __ aesd(v0, v30);
+    __ eor(v0, __ T16B, v0, v31);
+    __ eor(v0, __ T16B, v0, v2);
+
+    __ st1(v0, __ T16B, __ post(to, 16));
+    __ orr(v2, __ T16B, v1, v1);
+
+    __ sub(len_reg, len_reg, 16);
+    __ cbnz(len_reg, L_aes_loop);
+
+    __ st1(v2, __ T16B, rvec);
+
+    __ mov(r0, rscratch2);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
   // AARCH64 use safefetch stubs unless we are building for the simulator
   // in which case the x86 asm code in linux_aarch64.S is used
@@ -1930,6 +2338,13 @@
     generate_arraycopy_stubs();
 
 #ifndef BUILTIN_SIM
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
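Editor's note: the cmpw(keylen, 44) and cmpw(keylen, 52) tests in all four stubs come from the layout of the expanded key. Java's AESCrypt hands the stub the key schedule as an int array whose length is 4*(rounds+1), i.e. 44, 52 or 60 words for AES-128/192/256, so comparing that length against 44 and 52 selects how many extra round-key pairs to apply. A small self-contained arithmetic check (illustrative, not part of the changeset):

    #include <cstdio>

    // For an AES key of Nk 32-bit words (4, 6 or 8), the number of rounds is
    // Nr = Nk + 6 and the expanded key schedule holds 4 * (Nr + 1) words:
    // 44, 52 or 60. The stubs read that array length ("keylen") and branch
    // at 44 and 52, exactly like the cmpw(keylen, 44/52) tests above.
    int main() {
      const int nks[] = {4, 6, 8};
      for (int nk : nks) {
        int nr = nk + 6;
        int words = 4 * (nr + 1);
        std::printf("AES-%d: %d rounds, expanded key = %d words\n",
                    32 * nk, nr, words);
      }
    }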
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Nov 24 13:37:15 2014 +0000
@@ -47,6 +47,10 @@
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
 
+#ifndef HWCAP_AES
+#define HWCAP_AES   (1<<3)
+#endif
+
 #ifndef HWCAP_CRC32
 #define HWCAP_CRC32 (1<<7)
 #endif
@@ -121,6 +125,22 @@
   if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
     warning("UseCRC32 specified, but not supported on this CPU");
   }
+  if (auxv & HWCAP_AES) {
+    UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
+    UseAESIntrinsics =
+        UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
+    if (UseAESIntrinsics && !UseAES) {
+      warning("UseAESIntrinsics enabled, but UseAES not, enabling");
+      UseAES = true;
+    }
+  } else {
+    if (UseAES) {
+      warning("UseAES specified, but not supported on this CPU");
+    }
+    if (UseAESIntrinsics) {
+      warning("UseAESIntrinsics specified, but not supported on this CPU");
+    }
+  }
 #endif
 
   if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
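Editor's note: this follows the pattern already used for CRC32: read the ELF auxiliary vector and gate the flags on the HWCAP bit, with a local #define as a fallback for older kernel headers (the bit positions are fixed by the Linux AArch64 ABI, so hardcoding them is safe). A standalone probe in the same style (illustrative, not part of the changeset):

    #include <stdio.h>
    #include <sys/auxv.h>   // getauxval, AT_HWCAP (Linux, glibc >= 2.16)

    // Fallback definitions mirroring the patch, for old kernel headers.
    #ifndef HWCAP_AES
    #define HWCAP_AES   (1 << 3)
    #endif
    #ifndef HWCAP_CRC32
    #define HWCAP_CRC32 (1 << 7)
    #endif

    int main() {
      unsigned long auxv = getauxval(AT_HWCAP);
      printf("AES instructions:   %s\n", (auxv & HWCAP_AES)   ? "yes" : "no");
      printf("CRC32 instructions: %s\n", (auxv & HWCAP_CRC32) ? "yes" : "no");
    }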