# HG changeset patch # User never # Date 1297292543 28800 # Node ID e9a2b8e0572a86b7b2e8ec0f3eb257cef2618976 # Parent 908c5092d72a8e7697995d4016035d28892f45d1 7016474: string compare intrinsic improvements Reviewed-by: kvn diff -r 908c5092d72a -r e9a2b8e0572a src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Mon Jan 31 16:28:40 2011 -0500 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Feb 09 15:02:23 2011 -0800 @@ -2349,6 +2349,17 @@ a_byte(p); } +void Assembler::por(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + + emit_byte(0x66); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + + emit_byte(0xEB); + emit_byte(0xC0 | encode); +} + void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -8636,7 +8647,7 @@ // Compare strings. void MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, - XMMRegister vec1, XMMRegister vec2) { + XMMRegister vec1) { Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; // Compute the minimum of the string lengths and the @@ -8683,62 +8694,85 @@ bind(LSkip2); } - // Advance to next character - addptr(str1, 2); - addptr(str2, 2); + Address::ScaleFactor scale = Address::times_2; + int stride = 8; + + // Advance to next element + addptr(str1, 16/stride); + addptr(str2, 16/stride); if (UseSSE42Intrinsics) { - // With SSE4.2, use double quad vector compare - Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + int pcmpmask = 0x19; // Setup to compare 16-byte vectors - movl(cnt1, cnt2); - andl(cnt2, 0xfffffff8); // cnt2 holds the vector count - andl(cnt1, 0x00000007); // cnt1 holds the tail count - testl(cnt2, cnt2); + movl(result, cnt2); + andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 
jccb(Assembler::zero, COMPARE_TAIL); - lea(str2, Address(str2, cnt2, Address::times_2)); - lea(str1, Address(str1, cnt2, Address::times_2)); - negptr(cnt2); - - bind(COMPARE_VECTORS); - movdqu(vec1, Address(str1, cnt2, Address::times_2)); - movdqu(vec2, Address(str2, cnt2, Address::times_2)); - pxor(vec1, vec2); - ptest(vec1, vec1); - jccb(Assembler::notZero, VECTOR_NOT_EQUAL); - addptr(cnt2, 8); - jcc(Assembler::notZero, COMPARE_VECTORS); - jmpb(COMPARE_TAIL); + lea(str1, Address(str1, result, scale)); + lea(str2, Address(str2, result, scale)); + negptr(result); + + // pcmpestri + // inputs: + // vec1- substring + // rax - negative string length (elements count) + // mem - scanned string + // rdx - string length (elements count) + // pcmpmask - cmp mode: 11000 (string compare with negated result) + // + 00 (unsigned bytes) or + 01 (unsigned shorts) + // outputs: + // rcx - first mismatched element index + assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); + + bind(COMPARE_WIDE_VECTORS); + movdqu(vec1, Address(str1, result, scale)); + pcmpestri(vec1, Address(str2, result, scale), pcmpmask); + // After pcmpestri cnt1(rcx) contains mismatched element index + + jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 + addptr(result, stride); + subptr(cnt2, stride); + jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); + + // compare wide vectors tail + testl(result, result); + jccb(Assembler::zero, LENGTH_DIFF_LABEL); + + movl(cnt2, stride); + movl(result, stride); + negptr(result); + movdqu(vec1, Address(str1, result, scale)); + pcmpestri(vec1, Address(str2, result, scale), pcmpmask); + jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); // Mismatched characters in the vectors bind(VECTOR_NOT_EQUAL); - lea(str1, Address(str1, cnt2, Address::times_2)); - lea(str2, Address(str2, cnt2, Address::times_2)); - movl(cnt1, 8); - - // Compare tail (< 8 chars), or rescan last vectors to - // find 1st mismatched characters - bind(COMPARE_TAIL); - testl(cnt1, cnt1); - 
jccb(Assembler::zero, LENGTH_DIFF_LABEL); - movl(cnt2, cnt1); + addptr(result, cnt1); + movptr(cnt2, result); + load_unsigned_short(result, Address(str1, cnt2, scale)); + load_unsigned_short(cnt1, Address(str2, cnt2, scale)); + subl(result, cnt1); + jmpb(POP_LABEL); + + bind(COMPARE_TAIL); // limit is zero + movl(cnt2, result); // Fallthru to tail compare } // Shift str2 and str1 to the end of the arrays, negate min - lea(str1, Address(str1, cnt2, Address::times_2, 0)); - lea(str2, Address(str2, cnt2, Address::times_2, 0)); + lea(str1, Address(str1, cnt2, scale, 0)); + lea(str2, Address(str2, cnt2, scale, 0)); negptr(cnt2); - // Compare the rest of the characters + // Compare the rest of the elements bind(WHILE_HEAD_LABEL); - load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0)); - load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0)); + load_unsigned_short(result, Address(str1, cnt2, scale, 0)); + load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); subl(result, cnt1); jccb(Assembler::notZero, POP_LABEL); increment(cnt2); - jcc(Assembler::notZero, WHILE_HEAD_LABEL); + jccb(Assembler::notZero, WHILE_HEAD_LABEL); // Strings are equal up to min length. Return the length difference. 
bind(LENGTH_DIFF_LABEL); @@ -8747,7 +8781,7 @@ // Discard the stored length difference bind(POP_LABEL); - addptr(rsp, wordSize); + pop(cnt1); // That's it bind(DONE_LABEL); @@ -8795,6 +8829,7 @@ if (UseSSE42Intrinsics) { // With SSE4.2, use double quad vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors andl(result, 0x0000000e); // tail count (in bytes) andl(limit, 0xfffffff0); // vector count (in bytes) @@ -8808,11 +8843,23 @@ movdqu(vec1, Address(ary1, limit, Address::times_1)); movdqu(vec2, Address(ary2, limit, Address::times_1)); pxor(vec1, vec2); + ptest(vec1, vec1); jccb(Assembler::notZero, FALSE_LABEL); addptr(limit, 16); jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + testl(result, result); + jccb(Assembler::zero, TRUE_LABEL); + + movdqu(vec1, Address(ary1, result, Address::times_1, -16)); + movdqu(vec2, Address(ary2, result, Address::times_1, -16)); + pxor(vec1, vec2); + + ptest(vec1, vec1); + jccb(Assembler::notZero, FALSE_LABEL); + jmpb(TRUE_LABEL); + bind(COMPARE_TAIL); // limit is zero movl(limit, result); // Fallthru to tail compare diff -r 908c5092d72a -r e9a2b8e0572a src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Mon Jan 31 16:28:40 2011 -0500 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Feb 09 15:02:23 2011 -0800 @@ -1277,6 +1277,9 @@ void prefetcht2(Address src); void prefetchw(Address src); + // POR - Bitwise logical OR + void por(XMMRegister dst, XMMRegister src); + // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, Address src, int mode); @@ -2293,7 +2296,7 @@ // Compare strings. void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, - XMMRegister vec1, XMMRegister vec2); + XMMRegister vec1); // Compare char[] arrays. 
void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, diff -r 908c5092d72a -r e9a2b8e0572a src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Mon Jan 31 16:28:40 2011 -0500 +++ b/src/cpu/x86/vm/x86_32.ad Wed Feb 09 15:02:23 2011 -0800 @@ -12629,16 +12629,16 @@ ins_pipe( pipe_slow ); %} -instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2, - eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{ +instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, + eAXRegI result, regXD tmp1, eFlagsReg cr) %{ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %} + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} ins_encode %{ __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister); + $tmp1$$XMMRegister); %} ins_pipe( pipe_slow ); %} diff -r 908c5092d72a -r e9a2b8e0572a src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Mon Jan 31 16:28:40 2011 -0500 +++ b/src/cpu/x86/vm/x86_64.ad Wed Feb 09 15:02:23 2011 -0800 @@ -11583,17 +11583,17 @@ ins_pipe(pipe_slow); %} -instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2, - rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr) +instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, + rax_RegI result, regD tmp1, rFlagsReg cr) %{ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %} + 
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} ins_encode %{ __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister); + $tmp1$$XMMRegister); %} ins_pipe( pipe_slow ); %}