Mercurial > hg > openjdk6-mips
view hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp @ 10:7eeee95a5a53
Fix five bugs related to safepoint_poll, double-precision operand, verify_oop operation and safepoint_return respectively.
1. pc_offset for oopMap at safepoint_poll used by add_debug_info_branch
must be the offset of the instruction which causes an exception.
2. To avoid the failure of type-checking, when value of LIRConst, which
is single-precision or double-precision, is got through a common path.
However, as_jint_lo_bits and as_jint_hi_bits, which are much more
general, should be used.
3. In the stack2reg function, when operand is double-precision, two
float registers are filled with content of the same stack address. We
should not do that. Fix it.
4. In the verify_oop_addr function, the address of the object to be
verified may use SP, so the object must be loaded before changing SP.
5. Let safepoint_return use AT.
6. Do some codes cleaning work.
author | YANG Yongqiang <yangyongqiang@loongson.cn> |
---|---|
date | Sat, 23 Oct 2010 21:08:56 +0000 |
parents | c1e1428eff7c |
children |
line wrap: on
line source
/* * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2010 Lemote, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. * */ #include "incls/_precompiled.incl" #include "incls/_stubGenerator_mips.cpp.incl" // Declaration and definition of StubGenerator (no .hpp file). // For a more detailed description of the stub routine structure // see the comment in stubRoutines.hpp #define __ _masm-> //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) //#define a__ ((Assembler*)_masm)-> //#ifdef PRODUCT //#define BLOCK_COMMENT(str) /* nothing */ //#else //#define BLOCK_COMMENT(str) __ block_comment(str) //#endif //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions // Stub Code definitions static address handle_unsafe_access() { JavaThread* thread = JavaThread::current(); address pc = thread->saved_exception_pc(); // pc is the instruction which we must emulate // doing a no-op is fine: return garbage from the load // therefore, compute npc //address npc = Assembler::locate_next_instruction(pc); address npc = (address)((unsigned long)pc + sizeof(unsigned long)); // request an async exception thread->set_pending_unsafe_access_error(); // return address of next instruction to execute return npc; } class StubGenerator: public StubCodeGenerator { private: // ABI mips o32 // This fig is not MIPS ABI. It is call Java from C ABI. // Call stubs are used to call Java from C // // [ return_from_Java ] // [ argument word n-1 ] <--- sp // ... // [ argument word 0 ] // ... //-10 [ S6 ] // -9 [ S5 ] // -8 [ S4 ] // -7 [ S3 ] // -6 [ S0 ] // -5 [ TSR(S2) ] // -4 [ LVP(S7) ] // -3 [ BCP(S1) ] // -2 [ saved fp ] <--- fp_after_call // -1 [ return address ] // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp // 1 [ result ] <--- a1 // 2 [ result_type ] <--- a2 // 3 [ method ] <--- a3 // 4 [ entry_point ] // 5 [ parameters ] // 6 [ parameter_size ] // 7 [ thread ] address generate_call_stub(address& return_address) { //assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 && // (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, // "adjust this code"); StubCodeMark mark(this, "StubRoutines", "call_stub"); address start = __ pc(); // same as in generate_catch_exception()! // stub code // save ra and fp __ sw(RA, SP, (-1) * wordSize); __ sw(FP, SP, (-2) * wordSize); __ sw(BCP, SP, (-3) * wordSize); __ sw(LVP, SP, (-4) * wordSize); __ sw(TSR, SP, (-5) * wordSize); __ sw(S1, SP, (-6) * wordSize); __ sw(S3, SP, (-7) * wordSize); __ sw(S4, SP, (-8) * wordSize); __ sw(S5, SP, (-9) * wordSize); __ sw(S6, SP, (-10) * wordSize); #ifdef OPT_THREAD __ get_thread(TREG); #endif // lw parameter_size __ lw(T0, SP, 6 * wordSize); // I think 14 is the max gap between argument and callee saved register __ addi(FP, SP, (-2) * wordSize); __ addi(SP, SP, (-10) * wordSize); __ sw(A0, FP, 2 * wordSize); __ sw(A1, FP, 3 * wordSize); __ sw(A2, FP, 4 * wordSize); __ sw(A3, FP, 5 * wordSize); #ifdef ASSERT // make sure we have no pending exceptions { Label L; __ lw(T2, FP, 9 * wordSize); __ lw(T3, T2, in_bytes(Thread::pending_exception_offset())); __ beq(T3, ZERO, L); __ delayed()->nop(); /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ __ stop("StubRoutines::call_stub: entered with pending exception"); __ bind(L); } #endif // pass parameters if any Label parameters_done; // judge if the parameter_size equals 0 __ beq(T0, ZERO, parameters_done); __ delayed()->nop(); __ sll(AT,T0,Interpreter::logStackElementSize()); __ sub(SP, SP, AT); __ move(AT, -StackAlignmentInBytes); __ andr(SP, SP , AT); // Copy Java parameters in reverse order (receiver last) // Note that the argument order is inverted in the process // source is edx[ecx: N-1..0] // dest is esp[ebx: 0..N-1] Label loop; __ lw(T2, FP, 7 * wordSize); // parameter pointer in T2,refernce to the stack arch __ move(T4, ZERO); __ bind(loop); if (TaggedStackInterpreter) { __ sll(T5, T0, 3); __ add(T5, T5, T2); __ lw(AT, T5, -2*wordSize); __ sll(T5,T4,3); __ add(T5,T5, SP); __ sw(AT, T5, Interpreter::expr_tag_offset_in_bytes(0)); } // get parameter __ sll(T5, T0, 2); __ add(T5, T5, T2); __ lw(AT, T5, -wordSize); __ sll(T5,T4,2); __ add(T5,T5, SP); __ sw(AT, T5, Interpreter::expr_offset_in_bytes(0)); __ addi(T4,T4,1); __ addi(T0,T0,-1); __ bne(T0, ZERO, loop); __ delayed()->nop(); // advance to next parameter // call Java function __ bind(parameters_done); // receiver in V0, methodOop in T7 __ move(T7, A3); __ lw(T9, FP, 6 * wordSize); // get entry_point __ move(T5,SP); //set sender sp __ jalr(T9); __ delayed()->nop(); return_address = __ pc(); Label common_return; __ bind(common_return); // store result depending on type // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) __ lw(T0, FP, 3 * wordSize); // result --> T0 Label is_long, is_float, is_double, exit; __ lw(T2, FP, 4 * wordSize); // result_type --> T2 __ addi(T3, T2, (-1) * T_LONG); __ beq(T3, ZERO, is_long); __ delayed()->addi(T3, T2, (-1) * T_FLOAT); __ beq(T3, ZERO, is_float); __ delayed()->addi(T3, T2, (-1) * T_DOUBLE); __ beq(T3, ZERO, is_double); __ delayed()->nop(); // handle T_INT case __ sw(V0, T0, 0 * wordSize); __ bind(exit); // restore __ addi(SP, FP, 2 * wordSize ); __ lw(RA, SP, -1 * wordSize); __ lw(FP, SP, -2 * wordSize); __ lw(BCP, SP, -3 * wordSize); __ lw(LVP, SP, -4 * wordSize); __ lw(TSR, SP, -5 * wordSize); __ lw(S1, SP, (-6) * wordSize); __ lw(S3, SP, (-7) * wordSize); __ lw(S4, SP, (-8) * wordSize); __ lw(S5, SP, (-9) * wordSize); __ lw(S6, SP, (-10) * wordSize); // return __ jr(RA); __ delayed()->nop(); // handle return types different from T_INT __ bind(is_long); __ sw(V0, T0, 0 * wordSize); __ sw(V1, T0, 1 * wordSize); __ b(exit); __ delayed()->nop(); __ bind(is_float); __ swc1(F0, T0, 0 * wordSize); __ b(exit); __ delayed()->nop(); __ bind(is_double); __ swc1(F0, T0, 0 * wordSize); __ swc1(F1, T0, 1 * wordSize); __ b(exit); __ delayed()->nop(); //FIXME, 1.6 x86 version add operation of fpu here StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); __ b(common_return); __ delayed()->nop(); return start; } // Return point for a Java call if there's an exception thrown in // Java code. The exception is caught and transformed into a // pending exception stored in JavaThread that can be tested from // within the VM. // // Note: Usually the parameters are removed by the callee. In case // of an exception crossing an activation frame boundary, that is // not the case if the callee is compiled code => need to setup the // rsp. // // rax: exception oop address generate_catch_exception() { StubCodeMark mark(this, "StubRoutines", "catch_exception"); address start = __ pc(); Register thread = TREG; // get thread directly #ifndef OPT_THREAD __ lw(thread, FP, 9 * wordSize); #endif #ifdef ASSERT // verify that threads correspond { Label L; __ get_thread(T7); __ beq(T7, thread, L); __ delayed()->nop(); __ stop("StubRoutines::catch_exception: threads must correspond"); __ bind(L); } #endif // set pending exception __ verify_oop(V0); __ sw(V0, thread, in_bytes(Thread::pending_exception_offset())); __ move(AT, (int)__FILE__); __ sw(AT, thread, in_bytes(Thread::exception_file_offset ())); __ move(AT, (int)__LINE__); __ sw(AT, thread, in_bytes(Thread::exception_line_offset ())); // complete return to VM assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); __ delayed()->nop(); return start; } // Continuation point for runtime calls returning with a pending // exception. The pending exception check happened in the runtime // or native call stub. The pending exception in Thread is // converted into a Java-level exception. // // Contract with Java-level exception handlers: // rax: exception // rdx: throwing pc // // NOTE: At entry of this stub, exception-pc must be on stack !! address generate_forward_exception() { StubCodeMark mark(this, "StubRoutines", "forward exception"); //Register thread = TREG; Register thread = T8; address start = __ pc(); // Upon entry, the sp points to the return address returning into Java // (interpreted or compiled) code; i.e., the return address becomes the // throwing pc. // // Arguments pushed before the runtime call are still on the stack but // the exception handler will reset the stack pointer -> ignore them. // A potential result in registers can be ignored as well. #ifdef ASSERT // make sure this code is only executed if there is a pending exception #ifndef OPT_THREAD __ get_thread(thread); #endif { Label L; __ lw(AT, thread, in_bytes(Thread::pending_exception_offset())); __ bne(AT, ZERO, L); __ delayed()->nop(); __ stop("StubRoutines::forward exception: no pending exception (1)"); __ bind(L); } #endif // compute exception handler into T9 __ lw(A0, SP, 0); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), A0); __ move(T9, V0); __ pop(V1); #ifndef OPT_THREAD __ get_thread(thread); #endif __ lw(V0, thread, in_bytes(Thread::pending_exception_offset())); __ sw(ZERO, thread, in_bytes(Thread::pending_exception_offset())); #ifdef ASSERT // make sure exception is set { Label L; __ bne(V0, ZERO, L); __ delayed()->nop(); __ stop("StubRoutines::forward exception: no pending exception (2)"); __ bind(L); } #endif // continue at exception handler (return address removed) // V0: exception // T9: exception handler // V1: throwing pc __ verify_oop(V0); __ jr(T9); __ delayed()->nop(); return start; } // Support for intptr_t get_previous_fp() // // This routine is used to find the previous frame pointer for the // caller (current_frame_guess). This is used as part of debugging // ps() is seemingly lost trying to find frames. // This code assumes that caller current_frame_guess) has a frame. address generate_get_previous_fp() { StubCodeMark mark(this, "StubRoutines", "get_previous_fp"); const Address old_fp (FP, 0); const Address older_fp (V0, 0); address start = __ pc(); __ enter(); __ lw(V0, old_fp); // callers fp __ lw(V0, older_fp); // the frame for ps() __ leave(); __ jr(RA); __ delayed()->nop(); return start; } // The following routine generates a subroutine to throw an // asynchronous UnknownError when an unsafe access gets a fault that // could not be reasonably prevented by the programmer. (Example: // SIGBUS/OBJERR.) address generate_handler_for_unsafe_access() { StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); address start = __ pc(); __ pushad(); // push registers // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord); __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); __ delayed()->nop(); __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); __ popad(); __ jr(RA); __ delayed()->nop(); return start; } // Non-destructive plausibility checks for oops // // Arguments: // all args on stack! // // Stack after saving c_rarg3: // [tos + 0]: saved c_rarg3 // [tos + 1]: saved c_rarg2 // [tos + 2]: saved r12 (several TemplateTable methods use it) // [tos + 3]: saved flags // [tos + 4]: return address // * [tos + 5]: error message (char*) // * [tos + 6]: object to verify (oop) // * [tos + 7]: saved rax - saved by caller and bashed // * = popped on exit address generate_verify_oop() { StubCodeMark mark(this, "StubRoutines", "verify_oop"); address start = __ pc(); __ verify_oop_subroutine(); address end = __ pc(); return start; } // // Generate overlap test for array copy stubs // // Input: // A0 - array1 // A1 - array2 // A2 - element count // // Note: this code can only use %eax, %ecx, and %edx // //use T4,T5 as temp void array_overlap_test(address no_overlap_target, int log2_elem_size) { int elem_size = 1 << log2_elem_size; Address::ScaleFactor sf = Address::times_1; switch (log2_elem_size) { case 0: sf = Address::times_1; break; case 1: sf = Address::times_2; break; case 2: sf = Address::times_4; break; case 3: sf = Address::times_8; break; } __ sll(T5, A2, sf); __ add(T5, T5, A0); __ lea(T4, Address(T5, -elem_size)); __ sub(AT, A1,A0); __ blez(AT, no_overlap_target); __ delayed()->nop(); __ sub(AT, A1, T4); __ bgtz(AT, no_overlap_target); __ delayed()->nop(); } // // Generate store check for array // // Input: // %edi - starting address // %ecx - element count // // The 2 input registers are overwritten // // // Generate store check for array // // Input: // T4 - starting address(edi) // T5 - element count (ecx) // // The 2 input registers are overwritten // void array_store_check() { BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); CardTableModRefBS* ct = (CardTableModRefBS*)bs; assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); Label l_0; __ sll(AT, T5, Address::times_4); __ add(AT, T4, AT); __ lea(T5, Address(AT, -4)); __ shr(T4, CardTableModRefBS::card_shift); __ shr(T5, CardTableModRefBS::card_shift); __ sub(T5, T5, T4); __ bind(l_0); __ add(AT, T4, T5); __ sw(ZERO, AT, (int)ct->byte_map_base); __ addi(T5, T5, -4); __ bgez(T5, l_0); __ delayed()->nop(); } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, // we let the hardware handle it. The one to eight bytes within words, // dwords or qwords that span cache line boundaries will still be loaded // and stored atomically. // // Side Effects: // disjoint_byte_copy_entry is set to the no-overlap entry point // used by generate_conjoint_byte_copy(). // address generate_disjoint_byte_copy(bool aligned, const char *name) { StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); Label l_0, l_1, l_2, l_3, l_4, l_5, l_6; __ push(T3); __ push(T4); __ push(T5); __ push(T8); __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); __ move(T8, T5); // original count in T5 __ addi(AT, T5, -3 ); __ blez(AT, l_4); __ delayed()->nop(); if (!aligned) { // align source address at dword address boundary __ move(T5, 4); __ sub(T5, T5, T3); __ andi(T5, T5, 3); __ beq(T5, ZERO, l_1); __ delayed()->nop(); __ sub(T8,T8,T5); __ bind(l_0); __ lb(AT, T3, 0); __ sb(AT, T4, 0); __ addi(T3, T3, 1); __ addi(T4, T4, 1); __ addi(T5 ,T5, 1); __ bne(T5, ZERO, l_0); __ delayed()->nop(); __ bind(l_1); __ move(T5, T8); } __ shr(T5, 2); __ beq(T5, ZERO, l_4); // no dwords to move __ delayed()->nop(); // copy aligned dwords __ bind(l_2); __ align(16); __ bind(l_3); __ lw(AT, T3, 0); __ sw(AT, T4, 0 ); __ addi(T3, T3, 4); __ addi(T4, T4, 4); __ addi(T5, T5, -1); __ bne(T5, ZERO, l_3); __ delayed()->nop(); __ bind(l_4); __ move(T5, T8); __ andi(T5, T5, 3); __ beq(T5, ZERO, l_6); __ delayed()->nop(); // copy suffix __ bind(l_5); __ lb(AT, T3, 0); __ sb(AT, T4, 0); __ addi(T3, T3, 1); __ addi(T4, T4, 1); __ addi(T5, T5, -1); __ bne(T5, ZERO, l_5 ); __ delayed()->nop(); __ bind(l_6); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, // we let the hardware handle it. The one to eight bytes within words, // dwords or qwords that span cache line boundaries will still be loaded // and stored atomically. // address generate_conjoint_byte_copy(bool aligned, const char *name) { Label l_1, l_2, l_3, l_4, l_5; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy() : StubRoutines::jbyte_disjoint_arraycopy(); __ push(T3); __ push(T4); __ push(T5); __ push(T8); array_overlap_test(nooverlap_target, 0); // copy from high to low __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); __ add(AT, T3, T5); __ lea(T3, Address(AT, -4)); __ add(AT, T4, T5); __ lea(T4, Address(AT, -4)); __ move(T8, T5); __ addi(AT, T5, -3); __ blez(AT, l_3); __ delayed()->nop(); __ shr(T5, 2); __ align(16); __ bind(l_1); __ lw(AT, T3, 0); __ sw(AT, T4, 0); __ addi(T3, T3, -4); __ addi(T4, T4, -4); __ addi(T5, T5, -1); __ bne(T5, ZERO, l_1); __ delayed()->nop(); __ b(l_3); __ delayed()->nop(); // copy dwords aligned or not with repeat move __ bind(l_2); __ bind(l_3); // copy suffix (0-3 bytes) __ andi(T8, T8, 3); __ beq(T8, ZERO, l_5); __ delayed()->nop(); __ addi(T3, T3, 3); __ bind(l_4); __ lb(AT, T3, 0); __ sb(AT, T4, 0); __ addi(T3, T3, -1); __ addi(T4, T4, -1); __ addi(T5, T5, -1); __ bne(T5, ZERO, l_4); __ delayed()->nop(); __ bind(l_5); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we // let the hardware handle it. The two or four words within dwords // or qwords that span cache line boundaries will still be loaded // and stored atomically. // // Side Effects: // disjoint_short_copy_entry is set to the no-overlap entry point // used by generate_conjoint_short_copy(). // address generate_disjoint_short_copy(bool aligned, const char *name) { Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); __ push(T3); __ push(T4); __ push(T5); __ push(T8); __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); if (!aligned) { __ beq(T5, ZERO, l_5); __ delayed()->nop(); // align source address at dword address boundary __ move(T8, T3); // original from __ andi(T8, T8, 3); // either 0 or 2 __ beq(T8, ZERO, l_1); // no prefix __ delayed()->nop(); // copy prefix __ lh(AT, T3, 0); __ sh(AT, T4, 0); __ add(T3, T3, T8); __ add(T4, T4, T8); __ addi(T5, T5, -1); __ bind(l_1); } __ move(T8, T5); // word count less prefix __ sra(T5, T5, 1); __ beq(T5, ZERO, l_4); __ delayed()->nop(); // copy aligned dwords __ bind(l_2); __ align(16); __ bind(l_3); __ lw(AT, T3, 0); __ sw(AT, T4, 0 ); __ addi(T3, T3, 4); __ addi(T4, T4, 4); __ addi(T5, T5, -1); __ bne(T5, ZERO, l_3); __ delayed()->nop(); __ bind(l_4); __ andi(T8, T8, 1); __ beq(T8, ZERO, l_5); __ delayed()->nop(); // copy suffix __ lh(AT, T3, 0); __ sh(AT, T4, 0); __ bind(l_5); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we // let the hardware handle it. The two or four words within dwords // or qwords that span cache line boundaries will still be loaded // and stored atomically. // address generate_conjoint_short_copy(bool aligned, const char *name) { Label l_1, l_2, l_3, l_4, l_5; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy() : StubRoutines::jshort_disjoint_arraycopy(); __ push(T3); __ push(T4); __ push(T5); __ push(T8); array_overlap_test(nooverlap_target, 1); /* __ pushl(esi); __ movl(ecx, Address(esp, 4+12)); // count __ pushl(edi); __ movl(esi, Address(esp, 8+ 4)); // from __ movl(edi, Address(esp, 8+ 8)); // to */ __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); // copy dwords from high to low // __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4 __ sll(AT, T5, Address::times_2); __ add(AT, T3, AT); __ lea(T3, Address( AT, -4)); //__ std(); //__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4 __ sll(AT,T5 , Address::times_2); __ add(AT, T4, AT); __ lea(T4, Address( AT, -4)); // __ movl(eax, ecx); __ move(T8, T5); __ bind(l_1); // __ sarl(ecx, 1); // dword count __ sra(T5,T5, 1); //__ jcc(Assembler::equal, l_4); // no dwords to move __ beq(T5, ZERO, l_4); __ delayed()->nop(); /* __ cmpl(ecx, 32); __ jcc(Assembler::above, l_3); // > 32 dwords // copy dwords with loop __ subl(edi, esi); */ __ align(16); __ bind(l_2); //__ movl(edx, Address(esi)); __ lw(AT, T3, 0); //__ movl(Address(edi, esi, Address::times_1), edx); __ sw(AT, T4, 0); //__ subl(esi, 4); __ addi(T3, T3, -4); __ addi(T4, T4, -4); //__ decl(ecx); __ addi(T5, T5, -1); // __ jcc(Assembler::notEqual, l_2); __ bne(T5, ZERO, l_2); __ delayed()->nop(); // __ addl(edi, esi); // __ jmp(l_4); __ b(l_4); __ delayed()->nop(); // copy dwords with repeat move __ bind(l_3); // __ rep_movl(); __ bind(l_4); // __ andl(eax, 1); // suffix count __ andi(T8, T8, 1); // suffix count //__ jcc(Assembler::equal, l_5); // no suffix __ beq(T8, ZERO, l_5 ); __ delayed()->nop(); // copy suffix // __ movw(edx, Address(esi, 2)); __ lh(AT, T3, 2); // __ movw(Address(edi, 2), edx); __ sh(AT, T4, 2); __ bind(l_5); // __ cld(); // __ popl(edi); // __ popl(esi); // __ ret(0); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // is_oop - true => oop array, so generate store check code // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomicly. // // Side Effects: // disjoint_int_copy_entry is set to the no-overlap entry point // used by generate_conjoint_int_oop_copy(). // address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { Label l_2, l_3, l_4, l_stchk; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); /* __ pushl(esi); __ movl(ecx, Address(esp, 4+12)); // count __ pushl(edi); __ movl(esi, Address(esp, 8+ 4)); // from __ movl(edi, Address(esp, 8+ 8)); // to */ __ push(T3); __ push(T4); __ push(T5); __ push(T8); __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); // __ cmpl(ecx, 32); // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords // __ rep_movl(); __ b(l_2); __ delayed()->nop(); if (is_oop) { // __ jmp(l_stchk); __ b(l_stchk); __ delayed()->nop(); } // __ popl(edi); // __ popl(esi); // __ ret(0); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); __ bind(l_2); // __ subl(edi, esi); // __ testl(ecx, ecx); // __ jcc(Assembler::zero, l_4); __ beq(T5, ZERO, l_4); __ delayed()->nop(); __ align(16); __ bind(l_3); //__ movl(edx, Address(esi)); __ lw(AT, T3, 0); // __ movl(Address(edi, esi, Address::times_1), edx); __ sw(AT, T4, 0); // __ addl(esi, 4); __ addi(T3, T3, 4); __ addi(T4, T4, 4); // __ decl(ecx); __ addi(T5, T5, -1); // __ jcc(Assembler::notEqual, l_3); __ bne(T5, ZERO, l_3); __ delayed()->nop(); if (is_oop) { __ bind(l_stchk); // __ movl(edi, Address(esp, 8+ 8)); // __ movl(ecx, Address(esp, 8+ 12)); __ move(T4, A1); __ move(T5, A2); array_store_check(); } __ bind(l_4); // __ popl(edi); // __ popl(esi); // __ ret(0); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // is_oop - true => oop array, so generate store check code // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomicly. // address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { Label l_2, l_3, l_4, l_stchk; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); address nooverlap_target; if (is_oop) { nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy() : StubRoutines::oop_disjoint_arraycopy(); }else { nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy() : StubRoutines::jint_disjoint_arraycopy(); } __ push(T3); __ push(T4); __ push(T5); __ push(T8); array_overlap_test(nooverlap_target, 2); /* __ pushl(esi); __ movl(ecx, Address(esp, 4+12)); // count __ pushl(edi); __ movl(esi, Address(esp, 8+ 4)); // from __ movl(edi, Address(esp, 8+ 8)); // to */ __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 __ sll(AT, T5, Address::times_4); __ add(AT, T3, AT); __ lea(T3 , Address(AT, -4)); //__ std(); //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 __ sll(AT, T5, Address::times_4); __ add(AT, T4, AT); __ lea(T4 , Address(AT, -4)); // __ cmpl(ecx, 32); // __ jcc(Assembler::above, l_3); // > 32 dwords // __ testl(ecx, ecx); //__ jcc(Assembler::zero, l_4); __ beq(T5, ZERO, l_4); __ delayed()->nop(); // __ subl(edi, esi); __ align(16); __ bind(l_2); // __ movl(edx, Address(esi)); __ lw(AT, T3, 0); // __ movl(Address(esi, edi, Address::times_1), edx); __ sw(AT, T4, 0); // __ subl(esi, 4); __ addi(T3, T3, -4); __ addi(T4, T4, -4); // __ decl(ecx); __ addi(T5, T5, -1); //__ jcc(Assembler::notEqual, l_2); __ bne(T5, ZERO, l_2); __ delayed()->nop(); if (is_oop) { // __ jmp(l_stchk); __ b( l_stchk); __ delayed()->nop(); } __ bind(l_4); // __ cld(); // __ popl(edi); // __ popl(esi); // __ ret(0); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); __ bind(l_3); // __ rep_movl(); if (is_oop) { __ bind(l_stchk); // __ movl(edi, Address(esp, 8+ 8)); __ move(T4, A1); // __ movl(ecx, Address(esp, 8+ 12)); __ move(T5, A2); array_store_check(); } // __ cld(); // __ popl(edi); // __ popl(esi); // __ ret(0); __ pop(T8); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } #if 0 // Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // is_oop - true => oop array, so generate store check code // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // Side Effects: // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the // no-overlap entry point used by generate_conjoint_long_oop_copy(). // address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); Label L_copy_32_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count const Register end_from = from; // source array end address const Register end_to = rcx; // destination array end address const Register saved_to = to; // End pointers are inclusive, and if count is not zero they point // to the last unit copied: end_to[0] := end_from[0] __ enter(); // required for proper stackwalking of RuntimeStub frame // Save no-overlap entry point for generate_conjoint_long_oop_copy() assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. if (is_oop) { disjoint_oop_copy_entry = __ pc(); // no registers are destroyed by this call gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2); } else { disjoint_long_copy_entry = __ pc(); } BLOCK_COMMENT("Entry:"); // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers // 'from', 'to' and 'qword_count' are now valid // Copy from low to high addresses. Use 'to' as scratch. __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); __ negptr(qword_count); __ jmp(L_copy_32_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); __ increment(qword_count); __ jcc(Assembler::notZero, L_copy_8_bytes); if (is_oop) { __ jmp(L_exit); } else { inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); restore_arg_regs(); __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } // Copy 64-byte chunks copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); gen_write_ref_array_post_barrier(saved_to, end_to, rax); inc_counter_np(SharedRuntime::_oop_array_copy_ctr); } else { inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); } restore_arg_regs(); __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); return start; } // Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // is_oop - true => oop array, so generate store check code // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); Label L_copy_32_bytes, L_copy_8_bytes, L_exit; const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register qword_count = rdx; // elements count const Register saved_count = rcx; __ enter(); // required for proper stackwalking of RuntimeStub frame assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. address disjoint_copy_entry = NULL; if (is_oop) { assert(!UseCompressedOops, "shouldn't be called for compressed oops"); disjoint_copy_entry = disjoint_oop_copy_entry; oop_copy_entry = __ pc(); array_overlap_test(disjoint_oop_copy_entry, Address::times_8); } else { disjoint_copy_entry = disjoint_long_copy_entry; long_copy_entry = __ pc(); array_overlap_test(disjoint_long_copy_entry, Address::times_8); } BLOCK_COMMENT("Entry:"); // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) array_overlap_test(disjoint_copy_entry, Address::times_8); setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers // 'from', 'to' and 'qword_count' are now valid if (is_oop) { // Save to and count for store barrier __ movptr(saved_count, qword_count); // No registers are destroyed by this call gen_write_ref_array_pre_barrier(to, saved_count); } __ jmp(L_copy_32_bytes); // Copy trailing qwords __ BIND(L_copy_8_bytes); __ movq(rax, Address(from, qword_count, Address::times_8, -8)); __ movq(Address(to, qword_count, Address::times_8, -8), rax); __ decrement(qword_count); __ jcc(Assembler::notZero, L_copy_8_bytes); if (is_oop) { __ jmp(L_exit); } else { inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); restore_arg_regs(); __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } // Copy in 32-bytes chunks copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); if (is_oop) { __ BIND(L_exit); __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); gen_write_ref_array_post_barrier(to, rcx, rax); inc_counter_np(SharedRuntime::_oop_array_copy_ctr); } else { inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); } restore_arg_regs(); __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); return start; } // Helper for generating a dynamic type check. // Smashes no registers. void generate_type_check(Register sub_klass, Register super_check_offset, Register super_klass, Label& L_success) { assert_different_registers(sub_klass, super_check_offset, super_klass); BLOCK_COMMENT("type_check:"); Label L_miss; // a couple of useful fields in sub_klass: int ss_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::secondary_supers_offset_in_bytes()); int sc_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::secondary_super_cache_offset_in_bytes()); Address secondary_supers_addr(sub_klass, ss_offset); Address super_cache_addr( sub_klass, sc_offset); // if the pointers are equal, we are done (e.g., String[] elements) __ cmpptr(super_klass, sub_klass); __ jcc(Assembler::equal, L_success); // check the supertype display: Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); __ cmpptr(super_klass, super_check_addr); // test the super type __ jcc(Assembler::equal, L_success); // if it was a primary super, we can just fail immediately __ cmpl(super_check_offset, sc_offset); __ jcc(Assembler::notEqual, L_miss); // Now do a linear scan of the secondary super-klass chain. // The repne_scan instruction uses fixed registers, which we must spill. // (We need a couple more temps in any case.) // This code is rarely used, so simplicity is a virtue here. inc_counter_np(SharedRuntime::_partial_subtype_ctr); { __ push(rax); __ push(rcx); __ push(rdi); assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); __ movptr(rdi, secondary_supers_addr); // Load the array length. __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); // Skip to start of data. __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); // Scan rcx words at [rdi] for occurance of rax // Set NZ/Z based on last compare __ movptr(rax, super_klass); if (UseCompressedOops) { // Compare against compressed form. Don't need to uncompress because // looks like orig rax is restored in popq below. __ encode_heap_oop(rax); __ repne_scanl(); } else { __ repne_scan(); } // Unspill the temp. registers: __ pop(rdi); __ pop(rcx); __ pop(rax); __ jcc(Assembler::notEqual, L_miss); } // Success. Cache the super we found and proceed in triumph. __ movptr(super_cache_addr, super_klass); // note: rax is dead __ jmp(L_success); // Fall through on failure! __ BIND(L_miss); } // // Generate checkcasting array copy stub // // Input: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // c_rarg3 - size_t ckoff (super_check_offset) // not Win64 // c_rarg4 - oop ckval (super_klass) // Win64 // rsp+40 - oop ckval (super_klass) // // Output: // rax == 0 - success // rax == -1^K - failure, where K is partial transfer count // address generate_checkcast_copy(const char *name) { Label L_load_element, L_store_element, L_do_card_marks, L_done; // Input registers (after setup_arg_regs) const Register from = rdi; // source array address const Register to = rsi; // destination array address const Register length = rdx; // elements count const Register ckoff = rcx; // super_check_offset const Register ckval = r8; // super_klass // Registers used as temps (r13, r14 are save-on-entry) const Register end_from = from; // source array end address const Register end_to = r13; // destination array end address const Register count = rdx; // -(count_remaining) const Register r14_length = r14; // saved copy of length // End pointers are inclusive, and if length is not zero they point // to the last unit copied: end_to[0] := end_from[0] const Register rax_oop = rax; // actual oop copied const Register r11_klass = r11; // oop._klass //--------------------------------------------------------------- // Assembler stub will be used for this call to arraycopy // if the two arrays are subtypes of Object[] but the // destination array type is not equal to or a supertype // of the source type. Each element must be separately // checked. __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); __ enter(); // required for proper stackwalking of RuntimeStub frame checkcast_copy_entry = __ pc(); BLOCK_COMMENT("Entry:"); #ifdef ASSERT // caller guarantees that the arrays really are different // otherwise, we would have to make conjoint checks { Label L; array_overlap_test(L, TIMES_OOP); __ stop("checkcast_copy within a single array"); __ bind(L); } #endif //ASSERT // allocate spill slots for r13, r14 enum { saved_r13_offset, saved_r14_offset, saved_rbp_offset, saved_rip_offset, saved_rarg0_offset }; __ subptr(rsp, saved_rbp_offset * wordSize); __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); setup_arg_regs(4); // from => rdi, to => rsi, length => rdx // ckoff => rcx, ckval => r8 // r9 and r10 may be used to save non-volatile registers #ifdef _WIN64 // last argument (#4) is on stack on Win64 const int ckval_offset = saved_rarg0_offset + 4; __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); #endif // check that int operands are properly extended to size_t assert_clean_int(length, rax); assert_clean_int(ckoff, rax); #ifdef ASSERT BLOCK_COMMENT("assert consistent ckoff/ckval"); // The ckoff and ckval must be mutually consistent, // even though caller generates both. { Label L; int sco_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::super_check_offset_offset_in_bytes()); __ cmpl(ckoff, Address(ckval, sco_offset)); __ jcc(Assembler::equal, L); __ stop("super_check_offset inconsistent"); __ bind(L); } #endif //ASSERT // Loop-invariant addresses. They are exclusive end pointers. Address end_from_addr(from, length, TIMES_OOP, 0); Address end_to_addr(to, length, TIMES_OOP, 0); // Loop-variant addresses. They assume post-incremented count < 0. Address from_element_addr(end_from, count, TIMES_OOP, 0); Address to_element_addr(end_to, count, TIMES_OOP, 0); gen_write_ref_array_pre_barrier(to, count); // Copy from low to high addresses, indexed from the end of each array. __ lea(end_from, end_from_addr); __ lea(end_to, end_to_addr); __ movptr(r14_length, length); // save a copy of the length assert(length == count, ""); // else fix next line: __ negptr(count); // negate and test the length __ jcc(Assembler::notZero, L_load_element); // Empty array: Nothing to do. __ xorptr(rax, rax); // return 0 on (trivial) success __ jmp(L_done); // ======== begin loop ======== // (Loop is rotated; its entry is L_load_element.) // Loop control: // for (count = -count; count != 0; count++) // Base pointers src, dst are biased by 8*(count-1),to last element. __ align(16); __ BIND(L_store_element); __ store_heap_oop(to_element_addr, rax_oop); // store the oop __ increment(count); // increment the count toward zero __ jcc(Assembler::zero, L_do_card_marks); // ======== loop entry is here ======== __ BIND(L_load_element); __ load_heap_oop(rax_oop, from_element_addr); // load the oop __ testptr(rax_oop, rax_oop); __ jcc(Assembler::zero, L_store_element); __ load_klass(r11_klass, rax_oop);// query the object klass generate_type_check(r11_klass, ckoff, ckval, L_store_element); // ======== end loop ======== // It was a real error; we must depend on the caller to finish the job. // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. // Emit GC store barriers for the oops we have copied (r14 + rdx), // and report their number to the caller. assert_different_registers(rax, r14_length, count, to, end_to, rcx); __ lea(end_to, to_element_addr); gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ movptr(rax, r14_length); // original oops __ addptr(rax, count); // K = (original - remaining) oops __ notptr(rax); // report (-1^K) to caller __ jmp(L_done); // Come here on success only. __ BIND(L_do_card_marks); __ addptr(end_to, -wordSize); // make an inclusive end pointer gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ xorptr(rax, rax); // return 0 on success // Common exit point (success or failure). __ BIND(L_done); __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); restore_arg_regs(); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); return start; } // // Generate 'unsafe' array copy stub // Though just as safe as the other stubs, it takes an unscaled // size_t argument instead of an element count. // // Input: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - byte count, treated as ssize_t, can be zero // // Examines the alignment of the operands and dispatches // to a long, int, short, or byte copy loop. // address generate_unsafe_copy(const char *name) { Label L_long_aligned, L_int_aligned, L_short_aligned; // Input registers (before setup_arg_regs) const Register from = c_rarg0; // source array address const Register to = c_rarg1; // destination array address const Register size = c_rarg2; // byte count (size_t) // Register used as a temp const Register bits = rax; // test copy of low bits __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); __ enter(); // required for proper stackwalking of RuntimeStub frame // bump this on entry, not on exit: inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); __ mov(bits, from); __ orptr(bits, to); __ orptr(bits, size); __ testb(bits, BytesPerLong-1); __ jccb(Assembler::zero, L_long_aligned); __ testb(bits, BytesPerInt-1); __ jccb(Assembler::zero, L_int_aligned); __ testb(bits, BytesPerShort-1); __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); __ BIND(L_short_aligned); __ shrptr(size, LogBytesPerShort); // size => short_count __ jump(RuntimeAddress(short_copy_entry)); __ BIND(L_int_aligned); __ shrptr(size, LogBytesPerInt); // size => int_count __ jump(RuntimeAddress(int_copy_entry)); __ BIND(L_long_aligned); __ shrptr(size, LogBytesPerLong); // size => qword_count __ jump(RuntimeAddress(long_copy_entry)); return start; } // Perform range checks on the proposed arraycopy. // Kills temp, but nothing else. // Also, clean the sign bits of src_pos and dst_pos. void arraycopy_range_checks(Register src, // source array oop (c_rarg0) Register src_pos, // source position (c_rarg1) Register dst, // destination array oo (c_rarg2) Register dst_pos, // destination position (c_rarg3) Register length, Register temp, Label& L_failed) { BLOCK_COMMENT("arraycopy_range_checks:"); // if (src_pos + length > arrayOop(src)->length()) FAIL; __ movl(temp, length); __ addl(temp, src_pos); // src_pos + length __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); __ jcc(Assembler::above, L_failed); // if (dst_pos + length > arrayOop(dst)->length()) FAIL; __ movl(temp, length); __ addl(temp, dst_pos); // dst_pos + length __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); __ jcc(Assembler::above, L_failed); // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. // Move with sign extension can be used since they are positive. __ movslq(src_pos, src_pos); __ movslq(dst_pos, dst_pos); BLOCK_COMMENT("arraycopy_range_checks done"); } // // Generate generic array copy stubs // // Input: // c_rarg0 - src oop // c_rarg1 - src_pos (32-bits) // c_rarg2 - dst oop // c_rarg3 - dst_pos (32-bits) // not Win64 // c_rarg4 - element count (32-bits) // Win64 // rsp+40 - element count (32-bits) // // Output: // rax == 0 - success // rax == -1^K - failure, where K is partial transfer count // address generate_generic_copy(const char *name) { Label L_failed, L_failed_0, L_objArray; Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; // Input registers const Register src = c_rarg0; // source array oop const Register src_pos = c_rarg1; // source position const Register dst = c_rarg2; // destination array oop const Register dst_pos = c_rarg3; // destination position // elements count is on stack on Win64 #ifdef _WIN64 #define C_RARG4 Address(rsp, 6 * wordSize) #else #define C_RARG4 c_rarg4 #endif { int modulus = CodeEntryAlignment; int target = modulus - 5; // 5 = sizeof jmp(L_failed) int advance = target - (__ offset() % modulus); if (advance < 0) advance += modulus; if (advance > 0) __ nop(advance); } StubCodeMark mark(this, "StubRoutines", name); // Short-hop target to L_failed. Makes for denser prologue code. __ BIND(L_failed_0); __ jmp(L_failed); assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); __ align(CodeEntryAlignment); address start = __ pc(); __ enter(); // required for proper stackwalking of RuntimeStub frame // bump this on entry, not on exit: inc_counter_np(SharedRuntime::_generic_array_copy_ctr); //----------------------------------------------------------------------- // Assembler stub will be used for this call to arraycopy // if the following conditions are met: // // (1) src and dst must not be null. // (2) src_pos must not be negative. // (3) dst_pos must not be negative. // (4) length must not be negative. // (5) src klass and dst klass should be the same and not NULL. // (6) src and dst should be arrays. // (7) src_pos + length must not exceed length of src. // (8) dst_pos + length must not exceed length of dst. // // if (src == NULL) return -1; __ testptr(src, src); // src oop size_t j1off = __ offset(); __ jccb(Assembler::zero, L_failed_0); // if (src_pos < 0) return -1; __ testl(src_pos, src_pos); // src_pos (32-bits) __ jccb(Assembler::negative, L_failed_0); // if (dst == NULL) return -1; __ testptr(dst, dst); // dst oop __ jccb(Assembler::zero, L_failed_0); // if (dst_pos < 0) return -1; __ testl(dst_pos, dst_pos); // dst_pos (32-bits) size_t j4off = __ offset(); __ jccb(Assembler::negative, L_failed_0); // The first four tests are very dense code, // but not quite dense enough to put four // jumps in a 16-byte instruction fetch buffer. // That's good, because some branch predicters // do not like jumps so close together. // Make sure of this. guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); // registers used as temp const Register r11_length = r11; // elements count to copy const Register r10_src_klass = r10; // array klass const Register r9_dst_klass = r9; // dest array klass // if (length < 0) return -1; __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) __ testl(r11_length, r11_length); __ jccb(Assembler::negative, L_failed_0); __ load_klass(r10_src_klass, src); #ifdef ASSERT // assert(src->klass() != NULL); BLOCK_COMMENT("assert klasses not null"); { Label L1, L2; __ testptr(r10_src_klass, r10_src_klass); __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL __ bind(L1); __ stop("broken null klass"); __ bind(L2); __ load_klass(r9_dst_klass, dst); __ cmpq(r9_dst_klass, 0); __ jcc(Assembler::equal, L1); // this would be broken also BLOCK_COMMENT("assert done"); } #endif // Load layout helper (32-bits) // // |array_tag| | header_size | element_type | |log2_element_size| // 32 30 24 16 8 2 0 // // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // int lh_offset = klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(); const Register rax_lh = rax; // layout helper __ movl(rax_lh, Address(r10_src_klass, lh_offset)); // Handle objArrays completely differently... jint objArray_lh = Klass::array_layout_helper(T_OBJECT); __ cmpl(rax_lh, objArray_lh); __ jcc(Assembler::equal, L_objArray); // if (src->klass() != dst->klass()) return -1; __ load_klass(r9_dst_klass, dst); __ cmpq(r10_src_klass, r9_dst_klass); __ jcc(Assembler::notEqual, L_failed); // if (!src->is_Array()) return -1; __ cmpl(rax_lh, Klass::_lh_neutral_value); __ jcc(Assembler::greaterEqual, L_failed); // At this point, it is known to be a typeArray (array_tag 0x3). #ifdef ASSERT { Label L; __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); __ jcc(Assembler::greaterEqual, L); __ stop("must be a primitive array"); __ bind(L); } #endif arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, r10, L_failed); // typeArrayKlass // // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); // const Register r10_offset = r10; // array offset const Register rax_elsize = rax_lh; // element size __ movl(r10_offset, rax_lh); __ shrl(r10_offset, Klass::_lh_header_size_shift); __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset __ addptr(src, r10_offset); // src array offset __ addptr(dst, r10_offset); // dst array offset BLOCK_COMMENT("choose copy loop based on element size"); __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize // next registers should be set before the jump to corresponding stub const Register from = c_rarg0; // source array address const Register to = c_rarg1; // destination array address const Register count = c_rarg2; // elements count // 'from', 'to', 'count' registers should be set in such order // since they are the same as 'src', 'src_pos', 'dst'. __ BIND(L_copy_bytes); __ cmpl(rax_elsize, 0); __ jccb(Assembler::notEqual, L_copy_shorts); __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr __ movl2ptr(count, r11_length); // length __ jump(RuntimeAddress(byte_copy_entry)); __ BIND(L_copy_shorts); __ cmpl(rax_elsize, LogBytesPerShort); __ jccb(Assembler::notEqual, L_copy_ints); __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr __ movl2ptr(count, r11_length); // length __ jump(RuntimeAddress(short_copy_entry)); __ BIND(L_copy_ints); __ cmpl(rax_elsize, LogBytesPerInt); __ jccb(Assembler::notEqual, L_copy_longs); __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr __ movl2ptr(count, r11_length); // length __ jump(RuntimeAddress(int_copy_entry)); __ BIND(L_copy_longs); #ifdef ASSERT { Label L; __ cmpl(rax_elsize, LogBytesPerLong); __ jcc(Assembler::equal, L); __ stop("must be long copy, but elsize is wrong"); __ bind(L); } #endif __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr __ movl2ptr(count, r11_length); // length __ jump(RuntimeAddress(long_copy_entry)); // objArrayKlass __ BIND(L_objArray); // live at this point: r10_src_klass, src[_pos], dst[_pos] Label L_plain_copy, L_checkcast_copy; // test array classes for subtyping __ load_klass(r9_dst_klass, dst); __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality __ jcc(Assembler::notEqual, L_checkcast_copy); // Identically typed arrays can be copied without element-wise checks. arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, r10, L_failed); __ lea(from, Address(src, src_pos, TIMES_OOP, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr __ lea(to, Address(dst, dst_pos, TIMES_OOP, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr __ movl2ptr(count, r11_length); // length __ BIND(L_plain_copy); __ jump(RuntimeAddress(oop_copy_entry)); __ BIND(L_checkcast_copy); // live at this point: r10_src_klass, !r11_length { // assert(r11_length == C_RARG4); // will reload from here Register r11_dst_klass = r11; __ load_klass(r11_dst_klass, dst); // Before looking at dst.length, make sure dst is also an objArray. __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); __ jcc(Assembler::notEqual, L_failed); // It is safe to examine both src.length and dst.length. #ifndef _WIN64 arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4, rax, L_failed); #else __ movl(r11_length, C_RARG4); // reload arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, rax, L_failed); __ load_klass(r11_dst_klass, dst); // reload #endif // Marshal the base address arguments now, freeing registers. __ lea(from, Address(src, src_pos, TIMES_OOP, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); __ lea(to, Address(dst, dst_pos, TIMES_OOP, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); __ movl(count, C_RARG4); // length (reloaded) Register sco_temp = c_rarg3; // this register is free now assert_different_registers(from, to, count, sco_temp, r11_dst_klass, r10_src_klass); assert_clean_int(count, sco_temp); // Generate the type check. int sco_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::super_check_offset_offset_in_bytes()); __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); // Fetch destination element klass from the objArrayKlass header. int ek_offset = (klassOopDesc::header_size() * HeapWordSize + objArrayKlass::element_klass_offset_in_bytes()); __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); // the checkcast_copy loop needs two extra arguments: assert(c_rarg3 == sco_temp, "#3 already in place"); __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass __ jump(RuntimeAddress(checkcast_copy_entry)); } __ BIND(L_failed); __ xorptr(rax, rax); __ notptr(rax); // return -1 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); return start; } #undef length_arg #endif //FIXME address generate_disjoint_long_copy(bool aligned, const char *name) { Label l_1, l_2; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); // __ movl(ecx, Address(esp, 4+8)); // count // __ movl(eax, Address(esp, 4+0)); // from // __ movl(edx, Address(esp, 4+4)); // to __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); __ push(T3); __ push(T4); __ push(T5); //__ subl(edx, eax); //__ jmp(l_2); __ b(l_2); __ delayed()->nop(); __ align(16); __ bind(l_1); // if (VM_Version::supports_mmx()) { // __ movq(mmx0, Address(eax)); // __ movq(Address(eax, edx, Address::times_1), mmx0); // } else { // __ fild_d(Address(eax)); __ ld(AT, T3, 0); // __ fistp_d(Address(eax, edx, Address::times_1)); __ sd (AT, T4, 0); // } // __ addl(eax, 8); __ addi(T3, T3, 8); __ addi(T4, T4, 8); __ bind(l_2); // __ decl(ecx); __ addi(T5, T5, -1); // __ jcc(Assembler::greaterEqual, l_1); __ bgez(T5, l_1); __ delayed()->nop(); // if (VM_Version::supports_mmx()) { // __ emms(); // } // __ ret(0); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } address generate_conjoint_long_copy(bool aligned, const char *name) { Label l_1, l_2; StubCodeMark mark(this, "StubRoutines", name); __ align(CodeEntryAlignment); address start = __ pc(); address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy() : StubRoutines::jlong_disjoint_arraycopy(); __ push(T3); __ push(T4); __ push(T5); array_overlap_test(nooverlap_target, 3); /* __ movl(ecx, Address(esp, 4+8)); // count __ movl(eax, Address(esp, 4+0)); // from __ movl(edx, Address(esp, 4+4)); // to __ jmp(l_2); */ __ move(T5, A2); __ move(T3, A0); __ move(T4, A1); __ sll(AT, T5, Address::times_8); __ add(AT, T3, AT); __ lea(T3 , Address(AT, -8)); __ sll(AT, T5, Address::times_8); __ add(AT, T4, AT); __ lea(T4 , Address(AT, -8)); __ b(l_2); __ delayed()->nop(); __ align(16); __ bind(l_1); /* if (VM_Version::supports_mmx()) { __ movq(mmx0, Address(eax, ecx, Address::times_8)); __ movq(Address(edx, ecx,Address::times_8), mmx0); } else { __ fild_d(Address(eax, ecx, Address::times_8)); __ fistp_d(Address(edx, ecx,Address::times_8)); } */ __ ld(AT, T3, 0); __ sd (AT, T4, 0); __ addi(T3, T3, -8); __ addi(T4, T4,-8); __ bind(l_2); // __ decl(ecx); __ addi(T5, T5, -1); //__ jcc(Assembler::greaterEqual, l_1); __ bgez(T5, l_1); __ delayed()->nop(); // if (VM_Version::supports_mmx()) { // __ emms(); // } // __ ret(0); __ pop(T5); __ pop(T4); __ pop(T3); __ jr(RA); __ delayed()->nop(); return start; } void generate_arraycopy_stubs() { /* // Call the conjoint generation methods immediately after // the disjoint ones so that short branches from the former // to the latter can be generated. StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy"); StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy"); if (UseCompressedOops) { StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy"); StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy"); } else { StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy"); StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy"); } StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); // We don't generate specialized code for HeapWord-aligned source // arrays, so just use the code we've already generated StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; */ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy"); StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); // if (VM_Version::supports_mmx()) //if (false) // StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy"); // else StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy"); StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy"); StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy"); StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy"); StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy"); StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy"); StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy"); StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy"); StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy"); } #undef __ #define __ masm-> // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this // frame. Since we need to preserve callee-saved values (currently // only for C2, but done for C1 as well) we need a callee-saved oop // map and therefore have to make these stubs into RuntimeStubs // rather than BufferBlobs. If the compiler needs all registers to // be preserved between the fault point and the exception handler // then it must assume responsibility for that in // AbstractCompiler::continuation_for_implicit_null_exception or // continuation_for_implicit_division_by_zero_exception. All other // implicit exceptions (e.g., NullPointerException or // AbstractMethodError on entry) are either at call sites or // otherwise assume that stack unwinding will be initiated, so // caller saved registers were assumed volatile in the compiler. address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) { // Information about frame layout at time of blocking runtime call. // Note that we only have to preserve callee-saved registers since // the compilers are responsible for supplying a continuation point // if they expect all registers to be preserved. enum layout { thread_off, // last_java_sp S7_off, // callee saved register sp + 1 S6_off, // callee saved register sp + 2 S5_off, // callee saved register sp + 3 S4_off, // callee saved register sp + 4 S3_off, // callee saved register sp + 5 S2_off, // callee saved register sp + 6 S1_off, // callee saved register sp + 7 S0_off, // callee saved register sp + 8 FP_off, ret_address, framesize }; int insts_size = 2048; int locs_size = 32; // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, // NULL, NULL, NULL, false, NULL, name, false); CodeBuffer code (name , insts_size, locs_size); OopMapSet* oop_maps = new OopMapSet(); MacroAssembler* masm = new MacroAssembler(&code); address start = __ pc(); /* __ move(AT, (int)&jerome1 ); __ sw(SP, AT, 0); __ move(AT, (int)&jerome2 ); __ sw(FP, AT, 0); __ move(AT, (int)&jerome3 ); __ sw(RA, AT, 0); __ move(AT, (int)&jerome4 ); __ sw(ZERO, AT, 0); __ move(AT, (int)&jerome5 ); __ sw(ZERO, AT, 0); __ move(AT, (int)&jerome6 ); __ sw(ZERO, AT, 0); __ move(AT, (int)&jerome7 ); __ sw(ZERO, AT, 0); __ move(AT, (int)&jerome10 ); __ sw(ZERO, AT, 0); __ pushad(); //__ enter(); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), relocInfo::runtime_call_type); __ delayed()->nop(); //__ leave(); __ popad(); */ // This is an inlined and slightly modified version of call_VM // which has the ability to fetch the return PC out of // thread-local storage and also sets up last_Java_sp slightly // differently than the real call_VM #ifndef OPT_THREAD Register java_thread = A0; __ get_thread(java_thread); #else Register java_thread = TREG; #endif if (restore_saved_exception_pc) { __ lw(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax } __ enter(); // required for proper stackwalking of RuntimeStub frame __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog __ sw(S0, SP, S0_off * wordSize); __ sw(S1, SP, S1_off * wordSize); __ sw(S2, SP, S2_off * wordSize); __ sw(S3, SP, S3_off * wordSize); __ sw(S4, SP, S4_off * wordSize); __ sw(S5, SP, S5_off * wordSize); __ sw(S6, SP, S6_off * wordSize); __ sw(S7, SP, S7_off * wordSize); int frame_complete = __ pc() - start; // push java thread (becomes first argument of C function) __ sw(java_thread, SP, thread_off * wordSize); if (java_thread!=A0) __ move(A0, java_thread); // Set up last_Java_sp and last_Java_fp __ set_last_Java_frame(java_thread, SP, FP, NULL); __ relocate(relocInfo::internal_pc_type); { int save_pc = (int)__ pc() + 12 + NativeCall::return_address_offset; __ lui(AT, Assembler::split_high(save_pc)); __ addiu(AT, AT, Assembler::split_low(save_pc)); } __ sw(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); // Call runtime __ lui(T9, Assembler::split_high((int)runtime_entry)); __ addiu(T9, T9, Assembler::split_low((int)runtime_entry)); __ jalr(T9); __ delayed()->nop(); // Generate oop map OopMap* map = new OopMap(framesize, 0); oop_maps->add_gc_map(__ offset(), map); // restore the thread (cannot use the pushed argument since arguments // may be overwritten by C code generated by an optimizing compiler); // however can use the register value directly if it is callee saved. #ifndef OPT_THREAD __ get_thread(java_thread); #endif __ lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); // __ reset_last_Java_frame(java_thread, true); __ reset_last_Java_frame(java_thread, true, true); // Restore callee save registers. This must be done after resetting the Java frame __ lw(S0, SP, S0_off * wordSize); __ lw(S1, SP, S1_off * wordSize); __ lw(S2, SP, S2_off * wordSize); __ lw(S3, SP, S3_off * wordSize); __ lw(S4, SP, S4_off * wordSize); __ lw(S5, SP, S5_off * wordSize); __ lw(S6, SP, S6_off * wordSize); __ lw(S7, SP, S7_off * wordSize); // discard arguments __ addi(SP, SP, (framesize-2) * wordSize); // epilog // __ leave(); // required for proper stackwalking of RuntimeStub frame __ addi(SP, FP, wordSize); __ lw(FP, SP, -1*wordSize); // check for pending exceptions #ifdef ASSERT Label L; __ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); __ bne(AT, ZERO, L); __ delayed()->nop(); __ should_not_reach_here(); __ bind(L); #endif //ASSERT __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); __ delayed()->nop(); RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, framesize, oop_maps, false); return stub->entry_point(); } // Initialization void generate_initial() { /* // Generates all stubs and initializes the entry points // This platform-specific stub is needed by generate_call_stub() StubRoutines::x86::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80); // entry points that exist in all platforms Note: This is code // that could be shared among different platforms - however the // benefit seems to be smaller than the disadvantage of having a // much more complicated generator structure. See also comment in // stubRoutines.hpp. StubRoutines::_forward_exception_entry = generate_forward_exception(); StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); // is referenced by megamorphic call StubRoutines::_catch_exception_entry = generate_catch_exception(); // atomic calls StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr(); StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); StubRoutines::_atomic_add_entry = generate_atomic_add(); StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr(); StubRoutines::_fence_entry = generate_orderaccess_fence(); StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); // platform dependent StubRoutines::x86::_get_previous_fp_entry = generate_get_previous_fp(); StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); */ // Generates all stubs and initializes the entry points //------------------------------------------------------------- //----------------------------------------------------------- // entry points that exist in all platforms // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller // than the disadvantage of having a much more complicated generator structure. // See also comment in stubRoutines.hpp. StubRoutines::_forward_exception_entry = generate_forward_exception(); StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); // is referenced by megamorphic call StubRoutines::_catch_exception_entry = generate_catch_exception(); StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); // platform dependent StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp(); } void generate_all() { // Generates all stubs and initializes the entry points // These entry points require SharedInfo::stack0 to be set up in // non-core builds and need to be relocatable, so they each // fabricate a RuntimeStub internally. /* StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_AbstractMethodError), false); StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_ArithmeticException), true); StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_NullPointerException), true); StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_NullPointerException_at_call), false); StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime:: throw_StackOverflowError), false); // entry points that are platform specific StubRoutines::x86::_f2i_fixup = generate_f2i_fixup(); StubRoutines::x86::_f2l_fixup = generate_f2l_fixup(); StubRoutines::x86::_d2i_fixup = generate_d2i_fixup(); StubRoutines::x86::_d2l_fixup = generate_d2l_fixup(); StubRoutines::x86::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF); StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000); StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF); StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000); // support for verify_oop (must happen after universe_init) StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); // arraycopy stubs used by compilers generate_arraycopy_stubs(); */ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); //------------------------------------------------------ //------------------------------------------------------------------ // entry points that are platform specific // support for verify_oop (must happen after universe_init) StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); #ifndef CORE // arraycopy stubs used by compilers generate_arraycopy_stubs(); #endif } public: StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { if (all) { generate_all(); } else { generate_initial(); } } }; // end class declaration /* address StubGenerator::disjoint_byte_copy_entry = NULL; address StubGenerator::disjoint_short_copy_entry = NULL; address StubGenerator::disjoint_int_copy_entry = NULL; address StubGenerator::disjoint_long_copy_entry = NULL; address StubGenerator::disjoint_oop_copy_entry = NULL; address StubGenerator::byte_copy_entry = NULL; address StubGenerator::short_copy_entry = NULL; address StubGenerator::int_copy_entry = NULL; address StubGenerator::long_copy_entry = NULL; address StubGenerator::oop_copy_entry = NULL; address StubGenerator::checkcast_copy_entry = NULL; */ void StubGenerator_generate(CodeBuffer* code, bool all) { StubGenerator g(code, all); }