Mercurial > hg > openjdk6-mips
view hotspot/src/cpu/mips/vm/templateTable_mips.cpp @ 18:d2a6a000ff33
Fix a bug in FrameMap::nr2floatreg.
In FrameMap::nr2floatreg, it was incorrect to multiply the argument
rnr by 2.
author | YANG Yongqiang <yangyongqiang@loongson.cn> |
---|---|
date | Sat, 30 Oct 2010 17:47:17 +0800 |
parents | 85b046e5468b |
children |
line wrap: on
line source
/*
 * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * Copyright 2010 Lemote, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

#include "incls/_precompiled.incl"
#include "incls/_templateTable_mips.cpp.incl"

#ifndef CC_INTERP

#define __ _masm->

// Platform-dependent initialization.
void TemplateTable::pd_initialize() {
  // No MIPS-specific initialization is required.
}

// Address computation for local variables.
// LVP is the local-variable pointer register (presumably t8 per the original
// author's note - confirm against the register map); slot n is reached via
// Interpreter::local_offset_in_bytes(n).
static inline Address iaddress(int n) {
  return Address(LVP, Interpreter::local_offset_in_bytes(n));
}

// A long occupies two slots; its address is that of the higher-numbered slot.
static inline Address laddress(int n) {
  return iaddress(n + 1);
}

static inline Address faddress(int n) {
  return iaddress(n);
}

static inline Address daddress(int n) {
  return laddress(n);
}

static inline Address aaddress(int n) {
  return iaddress(n);
}

static inline Address haddress(int n) {
  return iaddress(n + 0);
}

// FIXME: register-indexed iaddress/laddress/... variants (as on x86, which
// has scaled-index addressing) are not provided here; MIPS computes the
// address explicitly in locals_index() instead.

// Raw expression-stack slots relative to SP.
static inline Address at_sp() {
  return Address(SP, 0);
}

static inline Address at_sp_p1() {
  return Address(SP, 1 * wordSize);
}

static inline Address at_sp_p2() {
  return Address(SP, 2 * wordSize);
}

// At top of the Java expression stack, which may differ from SP for
// category-1 values.
static inline Address at_tos() {
  return Address(SP, Interpreter::expr_offset_in_bytes(0));
}

static inline Address at_tos_p1() {
  return Address(SP, Interpreter::expr_offset_in_bytes(1));
}

static inline Address at_tos_p2() {
  return Address(SP, Interpreter::expr_offset_in_bytes(2));
}

static inline Address at_tos_p3() {
  return Address(SP, Interpreter::expr_offset_in_bytes(3));
}

// (A commented-out x86 condition-conversion helper, j_not(), was dead text
// and has been dropped.)

// Miscellaneous helper routines

// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL /* static void do_oop_store(InterpreterMacroAssembler* _masm, Address obj, Register val, BarrierSet::Name barrier, bool precise) { assert(val == noreg || val == rax, "parameter is just for looks"); switch (barrier) { #ifndef SERIALGC case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { // flatten object address if needed if (obj.index() == noreg && obj.disp() == 0) { if (obj.base() != rdx) { __ movq(rdx, obj.base()); } } else { __ leaq(rdx, obj); } __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg); if (val == noreg) { __ store_heap_oop(Address(rdx, 0), NULL_WORD); } else { __ store_heap_oop(Address(rdx, 0), val); __ g1_write_barrier_post(rdx, val, r8, rbx); } } break; #endif // SERIALGC case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { if (val == noreg) { __ store_heap_oop(obj, NULL_WORD); } else { __ store_heap_oop(obj, val); // flatten object address if needed if (!precise || (obj.index() == noreg && obj.disp() == 0)) { __ store_check(obj.base()); } else { __ leaq(rdx, obj); __ store_check(rdx); } } } break; case BarrierSet::ModRef: case BarrierSet::Other: if (val == noreg) { __ store_heap_oop(obj, NULL_WORD); } else { __ store_heap_oop(obj, val); } break; default : ShouldNotReachHere(); } } */ // we use S1 as bcp, be sure you have bcp in S1 before you call any of the Template generator Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(BCP, offset); } #define callee_saved_register(R) assert((R>=S0 && R<=S7), "should use callee saved registers!") // bytecode folding void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc, Register scratch, bool load_bc_into_scratch/*=true*/) { if (!RewriteBytecodes) { return; } // the pair bytecodes have already done the load. 
if (load_bc_into_scratch) { __ move(bc, bytecode); } Label patch_done; if (JvmtiExport::can_post_breakpoint()) { Label fast_patch; // if a breakpoint is present we can't rewrite the stream directly __ lbu(scratch, at_bcp(0)); __ move(AT, Bytecodes::_breakpoint); __ bne(scratch, AT, fast_patch); __ delayed()->nop(); __ get_method(scratch); // Let breakpoint table handling rewrite to quicker bytecode __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, BCP, bc); __ b(patch_done); __ delayed()->nop(); __ bind(fast_patch); } #ifdef ASSERT Label okay; __ lbu(scratch, at_bcp(0)); __ move(AT, (int)Bytecodes::java_code(bytecode)); __ beq(scratch, AT, okay); __ delayed()->nop(); __ beq(scratch, bc, patch_done); __ delayed()->nop(); __ stop("patching the wrong bytecode"); __ bind(okay); #endif // patch bytecode __ sb(bc, at_bcp(0)); __ bind(patch_done); } // Individual instructions void TemplateTable::nop() { transition(vtos, vtos); // nothing to do } void TemplateTable::shouldnotreachhere() { transition(vtos, vtos); __ stop("shouldnotreachhere bytecode"); } void TemplateTable::aconst_null() { transition(vtos, atos); __ move(FSR, ZERO); } void TemplateTable::iconst(int value) { transition(vtos, itos); if (value == 0) { //__ xorl(rax, rax); __ move(FSR, ZERO); } else { //__ movl(rax, value); __ move(FSR, value); } } void TemplateTable::lconst(int value) { transition(vtos, ltos); if (value == 0) { __ move(FSR, ZERO); } else { __ move(FSR, value); } assert(value >= 0, "check this code"); __ move(SSR, ZERO); } const static float _f0 = 0.0, _f1 = 1.0, _f2 = 2.0; const static double _d0 = 0.0, _d1 = 1.0; void TemplateTable::fconst(int value) { transition(vtos, ftos); if (value == 0) { __ lui(AT, Assembler::split_high((int)&_f0)); __ lwc1(FSF, AT, Assembler::split_low((int)&_f0)); } else if (value == 1) { __ lui(AT, Assembler::split_high((int)&_f1)); __ lwc1(FSF, AT, Assembler::split_low((int)&_f1)); } else if (value == 2) { __ lui(AT, 
Assembler::split_high((int)&_f2)); __ lwc1(FSF, AT, Assembler::split_low((int)&_f2)); } else { ShouldNotReachHere(); } } void TemplateTable::dconst(int value) { transition(vtos, dtos); if (value == 0) { __ lui(AT, Assembler::split_high((int)&_d0)); __ lwc1(FSF, AT, Assembler::split_low((int)&_d0)); __ lwc1(SSF, AT, Assembler::split_low((int)&_d0)+4); } else if (value == 1) { __ lui(AT, Assembler::split_high((int)&_d1)); __ lwc1(FSF, AT, Assembler::split_low((int)&_d1)); __ lwc1(SSF, AT, Assembler::split_low((int)&_d1)+4); } else { ShouldNotReachHere(); } } void TemplateTable::bipush() { transition(vtos, itos); __ lb(FSR, at_bcp(1)); } void TemplateTable::sipush() { transition(vtos, itos); __ load_two_bytes_from_at_bcp(FSR, AT, 1); __ hswap(FSR); } // used register : T2, T3, T4 // T2 : index // T3 : cpool // T4 : tag void TemplateTable::ldc(bool wide) { transition(vtos, vtos); Label call_ldc, notFloat, notClass, Done; // get index in cpool if (wide) { __ load_two_bytes_from_at_bcp(T2, AT, 1); __ huswap(T2); } else { __ lbu(T2, at_bcp(1)); } __ get_cpool_and_tags(T3, T4); const int base_offset = constantPoolOopDesc::header_size() * wordSize; const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize; // get type __ add(AT, T4, T2); __ lb(T4, AT, tags_offset); //now T4 is the tag // unresolved string - get the resolved string __ addiu(AT, T4, - JVM_CONSTANT_UnresolvedString); __ beq(AT, ZERO, call_ldc); __ delayed()->nop(); // unresolved class - get the resolved class __ addiu(AT, T4, - JVM_CONSTANT_UnresolvedClass); __ beq(AT, ZERO, call_ldc); __ delayed()->nop(); // unresolved class in error (resolution failed) - call into runtime // so that the same error from first resolution attempt is thrown. 
// __ cmpl(edx, JVM_CONSTANT_UnresolvedClassInError); __ addiu(AT, T4, -JVM_CONSTANT_UnresolvedClassInError); // __ jccb(Assembler::equal, call_ldc); __ beq(AT, ZERO, call_ldc); __ delayed()->nop(); // resolved class - need to call vm to get java mirror of the class __ addiu(AT, T4, - JVM_CONSTANT_Class); __ bne(AT, ZERO, notClass); __ delayed()->sll(T2, T2, 2); __ bind(call_ldc); __ move(A1, wide); call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); // __ sw(FSR, SP, - 1 * wordSize); __ push(atos); __ b(Done); // __ delayed()->addi(SP, SP, - 1 * wordSize); __ delayed()->nop(); __ bind(notClass); __ addiu(AT, T4, -JVM_CONSTANT_Float); __ bne(AT, ZERO, notFloat); __ delayed()->nop(); // ftos __ add(AT, T3, T2); __ lwc1(FSF, AT, base_offset); __ swc1(FSF, SP, - 1 * wordSize); __ b(Done); __ delayed()->addi(SP, SP, - 1 * wordSize); __ bind(notFloat); #ifdef ASSERT { Label L; __ addiu(AT, T4, -JVM_CONSTANT_Integer); __ beq(AT, ZERO, L); __ delayed()->addiu(AT, T4, -JVM_CONSTANT_String); __ beq(AT, ZERO, L); __ delayed()->nop(); __ stop("unexpected tag type in ldc"); __ bind(L); } #endif // atos and itos Label isOop; __ add(AT, T3, T2); __ lw(FSR, AT, base_offset); // String is only oop type we will see here __ addiu(AT, T4, -JVM_CONSTANT_String); //__ bne(AT, ZERO, Done); __ beq(AT,ZERO,isOop); __ delayed()->nop(); __ push(itos); __ b(Done); __ delayed()->nop(); __ bind(isOop); __ push(atos); if (VerifyOops) { __ verify_oop(FSR); } __ bind(Done); } // used register: T2, T3, T4 // T2 : index // T3 : cpool // T4 : tag void TemplateTable::ldc2_w() { transition(vtos, vtos); Label Long, Done; // get index in cpool __ load_two_bytes_from_at_bcp(T2, AT, 1); __ huswap(T2); __ get_cpool_and_tags(T3, T4); const int base_offset = constantPoolOopDesc::header_size() * wordSize; const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize; // get type in T4 __ add(AT, T4, T2); __ lb(T4, AT, tags_offset); __ addiu(AT, T4, - JVM_CONSTANT_Double); __ 
bne(AT, ZERO, Long); __ delayed()->sll(T2, T2, 2); // dtos __ addu(AT, T3, T2); __ lwc1(FSF, AT, base_offset + 0 * wordSize); __ lwc1(SSF, AT, base_offset + 1 * wordSize); __ swc1(FSF, SP, - 2*wordSize); __ swc1(SSF, SP, - 1*wordSize); __ b(Done); __ delayed()->addi(SP, SP, -8); // ltos __ bind(Long); __ add(AT, T3, T2); __ lw(FSR, AT, base_offset + 0 * wordSize); __ lw(SSR, AT, base_offset + 1 * wordSize); __ push(ltos); __ bind(Done); } // we compute the actual local variable address here // the x86 dont do so for it has scaled index memory access model, we dont have, so do here //FIXME void TemplateTable::locals_index(Register reg, int offset) { __ lbu(reg, at_bcp(offset)); __ sll(reg, reg, 2); __ sub(reg, LVP, reg); } // this method will do bytecode folding of the two form: // iload iload iload caload // used register : T2, T3 // T2 : bytecode // T3 : folded code void TemplateTable::iload() { transition(vtos, itos); if (RewriteFrequentPairs) { Label rewrite, done; // get the next bytecode in T2 __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); // if _iload, wait to rewrite to iload2. We only want to rewrite the // last two iloads in a pair. Comparing against fast_iload means that // the next bytecode is neither an iload or a caload, and therefore // an iload pair. __ move(AT, Bytecodes::_iload); __ beq(AT, T2, done); __ delayed()->nop(); __ move(AT, Bytecodes::_fast_iload); __ beq(AT, T2, rewrite); __ delayed(); __ move(T3, Bytecodes::_fast_iload2); // if _caload, rewrite to fast_icaload __ move(AT, Bytecodes::_caload); __ beq(AT, T2, rewrite); __ delayed(); __ move(T3, Bytecodes::_fast_icaload); // rewrite so iload doesn't check again. 
__ move(T3, Bytecodes::_fast_iload); // rewrite // T3 : fast bytecode __ bind(rewrite); patch_bytecode(Bytecodes::_iload, T3, T2, false); __ bind(done); } // Get the local value into tos locals_index(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); } // used register T2 // T2 : index void TemplateTable::fast_iload2() { transition(vtos, itos); locals_index(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); __ push(itos); locals_index(T2, 3); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); } // used register T2 // T2 : index void TemplateTable::fast_iload() { transition(vtos, itos); locals_index(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); } // used register T2 // T2 : index void TemplateTable::lload() { transition(vtos, ltos); locals_index(T2); __ lw(FSR, T2, -4); __ lw(SSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); } // used register T2 // T2 : index void TemplateTable::fload() { transition(vtos, ftos); locals_index(T2); __ lwc1(FSF, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); } // used register T2 // T2 : index void TemplateTable::dload() { transition(vtos, dtos); locals_index(T2); if (TaggedStackInterpreter) { // Get double out of locals array, onto temp stack and load with // float instruction into ST0 // __ movl(eax, laddress(ebx)); __ sll(AT,T2,Interpreter::stackElementScale()); __ add(AT, LVP, AT); __ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); // __ movl(edx, haddress(ebx)); __ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); // __ pushl(edx); // push hi first // __ pushl(eax); // __ fld_d(Address(esp)); // __ addl(esp, 2*wordSize); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } else { __ lwc1(FSF, T2, -4); __ lwc1(SSF, T2, 0); } } // used register T2 // T2 : index void TemplateTable::aload() { transition(vtos, atos); locals_index(T2); __ lw(FSR, T2, 0); debug_only(__ 
verify_local_tag(frame::TagCategory2, T2)); } void TemplateTable::locals_index_wide(Register reg) { __ load_two_bytes_from_at_bcp(reg, AT, 2); __ huswap(reg); __ sll(reg, reg, 2); __ sub(reg, LVP, reg); } // used register T2 // T2 : index void TemplateTable::wide_iload() { transition(vtos, itos); locals_index_wide(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } // used register T2 // T2 : index void TemplateTable::wide_lload() { transition(vtos, ltos); locals_index_wide(T2); __ lw(FSR, T2, -4); __ lw(SSR, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } // used register T2 // T2 : index void TemplateTable::wide_fload() { transition(vtos, ftos); locals_index_wide(T2); __ lwc1(FSF, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } // used register T2 // T2 : index void TemplateTable::wide_dload() { transition(vtos, dtos); locals_index_wide(T2); if (TaggedStackInterpreter) { // Get double out of locals array, onto temp stack and load with // float instruction into ST0 // __ movl(eax, laddress(ebx)); // __ movl(edx, haddress(ebx)); __ sll(AT,T2,Interpreter::stackElementScale()); __ add(AT, LVP, AT); __ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); // __ movl(edx, haddress(ebx)); __ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); // __ pushl(edx); // push hi first // __ pushl(eax); // __ fld_d(Address(esp)); // __ addl(esp, 2*wordSize); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } else { __ lwc1(FSF, T2, -4); __ lwc1(SSF, T2, 0); } } // used register T2 // T2 : index void TemplateTable::wide_aload() { transition(vtos, atos); locals_index_wide(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } // we use A2 as the regiser for index, BE CAREFUL! 
// we dont use our tge 29 now, for later optimization void TemplateTable::index_check(Register array, Register index) { // Pop ptr into array __ pop_ptr(array); index_check_without_pop(array, index); } void TemplateTable::index_check_without_pop(Register array, Register index) { // destroys ebx // check array __ null_check(array, arrayOopDesc::length_offset_in_bytes()); // check index Label ok; __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); #ifndef OPT_RANGECHECK __ sltu(AT, index, AT); __ bne(AT, ZERO, ok); __ delayed()->nop(); //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 if (A2!=index) __ move(A2, index); __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); __ delayed()->nop(); __ bind(ok); #else __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); __ move(A2, index); __ tgeu(A2, AT, 29); #endif } void TemplateTable::iaload() { transition(itos, itos); // __ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 2); __ add(FSR, SSR, FSR); //FSR: index __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); } void TemplateTable::laload() { transition(itos, ltos); // __ pop(SSR); index_check(SSR, FSR); __ sll(AT, FSR, 3); __ add(AT, SSR, AT); __ lw(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); __ lw(SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize); } void TemplateTable::faload() { transition(itos, ftos); // __ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 2); __ add(FSR, SSR, FSR); __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); } void TemplateTable::daload() { transition(itos, dtos); //__ pop(SSR); index_check(SSR, FSR); __ sll(AT, FSR, 3); __ add(AT, SSR, AT); __ lwc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); __ lwc1(SSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 1 * wordSize); } void TemplateTable::aaload() { transition(itos, atos); //__ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 2); __ add(FSR, SSR, FSR); __ lw(FSR, FSR, 
arrayOopDesc::base_offset_in_bytes(T_OBJECT)); } void TemplateTable::baload() { transition(itos, itos); //__ pop(SSR); index_check(SSR, FSR); __ add(FSR, SSR, FSR); __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); } void TemplateTable::caload() { transition(itos, itos); // __ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 1); __ add(FSR, SSR, FSR); __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); } // iload followed by caload frequent pair // used register : T2 // T2 : index void TemplateTable::fast_icaload() { transition(vtos, itos); // load index out of locals locals_index(T2); __ lw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagValue, T2)); // __ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 1); __ add(FSR, SSR, FSR); __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); } void TemplateTable::saload() { transition(itos, itos); // __ pop(SSR); index_check(SSR, FSR); __ shl(FSR, 1); __ add(FSR, SSR, FSR); __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); } void TemplateTable::iload(int n) { transition(vtos, itos); __ lw(FSR, iaddress(n)); debug_only(__ verify_local_tag(frame::TagValue, T2)); } void TemplateTable::lload(int n) { transition(vtos, ltos); __ lw(FSR, laddress(n)); __ lw(SSR, haddress(n)); debug_only(__ verify_local_tag(frame::TagValue, T2)); } void TemplateTable::fload(int n) { transition(vtos, ftos); __ lwc1(FSF, faddress(n)); debug_only(__ verify_local_tag(frame::TagValue, T2)); } //FIXME here void TemplateTable::dload(int n) { transition(vtos, dtos); if (TaggedStackInterpreter) { // Get double out of locals array, onto temp stack and load with // float instruction into ST0 //__ movl(eax, laddress(n)); //__ movl(edx, haddress(n)); //__ pushl(edx); // push hi first //__ pushl(eax); // __ fld_d(Address(esp)); // __ addl(esp, 2*wordSize); // reset esp __ lwc1(FSF, laddress(n)); __ lwc1(SSF, haddress(n)); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } else { __ lwc1(FSF, laddress(n)); __ 
lwc1(SSF, haddress(n)); } } void TemplateTable::aload(int n) { transition(vtos, atos); __ lw(FSR, aaddress(n)); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } // used register : T2, T3 // T2 : bytecode // T3 : folded code void TemplateTable::aload_0() { transition(vtos, atos); // According to bytecode histograms, the pairs: // // _aload_0, _fast_igetfield // _aload_0, _fast_agetfield // _aload_0, _fast_fgetfield // // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0 // bytecode checks if the next bytecode is either _fast_igetfield, // _fast_agetfield or _fast_fgetfield and then rewrites the // current bytecode into a pair bytecode; otherwise it rewrites the current // bytecode into _fast_aload_0 that doesn't do the pair check anymore. // // Note: If the next bytecode is _getfield, the rewrite must be delayed, // otherwise we may miss an opportunity for a pair. // // Also rewrite frequent pairs // aload_0, aload_1 // aload_0, iload_1 // These bytecodes with a small amount of code are most profitable to rewrite if (RewriteFrequentPairs) { Label rewrite, done; // get the next bytecode in T2 __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); // do actual aload_0 aload(0); // if _getfield then wait with rewrite __ move(AT, Bytecodes::_getfield); __ beq(AT, T2, done); __ delayed()->nop(); // if _igetfield then reqrite to _fast_iaccess_0 assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); __ move(AT, Bytecodes::_fast_igetfield); __ beq(AT, T2, rewrite); __ delayed(); __ move(T3, Bytecodes::_fast_iaccess_0); // if _agetfield then reqrite to _fast_aaccess_0 assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); __ move(AT, Bytecodes::_fast_agetfield); __ beq(AT, T2, rewrite); __ delayed(); __ move(T3, Bytecodes::_fast_aaccess_0); // if _fgetfield then reqrite to _fast_faccess_0 
assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); __ move(AT, Bytecodes::_fast_fgetfield); __ beq(AT, T2, rewrite); __ delayed(); __ move(T3, Bytecodes::_fast_faccess_0); // else rewrite to _fast_aload0 assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); __ move(T3, Bytecodes::_fast_aload_0); // rewrite __ bind(rewrite); patch_bytecode(Bytecodes::_aload_0, T3, T2, false); __ bind(done); } else { aload(0); } } void TemplateTable::istore() { transition(itos, vtos); locals_index(T2); __ sw(FSR, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } void TemplateTable::lstore() { transition(ltos, vtos); locals_index(T2); __ sw(FSR, T2, -4); __ sw(SSR, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } void TemplateTable::fstore() { transition(ftos, vtos); locals_index(T2); __ swc1(FSF, T2, 0); debug_only(__ verify_local_tag(frame::TagCategory2, T2)); } void TemplateTable::dstore() { transition(dtos, vtos); locals_index(T2); if (TaggedStackInterpreter) { // Store double on stack and reload into locals nonadjacently // __ subl(esp, 2 * wordSize); // __ fstp_d(Address(esp)); // __ popl(eax); // __ popl(edx); //__ movl(laddress(ebx), eax); //__ movl(haddress(ebx), edx); // __ swc1(FSF, laddress(T2)); //__ swc1(SSF, haddress(T2)); __ sll(AT,T2,Interpreter::stackElementScale()); __ add(AT, LVP, AT); __ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); // __ movl(edx, haddress(ebx)); __ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); __ tag_local(frame::TagCategory2, T2); } else { __ swc1(FSF, T2, -4); __ swc1(SSF, T2, 0); } } void TemplateTable::astore() { transition(vtos, vtos); // __ pop(FSR); __ pop_ptr(FSR, SSR); locals_index(T2); __ sw(FSR, T2, 0); __ tag_local(SSR, T2); // need to store same tag in local may be returnAddr } void TemplateTable::wide_istore() { transition(vtos, vtos); // __ pop(FSR); __ pop_i(FSR); 
locals_index_wide(T2); __ sw(FSR, T2, 0); __ tag_local(frame::TagValue, T2); } void TemplateTable::wide_lstore() { transition(vtos, vtos); //__ pop2(FSR, SSR); //__ pop_l(FSR, SSR); __ pop_l(FSR); //aoqi:FIXME Is this right? locals_index_wide(T2); __ sw(FSR, T2, -4); __ sw(SSR, T2, 0); __ tag_local(frame::TagCategory2, T2); } void TemplateTable::wide_fstore() { wide_istore(); } void TemplateTable::wide_dstore() { wide_lstore(); } void TemplateTable::wide_astore() { // wide_istore(); transition(vtos, vtos); // __ pop_ptr(eax, edx); __ pop_ptr(FSR, SSR); // locals_index_wide(ebx); locals_index_wide(T2); //__ movl(aaddress(ebx), eax); // __ sw(FSR, aaddress(T2)); __ sll(AT,T2,Interpreter::stackElementScale()); __ add(AT, LVP, AT); __ addi(AT, AT, Interpreter::value_offset_in_bytes()); __ tag_local(SSR,AT ); } // used register : T2 void TemplateTable::iastore() { transition(itos, vtos); /* __ pop2(SSR, T2); index_check(T2, SSR); __ shl(SSR, 2); __ add(T2, T2, SSR); __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); */ // __ pop_i(ebx); __ pop_i(SSR); index_check(T2, SSR); // prefer index in ebx __ shl(SSR, Address::times_4); __ add(T2, T2, SSR); __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); } // used register T2, T3 void TemplateTable::lastore() { transition(ltos, vtos); // __ pop2(T2, T3); __ pop_i (T2); index_check(T3, T2); __ shl(T2, 3); __ add(T3, T3, T2); __ sw(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); __ sw(SSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize); } // used register T2 void TemplateTable::fastore() { transition(ftos, vtos); //__ pop2(SSR, T2); __ pop_i(SSR); index_check(T2, SSR); __ shl(SSR, 2); __ add(T2, T2, SSR); __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); } // used register T2, T3 void TemplateTable::dastore() { transition(dtos, vtos); //__ pop2(T2, T3); __ pop_i (T2); index_check(T3, T2); __ shl(T2, Address::times_8); __ addu(T3, T3, T2); __ swc1(FSF, T3, 
arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); __ swc1(SSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 1 * wordSize); } // used register : T2, T3, T4 // T2 : array // T3 : subklass // T4 : supklass void TemplateTable::aastore() { Label is_null, ok_is_subtype, done; transition(vtos, vtos); // stack: ..., array, index, value // __ lw(FSR, at_sp()); // Value // __ lw(SSR, at_sp_p1()); // Index // __ lw(T2, at_sp_p2()); // Array __ lw(FSR, at_tos()); // Value __ lw(SSR, at_tos_p1()); // Index __ lw(T2, at_tos_p2()); // Array // index_check(T2, SSR); index_check_without_pop(T2, SSR); // do array store check - check for NULL value first __ beq(FSR, ZERO, is_null); __ delayed()->nop(); __ profile_checkcast(false, T3); // Blows T3 // Move subklass into T3 __ lw(T3, Address(FSR, oopDesc::klass_offset_in_bytes())); // Move superklass into T4 __ lw(T4, Address(T2, oopDesc::klass_offset_in_bytes())); __ lw(T4, Address(T4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*4+12 into a single register. T2 __ sll(AT, SSR, 2); __ add(T2, T2, AT); __ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); // Generate subtype check. // Superklass in T4. Subklass in T3. __ gen_subtype_check(T4, T3, ok_is_subtype); // Come here on failure // object is at FSR __ jmp(Interpreter::_throw_ArrayStoreException_entry); __ delayed()->nop(); // Come here on success __ bind(ok_is_subtype); __ sw(FSR, T2, 0); __ store_check(T2); __ b(done); __ delayed()->nop(); // Have a NULL in FSR, EDX=T2, SSR=index. 
// Store NULL at ary[idx]
  // (tail of aastore, whose head precedes this view: T2 = array, SSR = index,
  //  FSR = value; a null store needs no type check, only the profile update)
  __ bind(is_null);
  __ profile_checkcast(true, T3); //blows T3
  __ sll(AT, SSR, 2);                 // scale index by 4 (object slot size)
  __ add(T2, T2, AT);
  __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  __ bind(done);
  __ addi(SP, SP, 3 * Interpreter::stackElementSize());  // drop value/index/array
}

// bastore: FSR = byte value; pops index (SSR), array ref left in T2 by index_check
void TemplateTable::bastore() {
  transition(itos, vtos);
  //__ pop2(SSR, T2);
  __ pop_i (SSR);
  index_check(T2, SSR);               // bounds check; blows AT (presumably) — see index_check
  __ add(SSR, T2, SSR);               // byte elements: no scaling needed
  __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
}

// castore: FSR = char value; index scaled by 2 for 16-bit elements
void TemplateTable::castore() {
  transition(itos, vtos);
  //__ pop2(SSR, T2);
  __ pop_i(SSR);
  index_check(T2, SSR);
  __ shl(SSR, 1);                     // scale index by 2
  __ add(SSR, T2, SSR);
  __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
}

// sastore: short store is identical to char store (same size, sh ignores sign)
void TemplateTable::sastore() {
  castore();
}

// istore_<n>: store int TOS into local slot n
void TemplateTable::istore(int n) {
  transition(itos, vtos);
  __ sw(FSR, iaddress(n));
  __ tag_local(frame::TagValue, n);
}

// lstore_<n>: store long TOS (FSR = low word, SSR = high word) into locals n/n+1
void TemplateTable::lstore(int n) {
  transition(ltos, vtos);
  __ sw(FSR, laddress(n));
  __ sw(SSR, haddress(n));
  __ tag_local(frame::TagCategory2, n);
}

// fstore_<n>: store float TOS (FSF) into local slot n
void TemplateTable::fstore(int n) {
  transition(ftos, vtos);
  __ swc1(FSF, faddress(n));
  __ tag_local(frame::TagValue, n);
}

//FIXME,
// dstore_<n>: store double TOS (FSF/SSF register pair) into locals n/n+1.
// NOTE(review): both arms emit the same two swc1 stores; only the tagging differs.
void TemplateTable::dstore(int n) {
  transition(dtos, vtos);
  if (TaggedStackInterpreter) {
    /* __ subl(esp, 2 * wordSize);
       __ fstp_d(Address(esp));
       __ popl(eax);
       __ popl(edx);
       __ movl(laddress(n), eax);
       __ movl(haddress(n), edx); */
    __ swc1(FSF, laddress(n));
    __ swc1(SSF, haddress(n));
    __ tag_local(frame::TagCategory2, n);
  } else {
    __ swc1(FSF, laddress(n));
    __ swc1(SSF, haddress(n));
  }
}

// astore_<n>: pop reference (value + tag) and store into local slot n
void TemplateTable::astore(int n) {
  transition(vtos, vtos);
  //__ pop(FSR);
  __ pop_ptr(FSR, SSR);               // FSR = oop, SSR = tag
  __ sw(FSR, aaddress(n));
  __ tag_local(SSR, n);
}

// pop: discard one stack element
void TemplateTable::pop() {
  transition(vtos, vtos);
  // __ pop();
  __ addi(SP, SP, Interpreter::stackElementSize());
}

// pop2: discard two stack elements
void TemplateTable::pop2() {
  transition(vtos, vtos);
  //__ pop2();
  __ addi(SP, SP, 2*Interpreter::stackElementSize());
}

// dup: duplicate the top stack element (with its tag)
void TemplateTable::dup() {
  transition(vtos, vtos);
  // stack: ..., a
  // __ lw(AT, SP, 0);
  // __ push(AT);
  __ load_ptr_and_tag(0, FSR, SSR);
  __ push_ptr(FSR, SSR);
  // stack: ..., a, a
}

// blows FSR
void TemplateTable::dup_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(0, FSR, SSR);   // load b
  __ load_ptr_and_tag(1, T5, T4);     // load a
  __ store_ptr_and_tag(1, FSR, SSR);  // store b
  __ store_ptr_and_tag(0, T5, T4);    // store a
  __ push_ptr(FSR, SSR);              // push b
  // stack: ..., b, a, b
}

// blows FSR
void TemplateTable::dup_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr_and_tag(0, FSR, SSR);   // load c
  __ load_ptr_and_tag(2, T5, T4);     // load a
  __ store_ptr_and_tag(2, FSR, SSR);  // store c in a
  __ push_ptr(FSR, SSR);              // push c
  // stack: ..., c, b, c, c
  __ load_ptr_and_tag(2, FSR, SSR);   // load b
  __ store_ptr_and_tag(2, T5, T4);    // store a in b
  // stack: ..., c, a, c, c
  __ store_ptr_and_tag(1, FSR, SSR);  // store b in c
  // stack: ..., c, a, b, c
}

// blows FSR
void TemplateTable::dup2() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(1, FSR, SSR);   // load a
  __ push_ptr(FSR, SSR);              // push a
  __ load_ptr_and_tag(1, FSR, SSR);   // load b
  __ push_ptr(FSR, SSR);              // push b
  // stack: ..., a, b, a, b
}

// blows FSR
void TemplateTable::dup2_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr_and_tag(0, T5, T4);     // load c
  __ load_ptr_and_tag(1, FSR, SSR);   // load b
  __ push_ptr(FSR, SSR);              // push b
  __ push_ptr(T5, T4);                // push c
  // stack: ..., a, b, c, b, c
  __ store_ptr_and_tag(3, T5, T4);    // store c in b
  // stack: ..., a, c, c, b, c
  __ load_ptr_and_tag(4, T5, T4);     // load a
  __ store_ptr_and_tag(2, T5, T4);    // store a in 2nd c
  // stack: ..., a, c, a, b, c
  __ store_ptr_and_tag(4, FSR, SSR);  // store b in a
  // stack: ..., b, c, a, b, c
  // stack: ..., b, c, a, b, c
}

// blows FSR, SSR
void TemplateTable::dup2_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c, d
  // stack: ..., a, b, c, d
  __ load_ptr_and_tag(0, T5, T4);     // load d
  __ load_ptr_and_tag(1, FSR, SSR);   // load c
  __ push_ptr(FSR, SSR);              // push c
  __ push_ptr(T5, T4);                // push d
  // stack: ..., a, b, c, d, c, d
  __ load_ptr_and_tag(4, FSR, SSR);   // load b
  __ store_ptr_and_tag(2, FSR, SSR);  // store b in d
  __ store_ptr_and_tag(4, T5, T4);    // store d in b
  // stack: ..., a, d, c, b, c, d
  __ load_ptr_and_tag(5, T5, T4);     // load a
  __ load_ptr_and_tag(3, FSR, SSR);   // load c
  __ store_ptr_and_tag(3, T5, T4);    // store a in c
  __ store_ptr_and_tag(5, FSR, SSR);  // store c in a
  // stack: ..., c, d, a, b, c, d
  // stack: ..., c, d, a, b, c, d
}

// blows FSR
void TemplateTable::swap() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr_and_tag(1, T5, T4);     // load a
  __ load_ptr_and_tag(0, FSR, SSR);   // load b
  __ store_ptr_and_tag(0, T5, T4);    // store a in b
  __ store_ptr_and_tag(1, FSR, SSR);  // store b in a
  // stack: ..., b, a
}

// iop2: binary int ops; second operand popped into SSR, result in FSR
void TemplateTable::iop2(Operation op) {
  transition(itos, itos);
  switch (op) {
    case add  : __ pop_i(SSR); __ addu(FSR, SSR, FSR); break;
    case sub  : __ pop_i(SSR); __ subu(FSR, SSR, FSR); break;
    case mul  :
      __ lw(SSR, SP, 0);              // peek, then drop manually below
      __ mult(SSR, FSR);
      __ addi(SP, SP, wordSize);      // overlaps with the multiply latency
      __ nop();
      __ mflo(FSR);
      break;
    case _and : __ pop_i(SSR); __ andr(FSR, SSR, FSR); break;
    case _or  : __ pop_i(SSR); __ orr(FSR, SSR, FSR); break;
    case _xor : __ pop_i(SSR); __ xorr(FSR, SSR, FSR); break;
    case shl  : __ pop_i(SSR); __ sllv(FSR, SSR, FSR); break; // implicit masking of lower 5 bits by Intel shift instr. mips also
    case shr  : __ pop_i(SSR); __ srav(FSR, SSR, FSR); break; // implicit masking of lower 5 bits by Intel shift instr. mips also
    case ushr : __ pop_i(SSR); __ srlv(FSR, SSR, FSR); break; // implicit masking of lower 5 bits by Intel shift instr.
// mips also
    default   : ShouldNotReachHere();
  }
}

// the result stored in FSR, SSR,
// used registers : T2, T3
// lop2: binary long ops; T2/T3 = popped low/high, FSR/SSR = TOS low/high
void TemplateTable::lop2(Operation op) {
  transition(ltos, ltos);
  //__ pop2(T2, T3);
  __ pop_l(T2, T3);
  switch (op) {
    case add :
      __ addu(FSR, T2, FSR);
      __ sltu(AT, FSR, T2);       // AT = carry out of low-word add
      __ addu(SSR, T3, SSR);
      __ addu(SSR, SSR, AT);
      break;
    case sub :
      __ subu(FSR, T2, FSR);
      __ sltu(AT, T2, FSR);       // AT = borrow out of low-word subtract
      __ subu(SSR, T3, SSR);
      __ subu(SSR, SSR, AT);
      break;
    case _and:
      __ andr(FSR, T2, FSR);
      __ andr(SSR, T3, SSR);
      break;
    case _or :
      __ orr(FSR, T2, FSR);
      __ orr(SSR, T3, SSR);
      break;
    case _xor:
      __ xorr(FSR, T2, FSR);
      __ xorr(SSR, T3, SSR);
      break;
    default : ShouldNotReachHere();
  }
}

// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception,
// the result is 0x80000000
// the godson2 cpu do the same, so we need not handle this specially like x86
void TemplateTable::idiv() {
  transition(itos, itos);
  Label not_zero;
  //__ pop(SSR);
  __ pop_i(SSR);
  __ div(SSR, FSR);               // start divide; result read after zero check
  __ bne(FSR, ZERO, not_zero);
  __ delayed()->nop();
  //__ brk(7);
  __ jmp(Interpreter::_throw_ArithmeticException_entry);  // divide by zero
  __ delayed()->nop();

  __ bind(not_zero);
  __ mflo(FSR);                   // quotient
}

void TemplateTable::irem() {
  transition(itos, itos);
  Label not_zero;
  //__ pop(SSR);
  __ pop_i(SSR);
  __ div(SSR, FSR);
  __ bne(FSR, ZERO, not_zero);
  __ delayed()->nop();
  //__ brk(7);
  __ jmp(Interpreter::_throw_ArithmeticException_entry);  // divide by zero
  __ delayed()->nop();

  __ bind(not_zero);
  __ mfhi(FSR);                   // remainder
}

// the multiplier in SSR||FSR, the multiplicand in stack
// the result in SSR||FSR
// used registers : T2, T3
// 64x64->64 multiply from 32-bit pieces:
//   lo = lo(T2*FSR); hi = hi(T2*FSR) + lo(T2*SSR) + lo(T3*FSR)
void TemplateTable::lmul() {
  transition(ltos, ltos);
  Label zero, quick, done;
  //__ lw(T2, SP, 0);
  //__ lw(T3, SP, 4);
  __ pop_l(T2, T3);
  __ orr(AT, T2, FSR);
  __ beq(AT, ZERO, zero);         // both low words zero => result is zero
  //__ delayed()->addi(SP, SP, 2 * wordSize);
  __ delayed()->nop();
  __ orr(AT, T3, SSR);
  __ beq(AT, ZERO, quick);        // both high words zero => skip cross products
  __ delayed()->nop();
  __ multu(T2, SSR);
  __ nop();
  __ nop();
  __ mflo(SSR);                   // cross product 1
  __ multu(T3, FSR);
  __ nop();
  __ nop();
  __ mflo(T3);                    // cross product 2

  __ bind(quick);
  __ multu(T2, FSR);              // main low-word product
  __ addu(SSR, SSR, T3);          // sum of cross products (delay-fill)
  __ nop();
  __ mflo(FSR);
  __ mfhi(T2);
  __ b(done);
  __ delayed()->addu(SSR, SSR, T2);  // add carry from low product

  __ bind(zero);
  __ move(SSR, ZERO);

  __ bind(done);
}

// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
// NOTE(review): comment above looks stale — the code below does jmp to that entry.
void TemplateTable::ldiv() {
  transition(ltos, ltos);
  Label normal;
  __ orr(AT, FSR, SSR);
  __ bne(AT, ZERO, normal);       // divisor (TOS long) non-zero?
  __ delayed()->nop();
  //__ brk(7); //generate FPE
  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();

  __ bind(normal);
  // call the C helper: A0/A1 = divisor, A2/A3 = dividend
  __ move(A0, FSR);
  __ move(A1, SSR);
  //__ lw(A2, SP, 0);
  //__ lw(A3, SP, 4);
  //__ addi(SP, SP, 2 * wordSize);
  __ pop_l (A2, A3);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 4);
}

// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
// NOTE(review): comment above looks stale — the code below does jmp to that entry.
void TemplateTable::lrem() {
  transition(ltos, ltos);
  Label normal;
  __ orr(AT, FSR, SSR);
  __ bne(AT, ZERO, normal);
  __ delayed()->nop();
  __ jmp(Interpreter::_throw_ArithmeticException_entry);
  __ delayed()->nop();

  __ bind(normal);
  __ move(A0, FSR);
  __ move(A1, SSR);
  __ pop_l (A2, A3);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 4);
}

// result in SSR||FSR
// used registers : T2, T3
// lshl: long shift left; shift count in FSR (int), value popped into T2/T3
void TemplateTable::lshl() {
  transition(itos, ltos);
  Label normal, done, notZero;
  __ pop_l(T2, T3);
  __ andi(FSR, FSR, 0x3f);        // the bit to be shifted
  __ bne(FSR, ZERO, notZero);
  __ delayed()-> nop();
  // zero shift: result is the operand unchanged
  __ move(FSR, T2);
  __ b(done);
  __ delayed();
  __ move(SSR, T3);

  __ bind(notZero);
  __ sltiu(AT, FSR, BitsPerWord);
  __ bne(AT, ZERO, normal);       // <BitsPerWord?
__ delayed()->nop();
  // shift >= 32: low word of result is zero, high word = low operand << (n-32)
  __ addi(AT, FSR, - BitsPerWord);
  __ sllv(SSR, T2, AT);
  __ b(done);
  __ delayed();
  __ move(FSR, ZERO);

  __ bind(normal);
  // shift < 32: high = (hi << n) | (lo >> (32-n)); low = lo << n
  __ sllv(SSR, T3, FSR);
  __ move(AT, BitsPerWord);
  __ sub(AT, AT, FSR);
  __ srlv(AT, T2, AT);
  __ orr(SSR, SSR, AT);
  __ sllv(FSR, T2, FSR);

  __ bind(done);
}

// used registers : T2, T3
// lshr: arithmetic long shift right; count in FSR, value popped into T2/T3
void TemplateTable::lshr() {
  transition(itos, ltos);
  Label normal, done, notZero;
  __ pop_l(T2, T3);
  __ andi(FSR, FSR, 0x3f);        // the bit to be shifted
  __ bne(FSR, ZERO, notZero);
  __ delayed()-> nop();
  __ move(FSR, T2);               // zero shift must be handled specially
  __ b(done);
  __ delayed();
  __ move(SSR, T3);

  __ bind(notZero);
  __ sltiu(AT, FSR, BitsPerWord);
  __ bne(AT, ZERO, normal);       // shift < BitsPerWord?
  __ delayed()->nop();
  // shift >= 32: low = hi >> (n-32) arithmetic; high = sign fill
  __ addi(AT, FSR, -BitsPerWord); // quick
  __ srav(FSR, T3, AT);
  __ b(done);
  __ delayed()->sra(SSR, T3, BitsPerWord-1);

  __ bind(normal);
  // shift < 32: high = hi >> n arithmetic; low = (lo >> n) | (hi << (32-n))
  __ srav(SSR, T3, FSR);          // normal
  __ move(AT, BitsPerWord);
  __ sub(AT, AT, FSR);
  __ srlv(FSR, T2, FSR);
  __ sllv(AT, T3, AT);
  __ orr(FSR, FSR, AT);

  __ bind(done);
}

// used registers : T2, T3
// lushr: logical long shift right; count in FSR, value popped into T2/T3
void TemplateTable::lushr() {
  transition(itos, ltos);
  Label normal, done, notZero;
  __ pop_l(T2, T3);
  __ andi(FSR, FSR, 0x3f);        // the bit to be shifted
  __ bne(FSR, ZERO, notZero);
  __ delayed()->nop();
  __ move(FSR, T2);               // zero shift must be handled specially
  __ b(done);
  __ delayed();
  __ move(SSR, T3);

  __ bind(notZero);
  __ sltiu(AT, FSR, BitsPerWord);
  __ bne(AT, ZERO, normal);       // shift < BitsPerWord?
  __ delayed()->nop();
  // shift >= 32: low = hi >> (n-32) logical; high = 0
  __ addi(AT, FSR, - BitsPerWord); // quick
  __ srlv(FSR, T3, AT);
  __ b(done);
  __ delayed();
  __ move(SSR, ZERO);

  __ bind(normal);
  // normal: high = hi >> n logical; low = (lo >> n) | (hi << (32-n))
  __ srlv(SSR, T3, FSR);
  __ move(AT, BitsPerWord);
  __ sub(AT, AT, FSR);
  __ srlv(FSR, T2, FSR);
  __ sllv(AT, T3, AT);
  __ orr(FSR, FSR, AT);

  __ bind(done);
}

// result in FSF
// fop2: binary float ops; second operand is on the expression stack (FTF)
void TemplateTable::fop2(Operation op) {
  transition(ftos, ftos);
  __ pop_ftos_to_esp();           // pop ftos into esp
  switch (op) {
    case add:
      __ lwc1(FTF, at_sp());
      __ add_s(FSF, FTF, FSF);
      break;
    case sub:
      __ lwc1(FTF, at_sp());
      __ sub_s(FSF, FTF, FSF);
      break;
    case mul:
      __ lwc1(FTF, at_sp());
      __ mul_s(FSF, FTF, FSF);
      break;
    case div:
      __ lwc1(FTF, at_sp());
      __ div_s(FSF, FTF, FSF);
      break;
    case rem:
      // copy divisor into F12 via an integer round trip, then helper rem_s
      __ mfc1(FSR, FSF);
      __ mtc1(FSR, F12);
      __ lwc1(FTF, at_sp());
      __ rem_s(FSF, FTF, F12, FSF);
      break;
    default : ShouldNotReachHere();
  }
  __ addi(SP, SP, 1 * wordSize);  // drop the popped operand
}

// result in SSF||FSF
// i dont handle the strict flags
void TemplateTable::dop2(Operation op) {
  transition(dtos, dtos);
  __ pop_dtos_to_esp();           // pop dtos into esp
  switch (op) {
    case add:
      __ lwc1(FTF, at_sp());
      __ lwc1(STF, at_sp_p1());
      __ add_d(FSF, FTF, FSF);
      break;
    case sub:
      __ lwc1(FTF, at_sp());
      __ lwc1(STF, at_sp_p1());
      __ sub_d(FSF, FTF, FSF);
      break;
    case mul:
      __ lwc1(FTF, at_sp());
      __ lwc1(STF, at_sp_p1());
      __ mul_d(FSF, FTF, FSF);
      break;
    case div:
      __ lwc1(FTF, at_sp());
      __ lwc1(STF, at_sp_p1());
      __ div_d(FSF, FTF, FSF);
      break;
    case rem:
      // copy divisor pair into F12/F13, then helper rem_d
      __ mfc1(FSR, FSF);
      __ mfc1(SSR, SSF);
      __ mtc1(FSR, F12);
      __ mtc1(SSR, F13);
      __ lwc1(FTF, at_sp());
      __ lwc1(STF, at_sp_p1());
      __ rem_d(FSF, FTF, F12, FSF);
      break;
    default : ShouldNotReachHere();
  }
  __ addi(SP, SP, 2 * wordSize);  // drop the popped operand
}

void TemplateTable::ineg() {
  transition(itos, itos);
  __ neg(FSR);
}

// lneg: two's-complement negate of SSR||FSR: invert both words, add 1 with carry
void TemplateTable::lneg() {
  transition(ltos, ltos);
  __ nor(FSR, ZERO, FSR);
  __ addiu(FSR, FSR, 1);
  __ sltiu(AT, FSR, 1);           // carry into high word iff low wrapped to 0
  __ nor(SSR, ZERO, SSR);
  __ addu(SSR, SSR, AT);
}

/*
// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong float_signflip_pool[2*2];
static jlong double_signflip_pool[2*2];
*/

void TemplateTable::fneg() {
  transition(ftos, ftos);
  __ neg_s(FSF, FSF);
}

void TemplateTable::dneg() {
  transition(dtos, dtos);
  __ neg_d(FSF, FSF);
}

// used registers : T2
// iinc: local[index] += signed byte constant; index/constant read from bytecode
void TemplateTable::iinc() {
  transition(vtos, vtos);
  locals_index(T2);               // T2 = address of the local slot
  __ lw(FSR, T2, 0);
  __ lb(AT, at_bcp(2));           // get constant
  __ addu(FSR, FSR, AT);
  __ sw(FSR, T2, 0);
}

// used register : T2
// wide_iinc: like iinc, but 2-byte index and 2-byte (byte-swapped) constant
void TemplateTable::wide_iinc() {
  transition(vtos, vtos);
  locals_index_wide(T2);
  __ load_two_bytes_from_at_bcp(FSR, AT, 4);
  __ hswap(FSR);                  // bytecode stream is big endian
  __ lw(AT, T2, 0);
  __ addu(FSR, AT, FSR);
  __ sw(FSR, T2, 0);
}

// convert: all primitive conversion bytecodes (i2l .. d2f)
void TemplateTable::convert() {
  // Checking
#ifdef ASSERT
  {
    TosState tos_in  = ilgl;
    TosState tos_out = ilgl;
    switch (bytecode()) {
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_in = itos; break;
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_l2d: tos_in = ltos; break;
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_f2d: tos_in = ftos; break;
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_d2l: // fall through
      case Bytecodes::_d2f: tos_in = dtos; break;
      default             : ShouldNotReachHere();
    }
    switch (bytecode()) {
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_out = itos; break;
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_d2l: tos_out = ltos; break;
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_d2f: tos_out = ftos; break;
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_l2d: // fall through
      case Bytecodes::_f2d: tos_out = dtos; break;
      default             : ShouldNotReachHere();
    }
    transition(tos_in, tos_out);
  }
#endif // ASSERT

  // Conversion
  // (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
  switch (bytecode()) {
    case Bytecodes::_i2l:
      __ extend_sign(SSR, FSR);
      break;
    case Bytecodes::_i2f:
      __ mtc1(FSR, FSF);
      __ cvt_s_w(FSF, FSF);
      break;
    case Bytecodes::_i2d:
      __ mtc1(FSR, FSF);
      __ cvt_d_w(FSF, FSF);
      break;
    case Bytecodes::_i2b:
      // sign-extend the low byte via shift pair
      __ shl(FSR, 24);
      __ sar(FSR, 24);
      break;
    case Bytecodes::_i2c:
      __ andi(FSR, FSR, 0xFFFF);  // truncate upper 16 bits
      break;
    case Bytecodes::_i2s:
      // sign-extend the low halfword via shift pair
      __ shl(FSR, 16);
      __ sar(FSR, 16);
      break;
    case Bytecodes::_l2i:
      /* nothing to do */
      break;
    case Bytecodes::_l2f:
      __ mtc1(FSR, FSF);
      __ mtc1(SSR, SSF);
      __ cvt_s_l(FSF, FSF);
      break;
    case Bytecodes::_l2d:
      __ mtc1(FSR, FSF);
      __ mtc1(SSR, SSF);
      __ cvt_d_l(FSF, FSF);
      break;
    case Bytecodes::_f2i:
      {
        // NaN converts to 0 (Java semantics); otherwise truncate toward zero
        Label L;
        __ c_un_s(FSF, FSF);      //NaN?
        __ bc1t(L);
        __ delayed();
        __ move(FSR, ZERO);
        __ trunc_w_s(FSF, FSF);
        __ mfc1(FSR, FSF);
        __ bind(L);
      }
      break;
    case Bytecodes::_f2l:
      {
        Label L;
        __ move(SSR, ZERO);
        __ c_un_s(FSF, FSF);      //NaN?
        __ bc1t(L);
        __ delayed();
        __ move(FSR, ZERO);
        __ trunc_l_s(FSF, FSF);
        __ mfc1(FSR, FSF);
        __ mfc1(SSR, SSF);
        __ bind(L);
      }
      break;
    case Bytecodes::_f2d:
      __ cvt_d_s(FSF, FSF);
      break;
    case Bytecodes::_d2i:
      {
        Label L;
        __ c_un_d(FSF, FSF);      //NaN?
        __ bc1t(L);
        __ delayed();
        __ move(FSR, ZERO);
        __ trunc_w_d(FSF, FSF);
        __ mfc1(FSR, FSF);
        __ bind(L);
      }
      break;
    case Bytecodes::_d2l:
      {
        Label L;
        __ move(SSR, ZERO);
        __ c_un_d(FSF, FSF);      //NaN?
__ bc1t(L);
        __ delayed();
        __ move(FSR, ZERO);
        __ trunc_l_d(FSF, FSF);
        __ mfc1(FSR, FSF);
        __ mfc1(SSR, SSF);
        __ bind(L);
      }
      break;
    case Bytecodes::_d2f:
      __ cvt_s_d(FSF, FSF);
      break;
    default             :
      ShouldNotReachHere();
  }
}

// lcmp: compare two longs; result -1/0/1 in FSR.
// Compares high words signed first, then low words unsigned.
void TemplateTable::lcmp() {
  transition(ltos, itos);
  Label low, high, done;
  __ lw(T3, SP, 4);               // T3 = high word of the second operand
  // __ pop_l(T2, T3);
  __ slt(AT, T3, SSR);
  __ bne(AT, ZERO, low);
  __ delayed()->addi(SP, SP, 8);  // pop the operand in the delay slot
  // __ delayed()->nop();
  __ slt(AT, SSR, T3);
  __ bne(AT, ZERO, high);
  __ delayed()->nop();
  // high words equal: compare low words unsigned.
  // NOTE(review): this reloads the just-popped low word from below SP —
  // relies on the popped slot still being intact; verify nothing clobbers it.
  __ lw(T2, SP, -8);
  __ sltu(AT, T2, FSR);
  __ bne(AT, ZERO, low);
  __ delayed();
  __ sltu(AT, FSR, T2);
  __ bne(AT, ZERO, high);
  __ delayed()->nop();
  __ b(done);
  __ delayed();
  __ move(FSR, 0);
  __ bind(low);
  __ b(done);
  __ delayed();
  __ move(FSR, -1);
  __ bind(high);
  __ b(done);
  __ delayed();
  __ move(FSR, 1);
  __ bind(done);
}

// float_cmp: fcmpl/fcmpg/dcmpl/dcmpg; unordered_result picks NaN outcome (-1 or 1)
void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  Label less, done;

  __ move(FSR, ZERO);

  if (is_float) {
    __ pop_ftos_to_esp();
    __ lwc1(FTF, at_sp());
    __ c_eq_s(FTF, FSF);
    __ bc1t(done);
    __ delayed()->addi(SP, SP, 1 * wordSize);

    if (unordered_result<0)
      __ c_ult_s(FTF, FSF);       // unordered counts as "less"
    else
      __ c_olt_s(FTF, FSF);       // unordered falls through to "greater"
  } else {
    __ pop_dtos_to_esp();
    __ lwc1(FTF, at_sp());
    __ lwc1(STF, at_sp_p1());
    __ c_eq_d(FTF, FSF);
    __ bc1t(done);
    __ delayed()->addi(SP, SP, 2 * wordSize);

    if (unordered_result<0)
      __ c_ult_d(FTF, FSF);
    else
      __ c_olt_d(FTF, FSF);
  }
  __ bc1t(less);
  __ delayed()->nop();
  __ b(done);
  __ delayed();
  __ move(FSR, 1);
  __ bind(less);
  __ move(FSR, -1);
  __ bind(done);
}

// used registers : T3, T4, T7
// FSR : return bci, this is defined by the vm specification
// T3 : method
// T4 : offset
// T7 : next bytecode, this is required by dispatch_base
void TemplateTable::branch(bool is_jsr, bool is_wide) {
  __ get_method(T3);
  __ profile_taken_branch(T4, T7); // only C2 meaningful
#ifndef CORE
  const ByteSize be_offset = methodOopDesc::backedge_counter_offset() + InvocationCounter::counter_offset();
  const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() + InvocationCounter::counter_offset();
  const int method_offset = frame::interpreter_frame_method_offset * wordSize;
#endif // CORE

  // Load up T4 with the branch displacement
  if (!is_wide) {
    __ load_two_bytes_from_at_bcp(T4, AT, 1);
    __ hswap(T4);                 // displacement is big endian in the stream
  } else {
    __ lw(T4, at_bcp(1));
    __ swap(T4);
  }

  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the
  // non-JSR normal-branch stuff occuring below.
  if (is_jsr) {
    // Pre-load the next target bytecode into T7
    __ add(AT, BCP, T4);
    __ lbu(T7, AT, 0);

    // compute return address as bci in FSR
    __ addi(FSR, BCP, (is_wide?5:3) - in_bytes(constMethodOopDesc::codes_offset()));
    __ lw(AT, T3, in_bytes(methodOopDesc::const_offset()));
    __ sub(FSR, FSR, AT);

    // Adjust the bcp in BCP by the displacement in T4
    __ add(BCP, BCP, T4);
    // jsr returns atos that is not an oop
    // __ dispatch_only_noverify(atos);
    // Push return address
    // __ push_i(eax);
    __ push_i(FSR);
    // jsr returns vtos
    __ dispatch_only_noverify(vtos);
    return;
  }

  // Normal (non-jsr) branch handling

  // Adjust the bcp in S0 by the displacement in T4
  __ add(BCP, BCP, T4);

#ifdef CORE
  // Pre-load the next target bytecode into EBX
  __ lbu(T7, BCP, 0);
  // continue with the bytecode @ target
  __ dispatch_only(vtos);
#else
  assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
  Label backedge_counter_overflow;
  Label profile_method;
  Label dispatch;
  if (UseLoopCounter) {
    // increment backedge counter for backward branches
    // eax: MDO
    // ebx: MDO bumped taken-count
    // T3: method
    // T4: target offset
    // BCP: target bcp
    // LVP: locals pointer
    __ bgtz(T4, dispatch);        // check if forward or backward branch
    __ delayed()->nop();

    // increment back edge counter
    __ lw(T0, T3, in_bytes(be_offset));
    __ increment(T0, InvocationCounter::count_increment);
    __ sw(T0, T3, in_bytes(be_offset));

    // load invocation counter
    __ lw(T1, T3, in_bytes(inv_offset));
    // buffer bit added, mask no needed
    // by yjl 10/24/2005
    //__ move(AT, InvocationCounter::count_mask_value);
    //__ andr(T1, T1, AT);

    // add backedge counter & invocation counter
    __ add(T1, T1, T0);

    if (ProfileInterpreter) {
      // Test to see if we should create a method data oop
      __ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
      __ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
      __ slt(AT, T1, AT);
      __ bne(AT, ZERO, dispatch);
      __ delayed()->nop();

      // if no method data exists, go to profile method
      __ test_method_data_pointer(T1, profile_method);

      if (UseOnStackReplacement) {
        // check for overflow against ebx which is the MDO taken count
        __ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
        __ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
        // the value T7 Is get from the beginning profile_taken_branch
        __ slt(AT, T7, AT);
        __ bne(AT, ZERO, dispatch);
        __ delayed()->nop();

        // When ProfileInterpreter is on, the backedge_count comes
        // from the methodDataOop, which value does not get reset on
        // the call to frequency_counter_overflow().
        // To avoid excessive calls to the overflow routine while
        // the method is being compiled, add a second test to make
        // sure the overflow function is called only once every
        // overflow_frequency.
        const int overflow_frequency = 1024;
        __ andi(T7, T7, overflow_frequency-1);
        __ beq(T7, ZERO, backedge_counter_overflow);
        __ delayed()->nop();
      }
    } else {
      if (UseOnStackReplacement) {
        // check for overflow against eax, which is the sum of the counters
        __ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
        __ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
        __ slt(AT, T1, AT);
        __ beq(AT, ZERO, backedge_counter_overflow);
        __ delayed()->nop();
      }
    }
    __ bind(dispatch);
  }

  // Pre-load the next target bytecode into T7
  __ lbu(T7, BCP, 0);

  // continue with the bytecode @ target
  // FSR: return bci for jsr's, unused otherwise
  // T7: target bytecode
  // BCP: target bcp
  __ dispatch_only(vtos);

  if (UseLoopCounter) {
    if (ProfileInterpreter) {
      // Out-of-line code to allocate method data oop.
      __ bind(profile_method);
      __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method), BCP);
      __ lbu(T2, BCP, 0);
      __ lw(T3, FP, method_offset);
      __ lw(T3, T3, in_bytes(methodOopDesc::method_data_offset()));
      __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
      __ test_method_data_pointer(T3, dispatch);
      // offset non-null mdp by MDO::data_offset() + IR::profile_method()
      __ addi(T3, T3, in_bytes(methodDataOopDesc::data_offset()));
      __ add(T3, T3, T1);
      __ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
      __ b(dispatch);
      __ delayed()->nop();
    }

    if (UseOnStackReplacement) {
      // invocation counter overflow
      __ bind(backedge_counter_overflow);
      __ sub(T4, BCP, T4);        // branch bcp
      call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), T4);
      __ lbu(T7, BCP, 0);

      // V0: osr nmethod (osr ok) or NULL (osr not possible)
      // V1: osr adapter frame return address
      // T7: target bytecode
      // LVP: locals pointer
      // BCP: bcp
      __ beq(V0, ZERO, dispatch);
      __ delayed()->nop();
      // nmethod may have been invalidated (VM may block upon call_VM return)
      __ lw(T3, V0, nmethod::entry_bci_offset());
      __ move(AT, InvalidOSREntryBci);
      __ beq(AT, T3, dispatch);
      __ delayed()->nop();

      // We need to prepare to execute the OSR method. First we must
      // migrate the locals and monitors off of the stack.
      //eax V0: osr nmethod (osr ok) or NULL (osr not possible)
      //ebx V1: osr adapter frame return address
      //edx T7: target bytecode
      //edi LVP: locals pointer
      //esi BCP: bcp
      //__ movl(esi, eax);        // save the nmethod
      __ move(BCP, V0);

      // const Register thread = ecx;
      const Register thread = T8;
      __ get_thread(thread);
      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
      // eax is OSR buffer, move it to expected parameter location
      //refer to osrBufferPointer in c1_LIRAssembler_mips.cpp
      // __ movl(ecx, eax);
      __ move(T0, V0);

      // pop the interpreter frame
      // __ movl(edx, Address(ebp, frame::interpreter_frame_sender_sp_offset
      // * wordSize)); // get sender sp
      __ lw(T8, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
      //FIXME, shall we keep the return address on the stack?
      __ leave();                 // remove frame anchor
      // __ popl(edi);            // get return address
      //__ addi(SP, SP, wordSize);  // get return address
      // __ pop(LVP);
      __ move(LVP, RA);
      // __ movl(esp, edx);       // set sp to sender sp
      __ move(SP, T8 );

      Label skip;
      Label chkint;

      // The interpreter frame we have removed may be returning to
      // either the callstub or the interpreter. Since we will
      // now be returning from a compiled (OSR) nmethod we must
      // adjust the return to the return were it can handler compiled
      // results and clean the fpu stack. This is very similar to
      // what a i2c adapter must do.

      // Are we returning to the call stub?
#if 0
      // __ cmpl(edi, (int)StubRoutines::_call_stub_return_address);
      __ addi(AT, LVP, -(int)StubRoutines::_call_stub_return_address);
      // __ jcc(Assembler::notEqual, chkint);
      __ bne(AT, ZERO, chkint);
      __ delayed()->nop();

      // yes adjust to the specialized call stub  return.
      // assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL,
      // "must be set");
      assert(StubRoutines::gs2::get_call_stub_compiled_return() != NULL, "must be set");
      // __ movl(edi, (intptr_t) StubRoutines::i486::get_call_stub_compiled_return());
      __ move(LVP, (intptr_t) StubRoutines::gs2::get_call_stub_compiled_return());
      // __ jmp(skip);
      __ b(skip);
      __ delayed()->nop();

      __ bind(chkint);

      // Are we returning to the interpreter? Look for sentinel
      //__ cmpl(Address(edi, -8), Interpreter::return_sentinel);
      __ lw(AT, LVP , -8);
      __ addi(AT, AT, -Interpreter::return_sentinel);
      //__ jcc(Assembler::notEqual, skip);
      __ bne(AT, ZERO, skip);
      __ delayed()->nop();

      // Adjust to compiled return back to interpreter
      // __ movl(edi, Address(edi, -4));
      __ lw(LVP, LVP, -4);

      __ bind(skip);
#endif

      // Align stack pointer for compiled code (note that caller is
      // responsible for undoing this fixup by remembering the old SP
      // in an ebp-relative location)
      // __ andl(esp, -(StackAlignmentInBytes));
      __ move(AT, -(StackAlignmentInBytes));
      __ andr(SP , SP , AT);

      // push the (possibly adjusted) return address
      // __ pushl(edi);
      //__ push(LVP);
      // __ move(RA, LVP);

      // and begin the OSR nmethod
      // __ jmp(Address(esi, nmethod::osr_entry_point_offset()));
      //refer to osr_entry in c1_LIRAssembler_mips.cpp
      __ lw(AT, BCP, nmethod::osr_entry_point_offset());
      __ jr(AT);
      __ delayed()->nop();
    }
  }
#endif // not CORE
}

// if_0cmp: branch on int TOS compared with zero (inverted test falls through)
void TemplateTable::if_0cmp(Condition cc) {
  transition(itos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  Label not_taken;
  switch(cc) {
    case not_equal:
      __ beq(FSR, ZERO, not_taken);
      break;
    case equal:
      __ bne(FSR, ZERO, not_taken);
      break;
    case less:
      __ bgez(FSR, not_taken);
      break;
    case less_equal:
      __ bgtz(FSR, not_taken);
      break;
    case greater:
      __ blez(FSR, not_taken);
      break;
    case greater_equal:
      __ bltz(FSR, not_taken);
      break;
  }
  __ delayed()->nop();

  branch(false, false);

  __ bind(not_taken);
  __ profile_not_taken_branch(FSR);
}

void
TemplateTable::if_icmp(Condition cc) {
  transition(itos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  Label not_taken;
  // if_icmp: branch on (SSR cc FSR); second operand popped into SSR
  //__ lw(SSR, SP, 0);
  __ pop_i(SSR);
  switch(cc) {
    case not_equal:
      __ beq(SSR, FSR, not_taken);
      break;
    case equal:
      __ bne(SSR, FSR, not_taken);
      break;
    case less:
      __ slt(AT, SSR, FSR);
      __ beq(AT, ZERO, not_taken);
      break;
    case less_equal:
      __ slt(AT, FSR, SSR);
      __ bne(AT, ZERO, not_taken);
      break;
    case greater:
      __ slt(AT, FSR, SSR);
      __ beq(AT, ZERO, not_taken);
      break;
    case greater_equal:
      __ slt(AT, SSR, FSR);
      __ bne(AT, ZERO, not_taken);
      break;
  }
  // __ delayed()->addi(SP, SP, 1 * wordSize);
  __ delayed()->nop();

  branch(false, false);

  __ bind(not_taken);
  __ profile_not_taken_branch(FSR);
}

// if_nullcmp: ifnull / ifnonnull on reference TOS
void TemplateTable::if_nullcmp(Condition cc) {
  transition(atos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  Label not_taken;
  switch(cc) {
    case not_equal:
      __ beq(FSR, ZERO, not_taken);
      break;
    case equal:
      __ bne(FSR, ZERO, not_taken);
      break;
    default:
      ShouldNotReachHere();
  }
  __ delayed()->nop();

  branch(false, false);

  __ bind(not_taken);
  __ profile_not_taken_branch(FSR);
}

// if_acmp: reference equality compare of SSR (popped) and FSR
void TemplateTable::if_acmp(Condition cc) {
  transition(atos, vtos);
  // assume branch is more often taken than not (loops use backward branches)
  Label not_taken;
  // __ lw(SSR, SP, 0);
  __ pop_ptr(SSR);
  switch(cc) {
    case not_equal:
      __ beq(SSR, FSR, not_taken);
      break;
    case equal:
      __ bne(SSR, FSR, not_taken);
      break;
    default:
      ShouldNotReachHere();
  }
  // __ delayed()->addi(SP, SP, 4);
  __ delayed()->nop();

  branch(false, false);

  __ bind(not_taken);
  __ profile_not_taken_branch(FSR);
}

// used registers : T1, T2, T3
// T1 : method
// T2 : returb bci
// ret: return from jsr — reload BCP from the bci stored in the local
void TemplateTable::ret() {
  transition(vtos, vtos);
  locals_index(T2);
  __ lw(T2, T2, 0);
  __ profile_ret(T2, T3);
  __ get_method(T1);
  __ lw(BCP, T1, in_bytes(methodOopDesc::const_offset()));
  __ add(BCP, BCP, T2);
  __ addi(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset()));
  __ dispatch_next(vtos);
}

// used registers : T1, T2, T3
// T1 : method
// T2 : returb bci
void TemplateTable::wide_ret() {
  transition(vtos, vtos);
  locals_index_wide(T2);
  __ lw(T2, T2, 0);               // get return bci, compute return bcp
  __ profile_ret(T2, T3);
  __ get_method(T1);
  __ lw(BCP, T1, in_bytes(methodOopDesc::const_offset()));
  __ add(BCP, BCP, T2);
  __ addi(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset()));
  __ dispatch_next(vtos);
}

// used register T2, T3, T4, T7
// T2 : bytecode pointer
// T3 : low
// T4 : high
// T7 : dest bytecode, required by dispatch_base
void TemplateTable::tableswitch() {
  Label default_case, continue_execution;
  transition(itos, vtos);

  // align BCP to the 4-byte-aligned jump table
  __ addi(T2, BCP, wordSize);
  __ move(AT, -wordSize);
  __ andr(T2, T2, AT);

  // load lo & hi
  __ lw(T3, T2, 1*wordSize);
  __ swap(T3);                    // table entries are big endian
  __ lw(T4, T2, 2*wordSize);
  __ swap(T4);

  // check against lo & hi
  __ slt(AT, FSR, T3);
  __ bne(AT, ZERO, default_case);
  __ delayed()->nop();
  __ slt(AT, T4, FSR);
  __ bne(AT, ZERO, default_case);
  __ delayed()->nop();

  // lookup dispatch offset, in T4 big endian
  __ sub(FSR, FSR, T3);
  __ sll(AT, FSR, 2);
  __ add(AT, T2, AT);
  __ lw(T4, AT, 3*wordSize);
  __ profile_switch_case(FSR, T2, T3);

  __ bind(continue_execution);
  __ swap(T4);
  __ add(BCP, BCP, T4);
  __ lbu(T7, BCP, 0);
  __ dispatch_only(vtos);

  // handle default
  __ bind(default_case);
  __ profile_switch_default(FSR);
  __ lw(T4, T2, 0);               // default offset is the first table word
  __ b(continue_execution);
  __ delayed()->nop();
}

void TemplateTable::lookupswitch() {
  transition(itos, itos);
  __ stop("lookupswitch bytecode should have been rewritten");
}

// used registers : T2, T3, T4, T7
// T2 : bytecode pointer
// T3 : pair index
// T4 : offset
// T7 : dest bytecode
// the data after the opcode is the same as lookupswith
// see Rewriter::rewrite_method for more information
void TemplateTable::fast_linearswitch() {
  transition(itos, vtos);
  Label loop_entry, loop, found, continue_execution;

  // swap eax so we can avoid swapping the table entries
  __ swap(FSR);

  // align BCP
  __ addi(T2, BCP, wordSize);
  __ move(AT, -wordSize);
  __ andr(T2, T2, AT);

  // set counter
  __ lw(T3, T2, wordSize);        // number of match/offset pairs
  __ swap(T3);
  __ b(loop_entry);
  __ delayed()->nop();

  // table search
  __ bind(loop);
  // get the entry value (pairs are 8 bytes each)
  __ sll(AT, T3, 3);
  __ add(AT, T2, AT);
  __ lw(AT, AT, 2 * wordSize);

  // found?
  __ beq(FSR, AT, found);
  __ delayed()->nop();

  __ bind(loop_entry);
  __ bgtz(T3, loop);
  __ delayed()->addiu(T3, T3, -1); // decrement index in the delay slot

  // default case
  __ profile_switch_default(FSR);
  __ lw(T4, T2, 0);
  __ b(continue_execution);
  __ delayed()->nop();

  // entry found -> get offset
  __ bind(found);
  __ sll(AT, T3, 3);
  __ add(AT, T2, AT);
  __ lw(T4, AT, 3 * wordSize);
  __ profile_switch_case(T3, FSR, T2);

  // continue execution
  __ bind(continue_execution);
  __ swap(T4);
  __ add(BCP, BCP, T4);
  __ lbu(T7, BCP, 0);
  __ dispatch_only(vtos);
}

// used registers : T0, T1, T2, T3, T4, T7
// T2 : pairs address(array)
// T7 : dest bytecode
// the data after the opcode is the same as lookupswith
// see Rewriter::rewrite_method for more information
void TemplateTable::fast_binaryswitch() {
  transition(itos, vtos);
  // Implementation using the following core algorithm:
  //
  // int binary_search(int key, LookupswitchPair* array, int n) {
  //   // Binary search according to "Methodik des Programmierens" by
  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
  //   int i = 0;
  //   int j = n;
  //   while (i+1 < j) {
  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
  //     // with      Q: for all i: 0 <= i < n: key < a[i]
  //     // where a stands for the array and assuming that the (inexisting)
  //     // element a[n] is infinitely big.
  //     int h = (i + j) >> 1;
  //     // i < h < j
  //     if (key < array[h].fast_match()) {
  //       j = h;
  //     } else {
  //       i = h;
  //     }
  //   }
  //   // R: a[i] <= key < a[i+1] or Q
  //   // (i.e., if key is within array, i is the correct index)
  //   return i;
  // }

  // register allocation
  const Register array = T2;
  const Register i=T3, j=T4;
  const Register h=T1;
  const Register temp=T0;
  const Register key=FSR;

  // setup array
  __ addi(array, BCP, 3*wordSize);
  __ move(AT, -wordSize);
  __ andr(array, array, AT);

  // initialize i & j
  __ move(i, ZERO);
  __ lw(j, array, - 1 * wordSize); // j = number of pairs
  // Convert j into native byteordering
  __ swap(j);

  // and start
  Label entry;
  __ b(entry);
  __ delayed()->nop();

  // binary search loop
  {
    Label loop;
    __ bind(loop);
    // int h = (i + j) >> 1;
    __ add(h, i, j);
    __ shr(h, 1);
    // if (key < array[h].fast_match()) {
    //   j = h;
    // } else {
    //   i = h;
    // }
    // Convert array[h].match to native byte-ordering before compare
    __ sll(AT, h, 3);
    __ add(AT, array, AT);
    __ lw(temp, AT, 0*wordSize);
    __ swap(temp);

    {
      Label set_i, end_of_if;
      __ slt(AT, key, temp);
      __ beq(AT, ZERO, set_i);
      __ delayed()->nop();

      __ b(end_of_if);
      __ delayed();
      __ move(j, h);              // j = h taken in the delay slot

      __ bind(set_i);
      __ move(i, h);

      __ bind(end_of_if);
    }
    // while (i+1 < j)
    __ bind(entry);
    __ addi(h, i, 1);
    __ slt(AT, h, j);
    __ bne(AT, ZERO, loop);
    __ delayed()->nop();
  }

  // end of binary search, result index is i (must check again!)
  Label default_case;
  // Convert array[i].match to native byte-ordering before compare
  __ sll(AT, i, 3);
  __ add(AT, array, AT);
  __ lw(temp, AT, 0 * wordSize);
  __ swap(temp);
  __ bne(key, temp, default_case);
  __ delayed()->nop();

  // entry found -> j = offset
  __ sll(AT, i, 3);
  __ add(AT, array, AT);
  __ lw(j, AT, 1 * wordSize);
  __ profile_switch_case(i, key, array);
  __ swap(j);

  __ add(BCP, BCP, j);
  __ lbu(T7, BCP, 0);
  __ dispatch_only(vtos);

  // default case -> j = default offset
  __ bind(default_case);
  __ profile_switch_default(i);
  __ lw(j, array, - 2 * wordSize);
  __ swap(j);
  __ add(BCP, BCP, j);
  __ lbu(T7, BCP, 0);
  __ dispatch_only(vtos);
}

// _return: all return bytecodes; registers a finalizer for
// _return_register_finalizer before removing the activation
void TemplateTable::_return(TosState state) {
  transition(state, state);
  assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation

  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
    assert(state == vtos, "only valid state");
    //__ movl(eax, aaddress(0));
    __ lw(T4, aaddress(0));       // T4 = 'this' (local 0)
    //__ movl(edi, Address(eax, oopDesc::klass_offset_in_bytes()));
    __ lw(LVP, T4, oopDesc::klass_offset_in_bytes());
    //__ movl(edi, Address(edi, Klass::access_flags_offset_in_bytes()
    //+ sizeof(oopDesc)));
    __ lw(LVP, LVP, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
    //__ testl(edi, JVM_ACC_HAS_FINALIZER);
    __ move(AT, JVM_ACC_HAS_FINALIZER);
    __ andr(AT, AT, LVP);//by_css
    //__ andi(AT, LVP, JVM_ACC_HAS_FINALIZER);
    Label skip_register_finalizer;
    //__ jcc(Assembler::zero, skip_register_finalizer);
    __ beq(AT, ZERO, skip_register_finalizer);
    __ delayed()->nop();
    //__ call_VM(noreg, CAST_FROM_FN_PTR(address,
    //InterpreterRuntime::register_finalizer), eax);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), T4);
    __ bind(skip_register_finalizer);
  }
  __ remove_activation(state, T9); // T9 = return entry point
  __ jr(T9);
  __ delayed()->nop();
}

// ----------------------------------------------------------------------------
// Volatile variables demand their effects be made known to all CPU's
// in order.
Store buffers on most chips allow reads & writes to // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode // without some kind of memory barrier (i.e., it's not sufficient that // the interpreter does not reorder volatile references, the hardware // also must not reorder them). // // According to the new Java Memory Model (JMM): // (1) All volatiles are serialized wrt to each other. ALSO reads & // writes act as aquire & release, so: // (2) A read cannot let unrelated NON-volatile memory refs that // happen after the read float up to before the read. It's OK for // non-volatile memory refs that happen before the volatile read to // float down below it. // (3) Similar a volatile write cannot let unrelated NON-volatile // memory refs that happen BEFORE the write float down to after the // write. It's OK for non-volatile memory refs that happen after the // volatile write to float up before it. // // We only put in barriers around volatile refs (they are expensive), // not _between_ memory refs (that would require us to track the // flavor of the previous memory refs). Requirements (2) and (3) // require some barriers before volatile stores and after volatile // loads. These nearly cover requirement (1) but miss the // volatile-store-volatile-load case. This final case is placed after // volatile-stores although it could just as well go before // volatile-loads. //void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits // order_constraint) { void TemplateTable::volatile_barrier( ) { // Helper function to insert a is-volatile test and memory barrier //if (os::is_MP()) { // Not needed on single CPU // __ membar(order_constraint); //} if( !os::is_MP() ) return; // Not needed on single CPU __ sync(); } // we dont shift left 2 bits in get_cache_and_index_at_bcp // for we always need shift the index we use it. 
the ConstantPoolCacheEntry // is 16-byte long, index is the index in // constantPoolCacheOopDesc, so cache + base_offset() + index * 16 is // the corresponding ConstantPoolCacheEntry // used registers : T2 // NOTE : the returned index need also shift left 4 to get the address! void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) { assert(byte_no == 1 || byte_no == 2, "byte_no out of range"); Register temp = T2; assert_different_registers(Rcache, index, temp); const int shift_count = (1 + byte_no)*BitsPerByte; Label resolved; __ get_cache_and_index_at_bcp(Rcache, index, 1); // is resolved? __ sll(AT, index, 4); __ add(AT, Rcache, AT); __ lw(AT, AT, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset())); __ shr(AT, shift_count); __ andi(AT, AT, 0xff); __ addi(AT, AT, - bytecode()); __ beq(AT, ZERO, resolved); __ delayed()->nop(); // resolve first time through address entry; switch (bytecode()) { case Bytecodes::_getstatic : // fall through case Bytecodes::_putstatic : // fall through case Bytecodes::_getfield : // fall through case Bytecodes::_putfield : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break; case Bytecodes::_invokevirtual : // fall through case Bytecodes::_invokespecial : // fall through case Bytecodes::_invokestatic : // fall through case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; default : ShouldNotReachHere(); } __ move(A1, (int)bytecode()); __ call_VM(NOREG, entry, A1); // Update registers with resolved info __ get_cache_and_index_at_bcp(Rcache, index, 1); __ bind(resolved); } // The Rcache and index registers must be set before call void TemplateTable::load_field_cp_cache_entry(Register obj, Register cache, Register index, Register off, Register flags, bool is_static = false) { assert_different_registers(cache, index, flags, off); ByteSize cp_base_offset = 
constantPoolCacheOopDesc::base_offset(); // Field offset __ shl(index, 4); __ add(index, cache, index); __ lw(off, index, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); // Flags __ lw(flags, index, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); // klass overwrite register if (is_static) { __ lw(obj, index, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); __ verify_oop(obj); } } // get the method, itable_index and flags of the current invoke void TemplateTable::load_invoke_cp_cache_entry(int byte_no, Register method, Register itable_index, Register flags, bool is_invokevirtual, bool is_invokevfinal /*unused*/) { // setup registers ///const Register cache = ecx; ///const Register index = edx; const Register cache = T3; const Register index = T4; assert_different_registers(method, flags); assert_different_registers(method, cache, index); assert_different_registers(itable_index, flags); assert_different_registers(itable_index, cache, index); // determine constant pool cache field offsets const int method_offset = in_bytes( constantPoolCacheOopDesc::base_offset() + (is_invokevirtual ? 
ConstantPoolCacheEntry::f2_offset() : ConstantPoolCacheEntry::f1_offset() ) ); const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::flags_offset()); // access constant pool cache fields const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f2_offset()); resolve_cache_and_index(byte_no, cache, index); assert(wordSize == 4, "adjust code below"); // note we shift 4 not 2, for we get is the true inde // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version __ sll(AT, index, 4); __ add(AT, cache, AT); __ lw(method, AT, method_offset); if (itable_index != NOREG) { //__ sll(AT, index, 4); //__ addu(AT, cache, AT); __ lw(itable_index, AT, index_offset); } __ lw(flags, AT, flags_offset); } // The registers cache and index expected to be set before call. // Correct values of the cache and index registers are preserved. void TemplateTable::jvmti_post_field_access(Register cache, Register index, bool is_static, bool has_tos) { // do the JVMTI work here to avoid disturbing the register state below // We use c_rarg registers here because we want to use the register used in // the call to the VM if (JvmtiExport::can_post_field_access()) { // Check to see if a field access watch has been set before we take // the time to call into the VM. Label L1; assert_different_registers(cache, index, FSR); // __ movl(eax, Address((int)JvmtiExport::get_field_access_count_addr(), relocInfo::none)); __ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_access_count_addr())); __ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_access_count_addr())); // __ testl(eax,eax); // __ beq(T3, ZERO, L1); __ beq(FSR, ZERO, L1); __ delayed()->nop(); // We rely on the bytecode being resolved and the cpCache entry filled in. 
    // cache entry pointer
    __ addi(cache, cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
    __ shl(index, 4);
    __ add(cache, cache, index);
    if (is_static) {
      __ move(FSR, ZERO);      // NULL object for static field access
    } else {
      __ lw(FSR, SP, 0);       // receiver is on top of the expression stack
      __ verify_oop(FSR);
    }
    // FSR: object pointer or NULL
    // cache: cache entry pointer
    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
               FSR, cache);
    __ get_cache_and_index_at_bcp(cache, index, 1);
    __ bind(L1);
  }
}

// Pop the receiver off the expression stack, null-check and verify it.
void TemplateTable::pop_and_check_object(Register r) {
  __ pop_ptr(r);
  __ null_check(r);  // for field access must check obj.
  __ verify_oop(r);
}

// used registers : T1, T2, T3, T4
// T1 : flags
// T2 : off
// T3 : obj
// T4 : field address
// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the
// following mapping to the TosState states:
// btos: 0
// ctos: 1
// stos: 2
// itos: 3
// ltos: 4
// ftos: 5
// dtos: 6
// atos: 7
// vtos: 8
// see ConstantPoolCacheEntry::set_field for more info
//
// Shared template for getfield/getstatic: dispatch on the tos-state bits of
// the cp-cache flags, load the field value, push it, and (for non-static,
// non-long) patch the bytecode to its _fast_xgetfield variant.  The value is
// stored below SP first; SP is then decremented in the branch delay slot of
// the jump to Done, completing the push.
void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
  transition(vtos, vtos);
  // const Register cache = ecx;
  const Register cache = T3;
  // const Register index = edx;
  const Register index = T4;
  const Register obj   = T3;
  const Register off   = T2;
  const Register flags = T1;

  resolve_cache_and_index(byte_no, cache, index);
  jvmti_post_field_access(cache, index, is_static, false);
  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);

  if (!is_static) pop_and_check_object(obj);
  __ add(T4, obj, off);   // T4 = address of the field

  Label Done, notByte, notInt, notShort, notChar,
        notLong, notFloat, notObj, notDouble;

  assert(btos == 0, "change code, btos != 0");
  __ shr(flags, ConstantPoolCacheEntry::tosBits);
  __ andi(flags, flags, 0xf);
  __ bne(flags, ZERO, notByte);
  __ delayed()->nop();

  // btos
  __ lb(FSR, T4, 0);
  __ sw(FSR, SP, - wordSize);
  // Rewrite bytecode to be faster
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notByte);
  __ move(AT, itos);
  __ bne(T1, AT, notInt);
  __ delayed()->nop();

  // itos
  __ lw(FSR, T4, 0);
  __ sw(FSR, SP, - wordSize);
  // Rewrite bytecode to be faster
  if (!is_static) {
    // patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
    patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notInt);
  __ move(AT, atos);
  __ bne(T1, AT, notObj);
  __ delayed()->nop();

  // atos
  __ lw(FSR, T4, 0);
  __ sw(FSR, SP, - wordSize);
  if (!is_static) {
    //patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
    patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notObj);
  __ move(AT, ctos);
  __ bne(T1, AT, notChar);
  __ delayed()->nop();

  // ctos
  __ lhu(FSR, T4, 0);
  __ sw(FSR, SP, - wordSize);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notChar);
  __ move(AT, stos);
  __ bne(T1, AT, notShort);
  __ delayed()->nop();

  // stos
  __ lh(FSR, T4, 0);
  __ sw(FSR, SP, - wordSize);
  if (!is_static) {
    // patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
    patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notShort);
  __ move(AT, ltos);
  __ bne(T1, AT, notLong);
  __ delayed()->nop();

  // FIXME : the load/store should be atomic, we have no simple method to
  // do this in mips32
  // ltos
  __ lw(FSR, T4, 0*wordSize);
  __ lw(SSR, T4, 1*wordSize);
  __ sw(FSR, SP, - 2*wordSize);
  __ sw(SSR, SP, - 1*wordSize);
  // Don't rewrite to _fast_lgetfield for potential volatile case.
  __ b(Done);
  __ delayed()->addi(SP, SP, - 2 * wordSize);

  __ bind(notLong);
  __ move(AT, ftos);
  __ bne(T1, AT, notFloat);
  __ delayed()->nop();

  // ftos
  __ lwc1(FSF, T4, 0);
  __ swc1(FSF, SP, - wordSize);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - wordSize);

  __ bind(notFloat);
  __ move(AT, dtos);
  __ bne(T1, AT, notDouble);
  __ delayed()->nop();

  // dtos
  __ lwc1(FSF, T4, 0 * wordSize);
  __ lwc1(SSF, T4, 1 * wordSize);
  __ swc1(FSF, SP, - 2 * wordSize);
  __ swc1(SSF, SP, - 1 * wordSize);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->addi(SP, SP, - 2 * wordSize);

  __ bind(notDouble);
  __ stop("Bad state");

  __ bind(Done);
  // Doug Lea believes this is not needed with current Sparcs (TSO) and
  // Intel (PSO).
  //volatile_barrier( );
  //jerome_for_debug
  /*
  __ nop(); __ nop(); __ nop(); __ nop(); __ nop();
  __ nop(); __ nop(); __ nop(); __ nop();
  __ verify_oop(FSR);*/
}

void TemplateTable::getfield(int byte_no) {
  getfield_or_static(byte_no, false);
}

void TemplateTable::getstatic(int byte_no) {
  getfield_or_static(byte_no, true);
}

/*
// used registers : T1, T2, T3, T4
// T1 : cache & cp entry
// T2 : obj
// T3 : flags & value pointer
// T4 : index
// see ConstantPoolCacheEntry::set_field for more info
void TemplateTable::jvmti_post_field_mod(int byte_no, bool is_static) {
*/
// The registers cache and index expected to be set before call.
// The function may destroy various registers, just not the cache and
// index registers.
//
// If JVMTI field-modification watches are enabled, notify the VM of the
// pending store. The new value is passed as a jvalue* pointing into the
// expression stack.
void TemplateTable::jvmti_post_field_mod(Register cache, Register index,
                                         bool is_static) {
  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();

  if (JvmtiExport::can_post_field_modification()) {
    // Check to see if a field modification watch has been set before we take
    // the time to call into the VM.
    Label L1;
    assert_different_registers(cache, index, T3);
    __ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
    __ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
    __ beq(FSR, ZERO, L1);
    __ delayed()->nop();

    /* // We rely on the bytecode being resolved and the cpCache entry filled in.
    resolve_cache_and_index(byte_no, T1, T4);
    */
    // The cache and index registers have been already set.
    // This allows to eliminate this call but the cache and index
    // registers have to be correspondingly used after this line.
    // __ get_cache_and_index_at_bcp(eax, edx, 1);
    __ get_cache_and_index_at_bcp(T1, T4, 1);

    if (is_static) {
      __ move(T2, ZERO);    // NULL receiver for static stores
    } else {
      // Life is harder. The stack holds the value on top,
      // followed by the object.
      // We don't know the size of the value, though;
      // it could be one or two words
      // depending on its type. As a result, we must find
      // the type to determine where the object is.
      Label two_word, valsize_known;
      __ sll(AT, T4, 4);
      __ add(AT, T1, AT);
      __ lw(T3, AT, in_bytes(cp_base_offset
                             + ConstantPoolCacheEntry::flags_offset()));
      __ move(T2, SP);
      __ shr(T3, ConstantPoolCacheEntry::tosBits);
      // Make sure we don't need to mask ecx for tosBits
      // after the above shift
      ConstantPoolCacheEntry::verify_tosBits();
      __ move(AT, ltos);
      __ beq(T3, AT, two_word);
      __ delayed()->nop();
      __ move(AT, dtos);
      __ beq(T3, AT, two_word);
      __ delayed()->nop();
      // one-word value: the skip happens in the branch delay slot
      __ b(valsize_known);
      //__ delayed()->addi(T2, T2, wordSize*1);
      __ delayed()->addi(T2, T2,Interpreter::expr_offset_in_bytes(1) );
      __ bind(two_word);
      // __ addi(T2, T2, wordSize*2);
      __ addi(T2, T2,Interpreter::expr_offset_in_bytes(2));
      __ bind(valsize_known);
      // setup object pointer
      __ lw(T2, T2, 0*wordSize);
    }
    // cache entry pointer
    __ addi(T1, T1, in_bytes(cp_base_offset));
    __ shl(T4, 4);
    __ addu(T1, T1, T4);
    // object (tos)
    __ move(T3, SP);
    // T2: object pointer set up above (NULL if static)
    // T1: cache entry pointer
    // T3: jvalue object on the stack
    __ call_VM(NOREG,
               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
               T2, T1, T3);
    __ get_cache_and_index_at_bcp(cache, index, 1);
    __ bind(L1);
  }
}

// used registers : T1, T2, T3, T4
// T1 : flags
// T2 : off
// T3 : obj
// T4 : volatile bit
// see ConstantPoolCacheEntry::set_field for more info
//
// Shared template for putfield/putstatic: pop the value (and receiver for
// instance stores), store it, patch the bytecode to the _fast_xputfield
// variant, and emit a trailing barrier for volatile stores (T4 holds the
// isolated volatile flag bit throughout).
void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
  transition(vtos, vtos);

  const Register cache = T3;
  const Register index = T4;
  const Register obj   = T3;
  const Register off   = T2;
  const Register flags = T1;

  resolve_cache_and_index(byte_no, cache, index);
  jvmti_post_field_mod(cache, index, is_static);
  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
  // Doug Lea believes this is not needed with current Sparcs (TSO) and
  // Intel (PSO).
  // volatile_barrier( );

  Label notVolatile, Done;
  __ move(AT, 1<<ConstantPoolCacheEntry::volatileField);
  __ andr(T4, T1, AT);

  Label notByte, notInt, notShort, notChar,
        notLong, notFloat, notObj, notDouble;

  assert(btos == 0, "change code, btos != 0");
  // btos
  __ shr(T1, ConstantPoolCacheEntry::tosBits);
  __ andi(T1, T1, 0xf);
  __ bne(T1, ZERO, notByte);
  __ delayed()->nop();

  __ pop(btos);
  if (!is_static) {
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ sb(FSR, AT, 0);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_bputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notByte);
  // itos
  __ move(AT, itos);
  __ bne(T1, AT, notInt);
  __ delayed()->nop();

  __ pop(itos);
  if (!is_static) {
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ sw(FSR, AT, 0);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_iputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notInt);
  // atos
  __ move(AT, atos);
  __ bne(T1, AT, notObj);
  __ delayed()->nop();

  __ pop(atos);
  if (!is_static) {
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ sw(FSR, AT, 0);
  __ store_check(T3);   // GC card-mark for the reference store
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_aputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notObj);
  // ctos
  __ move(AT, ctos);
  __ bne(T1, AT, notChar);
  __ delayed()->nop();

  // __ lhu(FSR, SP, 0);
  // __ addi(SP, SP, wordSize);
  __ pop(ctos);
  if (!is_static) {
    // __ lw(T3, SP, addent);
    // addent += 1 * wordSize;
    // __ verify_oop(T3);
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ sh(FSR, AT, 0);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_cputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notChar);
  // stos
  __ move(AT, stos);
  __ bne(T1, AT, notShort);
  __ delayed()->nop();

  // __ lh(FSR, SP, 0);
  // __ addi(SP, SP, wordSize);
  __ pop(stos);
  if (!is_static) {
    // __ lw(T3, SP, addent);
    // addent += 1 * wordSize;
    // __ verify_oop(T3);
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ sh(FSR, AT, 0);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_sputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notShort);
  // ltos
  __ move(AT, ltos);
  __ bne(T1, AT, notLong);
  __ delayed()->nop();

  // FIXME: there is no simple method to load/store 64-bit data in an atomic
  // operation
  // we just ignore the volatile flag.
  //Label notVolatileLong;
  //__ beq(T4, ZERO, notVolatileLong);
  //__ delayed()->nop();

  //addent = 2 * wordSize;
  // no need
  //__ lw(FSR, SP, 0);
  //__ lw(SSR, SP, 1 * wordSize);
  //if (!is_static) {
  //  __ lw(T3, SP, addent);
  //  addent += 1 * wordSize;
  //  __ verify_oop(T3);
  //}
  //__ addu(AT, T3, T2);

  // Replace with real volatile test
  // NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for
  // Godson2 even in 64-bit kernel
  // last modified by yjl 7/12/2005
  //__ ldc1(FSF, SP, 0);
  //__ sdc1(FSF, AT, 0);
  //volatile_barrier();

  // Don't rewrite volatile version
  //__ b(notVolatile);
  //__ delayed()->addiu(SP, SP, addent);

  //__ bind(notVolatileLong);

  //__ pop(ltos);  // overwrites edx
  //  __ lw(FSR, SP, 0 * wordSize);
  //  __ lw(SSR, SP, 1 * wordSize);
  //  __ addi(SP, SP, 2*wordSize);
  __ pop(ltos);
  if (!is_static) {
    // __ lw(T3, SP, addent);
    // addent += 1 * wordSize;
    // __ verify_oop(T3);
    pop_and_check_object(T3);
  }
  // store the two halves non-atomically (see FIXME above)
  __ add(AT, T3, T2);
  __ sw(FSR, AT, 0);
  __ sw(SSR, AT, 4);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_lputfield, T3, T2);
  }
  // jump past the volatile-store barrier check (volatile long is not handled)
  __ b(notVolatile);
  __ delayed()->nop();

  __ bind(notLong);
  // ftos
  __ move(AT, ftos);
  __ bne(T1, AT, notFloat);
  __ delayed()->nop();

  // __ lwc1(FSF, SP, 0);
  // __ addi(SP, SP, wordSize);
  __ pop(ftos);
  if (!is_static) {
    // __ lw(T3, SP, addent);
    // addent += 1 * wordSize;
    // __ verify_oop(T3);
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  __ swc1(FSF, AT, 0);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_fputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notFloat);
  // dtos
  __ move(AT, dtos);
  __ bne(T1, AT, notDouble);
  __ delayed()->nop();

  //__ ldc1(FSF, SP, 0);
  // __ lwc1(FSF, SP, 0);
  // __ lwc1(SSF, SP, wordSize);
  // __ addi(SP, SP, 2*wordSize);
  __ pop(dtos);
  if (!is_static) {
    // __ lw(T3, SP, addent);
    // addent += 1 * wordSize;
    // __ verify_oop(T3);
    pop_and_check_object(T3);
  }
  __ add(AT, T3, T2);
  //__ sdc1(F12, AT, 0);
  __ swc1(FSF, AT, 0);
  __ swc1(SSF, AT, wordSize);
  if (!is_static) {
    patch_bytecode(Bytecodes::_fast_dputfield, T3, T2);
  }
  __ b(Done);
  __ delayed()->nop();

  __ bind(notDouble);
  __ stop("Bad state");

  __ bind(Done);
  // Check for volatile store: T4 still holds the isolated volatile bit
  __ beq(T4, ZERO, notVolatile);
  __ delayed()->nop();
  volatile_barrier( );
  __ bind(notVolatile);
}

void TemplateTable::putfield(int byte_no) {
  putfield_or_static(byte_no, false);
}

void TemplateTable::putstatic(int byte_no) {
  putfield_or_static(byte_no, true);
}

// used registers : T1, T2, T3
// T1 : cp_entry
// T2 : obj
// T3 : value pointer
//
// JVMTI notification for the _fast_xputfield bytecodes: builds a jvalue on
// the stack from the tos value (FSR/SSR/FSF/SSF), calls the VM, then restores
// the value registers and pops the jvalue space.
void TemplateTable::jvmti_post_fast_field_mod() {
  if (JvmtiExport::can_post_field_modification()) {
    // Check to see if a field modification watch has been set before we take
    // the time to call into the VM.
    Label L2;
    __ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
    __ lw(T3, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
    __ beq(T3, ZERO, L2);
    __ delayed()->nop();
    //__ pop(T2);
    __ pop_ptr(T2);
    //__ lw(T2, SP, 0);
    __ verify_oop(T2);
    __ push_ptr(T2);
    // reserve jvalue space on the stack; T3 points at it
    __ addiu(SP, SP, -sizeof(jvalue));
    __ move(T3, SP);
    //__ push(T2);
    //__ move(T2, ZERO);

    switch (bytecode()) {          // load values into the jvalue object
    case Bytecodes::_fast_bputfield:
      __ sb(FSR, SP, 0);
      break;
    case Bytecodes::_fast_sputfield:
      __ sh(FSR, SP, 0);
      break;
    case Bytecodes::_fast_cputfield:
      __ sh(FSR, SP, 0);
      break;
    case Bytecodes::_fast_iputfield:
      __ sw(FSR, SP, 0);
      break;
    case Bytecodes::_fast_lputfield:
      __ sw(FSR, SP, 0);
      __ sw(SSR, SP, 4);
      break;
    case Bytecodes::_fast_fputfield:
      __ swc1(FSF, SP, 0);
      break;
    case Bytecodes::_fast_dputfield:
      __ swc1(FSF, SP, 0);
      __ swc1(SSF, SP, 4);
      break;
    case Bytecodes::_fast_aputfield:
      __ sw(FSR, SP, 0);
      break;
    default:  ShouldNotReachHere();
    }

    //__ pop(T2);  // restore copy of object pointer

    // Save eax and sometimes edx because call_VM() will clobber them,
    // then use them for JVM/DI purposes
    __ push(FSR);
    if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
    // access constant pool cache entry
    __ get_cache_entry_pointer_at_bcp(T1, T4, 1);
    // no need, verified ahead
    __ verify_oop(T2);

    // ebx: object pointer copied above
    // eax: cache entry pointer
    // ecx: jvalue object on the stack
    __ call_VM(NOREG,
               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification),
               T2, T1, T3);
    if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
    //__ pop(FSR); // restore lower value
    //__ addi(SP, SP, sizeof(jvalue));  // release jvalue object space
    // reload FSR and release both the saved FSR slot and the jvalue in one add
    __ lw(FSR, SP, 0);
    __ addiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
    __ bind(L2);
  }
}

// used registers : T2, T3, T4
// T2 : index & off & field address
// T3 : cache & obj
// T4 : flags
//
// Template for the rewritten _fast_xputfield bytecodes: the value is already
// in the tos register(s); only the receiver remains on the stack. Volatile
// fields take the first path, which appends a memory barrier after the store.
void TemplateTable::fast_storefield(TosState state) {
  transition(state, vtos);

  ByteSize base = constantPoolCacheOopDesc::base_offset();

  jvmti_post_fast_field_mod();

  // access constant pool cache
  __ get_cache_and_index_at_bcp(T3, T2, 1);

  // test for volatile with edx but edx is tos register for lputfield.
  __ sll(AT, T2, 4);
  __ add(AT, T3, AT);
  __ lw(T4, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));

  // replace index with field offset from cache entry
  __ lw(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));

  // Doug Lea believes this is not needed with current Sparcs (TSO) and
  // Intel (PSO).
  // volatile_barrier( );

  Label notVolatile, Done;
  // Check for volatile store
  __ move(AT, 1<<ConstantPoolCacheEntry::volatileField);
  __ andr(AT, T4, AT);
  __ beq(AT, ZERO, notVolatile);
  __ delayed()->nop();

  // Get object from stack
  // NOTE : the value in FSR/FSF now
  // __ pop(T3);
  // __ verify_oop(T3);
  pop_and_check_object(T3);
  // field addresses
  __ add(T2, T3, T2);

  // access field
  switch (bytecode()) {
  case Bytecodes::_fast_bputfield:
    __ sb(FSR, T2, 0);
    break;
  case Bytecodes::_fast_sputfield: // fall through
  case Bytecodes::_fast_cputfield:
    __ sh(FSR, T2, 0);
    break;
  case Bytecodes::_fast_iputfield:
    __ sw(FSR, T2, 0);
    break;
  case Bytecodes::_fast_lputfield:
    __ sw(FSR, T2, 0 * wordSize);
    __ sw(SSR, T2, 1 * wordSize);
    break;
  case Bytecodes::_fast_fputfield:
    __ swc1(FSF, T2, 0);
    break;
  case Bytecodes::_fast_dputfield:
    __ swc1(FSF, T2, 0 * wordSize);
    __ swc1(SSF, T2, 1 * wordSize);
    break;
  case Bytecodes::_fast_aputfield:
    __ sw(FSR, T2, 0);
    __ store_check(T3);
    break;
  default:
    ShouldNotReachHere();
  }

  Label done;
  volatile_barrier( );
  __ b(done);
  __ delayed()->nop();

  // Same code as above, but don't need edx to test for volatile.
  __ bind(notVolatile);

  // Get object from stack
  // __ pop(T3);
  // __ verify_oop(T3);
  pop_and_check_object(T3);
  //get the field address
  __ add(T2, T3, T2);

  // access field
  switch (bytecode()) {
  case Bytecodes::_fast_bputfield:
    __ sb(FSR, T2, 0);
    break;
  case Bytecodes::_fast_sputfield: // fall through
  case Bytecodes::_fast_cputfield:
    __ sh(FSR, T2, 0);
    break;
  case Bytecodes::_fast_iputfield:
    __ sw(FSR, T2, 0);
    break;
  case Bytecodes::_fast_lputfield:
    __ sw(FSR, T2, 0 * wordSize);
    __ sw(SSR, T2, 1 * wordSize);
    break;
  case Bytecodes::_fast_fputfield:
    __ swc1(FSF, T2, 0);
    break;
  case Bytecodes::_fast_dputfield:
    __ swc1(FSF, T2, 0 * wordSize);
    __ swc1(SSF, T2, 1 * wordSize);
    break;
  case Bytecodes::_fast_aputfield:
    __ sw(FSR, T2, 0);
    __ store_check(T3);
    break;
  default:
    ShouldNotReachHere();
  }
  __ bind(done);
}

// used registers : T2, T3, T4
// T3 : cp_entry & cache
// T2 : index & offset
//
// Template for the rewritten _fast_xgetfield bytecodes: the receiver is in
// FSR (atos in); load the field value into the tos register for `state`.
void TemplateTable::fast_accessfield(TosState state) {
  transition(atos, state);

  // do the JVMTI work here to avoid disturbing the register state below
  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we take
    // the time to call into the VM.
    Label L1;
    __ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_access_count_addr()));
    __ lw(T3, AT, Assembler::split_low((int)JvmtiExport::get_field_access_count_addr()));
    __ beq(T3, ZERO, L1);
    __ delayed()->nop();
    // access constant pool cache entry
    __ get_cache_entry_pointer_at_bcp(T3, T4, 1);
    // receiver is saved in TSR across the VM call
    __ move(TSR, FSR);
    __ verify_oop(FSR);
    // FSR: object pointer copied above
    // T3: cache entry pointer
    __ call_VM(NOREG,
               CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
               FSR, T3);
    __ move(FSR, TSR);
    __ bind(L1);
  }

  // access constant pool cache
  __ get_cache_and_index_at_bcp(T3, T2, 1);
  // replace index with field offset from cache entry
  __ sll(AT, T2, 4);
  __ add(AT, T3, AT);
  __ lw(T2, AT, in_bytes(constantPoolCacheOopDesc::base_offset()
                         + ConstantPoolCacheEntry::f2_offset()));

  // eax: object
  __ verify_oop(FSR);
  // __ null_check(FSR, 0);
  __ null_check(FSR);
  // field addresses
  __ add(FSR, FSR, T2);

  // access field
  switch (bytecode()) {
  case Bytecodes::_fast_bgetfield:
    __ lb(FSR, FSR, 0);
    break;
  case Bytecodes::_fast_sgetfield:
    __ lh(FSR, FSR, 0);
    break;
  case Bytecodes::_fast_cgetfield:
    __ lhu(FSR, FSR, 0);
    break;
  case Bytecodes::_fast_igetfield:
    __ lw(FSR, FSR, 0);
    break;
  case Bytecodes::_fast_lgetfield:
    // longs are never rewritten to a fast getfield (volatile issue)
    __ stop("should not be rewritten");
    break;
  case Bytecodes::_fast_fgetfield:
    __ lwc1(FSF, FSR, 0);
    break;
  case Bytecodes::_fast_dgetfield:
    __ lwc1(FSF, FSR, 0);
    __ lwc1(SSF, FSR, 4);
    break;
  case Bytecodes::_fast_agetfield:
    __ lw(FSR, FSR, 0);
    __ verify_oop(FSR);
    break;
  default:
    ShouldNotReachHere();
  }
  // Doug Lea believes this is not needed with current Sparcs(TSO) and
  // Intel(PSO)
  // volatile_barrier( );
}

// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
// used registers : T1, T2, T3, T4
// T1 : obj & field address
// T2 : off
// T3 : cache
// T4 : index
//
// Fused aload_0 + fast getfield: loads local 0 as the receiver and the field
// value in one template.
void TemplateTable::fast_xaccess(TosState state) {
  transition(vtos, state);
  // get receiver
  __ lw(T1, aaddress(0));
  debug_only(__ verify_local_tag(frame::TagReference, 0));
  // access constant pool cache
  __ get_cache_and_index_at_bcp(T3, T4, 2);
  __ sll(AT, T4, 4);
  __ add(AT, T3, AT);
  __ lw(T2, AT, in_bytes(constantPoolCacheOopDesc::base_offset()
                         + ConstantPoolCacheEntry::f2_offset()));
  // make sure exception is reported in correct bcp range (getfield is
  // next instruction)
  __ addi(BCP, BCP, 1);
  // __ null_check(T1, 0);
  __ null_check(T1);
  __ add(T1, T1, T2);

  if (state == itos) {
    __ lw(FSR, T1, 0);
  } else if (state == atos) {
    __ lw(FSR, T1, 0);
    __ verify_oop(FSR);
  } else if (state == ftos) {
    __ lwc1(FSF, T1, 0);
  } else {
    ShouldNotReachHere();
  }
  // restore BCP (see the +1 above)
  __ addi(BCP, BCP, -1);
}

//---------------------------------------------------
//-------------------------------------------------
// Calls

void TemplateTable::count_calls(Register method, Register temp) {
  // implemented elsewhere
  ShouldNotReachHere();
}

// method, index, recv, flags: T1, T2, T3, T4
// byte_no = 2 for _invokevirtual, 1 else
// T0 : return address
// get the method & index of the invoke, and push the return address of
// the invoke(first word in the frame)
// this address is where the return code jmp to.
// NOTE : this method will set T3&T4 as recv&flags
//
// Shared prologue for all invoke bytecodes: resolves the constant-pool
// cache entry into method/index/flags, loads the receiver (except for
// invokestatic), null-checks it for invokespecial, and loads the
// interpreter return address into RA based on the result type.
void TemplateTable::prepare_invoke(Register method, Register index, int byte_no, Bytecodes::Code code) {
  // determine flags
  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
  const bool load_receiver       = code != Bytecodes::_invokestatic;
  const bool receiver_null_check = is_invokespecial;
  // const bool save_flags = is_invokeinterface || is_invokevirtual;
  // setup registers & access constant pool cache
  const Register recv  = T3;
  const Register flags = T4;
  assert_different_registers(method, index, recv, flags);

  // save 'interpreter return address'
  __ save_bcp();

  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);

  // load receiver if needed (note: no return address pushed yet)
  if (load_receiver) {
    // Low byte of flags holds the parameter size; use it to index back
    // into the expression stack for the receiver.
    __ andi(AT, flags, 0xff);
    __ shl(AT, Interpreter::stackElementScale());
    __ add(AT, SP, AT);
    //__ move(T8, AT);
    __ lw(recv, AT, - Interpreter::expr_offset_in_bytes(1));
    __ verify_oop(recv);
  }
  /*
  if (load_receiver) {
    Label mmm;
    __ move(AT, 0xf0000000);
    __ andr(AT, AT, recv);
    __ srl(AT, AT, 28);
    __ addi(AT, AT, -1);
    __ bne(AT, ZERO, mmm);
    __ delayed()->nop();
    // __ move(AT, (int)&jerome6);
    // __ lw(AT, AT, 0);
    // __ beq(AT, ZERO, mmm);
    // __ delayed()->nop();
    __ move(AT, (int)&jerome1 );
    __ sw(SP, AT, 0);
    __ move(AT, (int)&jerome2 );
    __ sw(FP, AT, 0);
    __ move(AT, (int)&jerome3 );
    __ sw(BCP, AT, 0);
    __ move(AT, (int)&jerome4 );
    __ sw(recv, AT, 0);
    __ move(AT, (int)&jerome5 );
    __ sw(V0, AT, 0);
    __ move(AT, (int)&jerome6 );
    __ lw(flags, T8, -4);
    __ sw(flags , AT, 0);
    __ move(AT, (int)&jerome7 );
    __ lw(flags, T8, 0);
    __ sw(flags , AT, 0);
    __ move(AT, (int)&jerome8 );
    __ lw(flags, T8, 4);
    __ sw(flags , AT, 0);
    __ move(AT, (int)&jerome9 );
    __ lw(flags, recv, oopDesc::klass_offset_in_bytes());
    __ sw(flags , AT, 0);
    __ move(AT, (int)&jerome10 );
    __ lbu(flags, BCP, -1);
    __ sw(flags , AT, 0);
    __ move(AT, (int)&jerome5 );
    __ lw(flags, AT, 0);
    __ pushad();
    // __ enter();
    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics),
            relocInfo::runtime_call_type);
    __ delayed()->nop();
    // __ leave();
    __ popad();
    __ bind(mmm);
  }
  */

  // do null check if needed
  if (receiver_null_check) {
    __ null_check(recv);
  }
  //FIXME, why not save flags here?
  // compute return type
  __ srl(T0, flags, ConstantPoolCacheEntry::tosBits);
  // Make sure we don't need to mask flags for tosBits after the above shift
  ConstantPoolCacheEntry::verify_tosBits();
  // load return address: index the per-result-type return-entry table by
  // the tos state computed above; interface calls use the 5-entry table.
  {
    const int table = is_invokeinterface
        ? (int)Interpreter::return_5_addrs_by_index_table()
        : (int)Interpreter::return_3_addrs_by_index_table();
    __ lui(AT, Assembler::split_high(table));
    __ shl(T0, 2);
    __ add(AT, AT, T0);
    __ lw(RA, AT, Assembler::split_low(table));
  }
  // push return address, see generate_fixed_frame for more info
  // __ push(T0);
}

// Dispatches a virtual call: takes the vfinal fast path when the cache
// entry marks the method final, otherwise does a vtable dispatch through
// the receiver's klass.
// used registers : T0, T3, T4, T7, T9
// T9 : entry
// T3 : recv, this two register using convention is by prepare_invoke
// T4 : flags, klass
// T7 : method, index must be T7
void TemplateTable::invokevirtual_helper(Register index, Register recv, Register flags) {
  assert_different_registers(index, recv, T1, T4);
  // Test for an invoke of a final method
  Label notFinal;
  __ move(AT, (1 << ConstantPoolCacheEntry::vfinalMethod));
  __ andr(AT, flags, AT);
  __ beq(AT, ZERO, notFinal);
  __ delayed()->nop();

  Register method = index;
  // method must be T7
  assert(method == T7, "methodOop must be T7 for interpreter calling convention");
  // do the call - the index is actually the method to call
  // the index is indeed methodOop, for this is vfinal,
  // see ConstantPoolCacheEntry::set_method for more info
  __ verify_oop(method);
  // It's final, need a null check here!
  __ null_check(recv);
  // profile this call
  __ profile_final_call(T0);
  __ move(T0, recv);
  __ jump_from_interpreted(method, T4);

  __ bind(notFinal);
  // get receiver klass
  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  // Keep recv in ecx for callee expects it there
  __ lw(T4, recv, oopDesc::klass_offset_in_bytes());
  __ verify_oop(T4);
  // profile this call
  __ profile_virtual_call(T1, T0, T4);

  // get target methodOop & entry point
  const int base = instanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
  __ sll(AT, index, 2);
  __ add(AT, T4, AT);
  //this is a ualign read
  __ lw(method, AT, base + vtableEntry::method_offset_in_bytes());
  __ move(T0, recv);
  __ jump_from_interpreted(method, T4);
}

void TemplateTable::invokevirtual(int byte_no) {
  transition(vtos, vtos);
  prepare_invoke(T7, NOREG, byte_no, bytecode());
  // now recv & flags in T3, T4
  invokevirtual_helper(T7, T3, T4);
}

// used registers : T9, T7
// T9 : entry
// T7 : method
void TemplateTable::invokespecial(int byte_no) {
  transition(vtos, vtos);
  // prepare_invoke(method, index, byte_no, bytecode());
  prepare_invoke(T7, NOREG, byte_no, bytecode());
  // do the call
  // now recv & flags in T3, T4
  __ verify_oop(T7);
  __ profile_call(T9);
  __ jump_from_interpreted(T7, T9);
  // NOTE(review): this move is emitted after jump_from_interpreted;
  // unless that macro leaves its final delay slot open, it is dead code.
  // Compare with invokevirtual_helper, which moves recv into T0 BEFORE
  // the jump -- confirm against macroAssembler.
  __ move(T0, T3);
}

void TemplateTable::invokestatic(int byte_no) {
  transition(vtos, vtos);
  prepare_invoke(T7, NOREG, byte_no, bytecode());
  __ verify_oop(T7);
  __ profile_call(T9);
  __ jump_from_interpreted(T7, T9);
}

// i have no idea what to do here, now. for future change. FIXME.
// Fast-path vfinal invoke is not generated on this port; any rewrite to
// _fast_invokevfinal would be a bug.
void TemplateTable::fast_invokevfinal(int byte_no) {
  transition(vtos, vtos);
  __ stop("fast_invokevfinal not used on x86");
}

// Interface dispatch: handles the java.lang.Object special case via the
// virtual path, then searches the receiver klass's itable for the target
// interface and indexes its method table.
// used registers : T0, T1, T2, T3, T4, T7
// T0 : itable, vtable, entry
// T1 : interface
// T3 : receiver
// T4 : flags, klass
// T7 : index, method, this is required by interpreter_entry
void TemplateTable::invokeinterface(int byte_no) {
  transition(vtos, vtos);
  //this method will use T1-T4 and T0
  prepare_invoke(T1, T7, byte_no, bytecode());
  // T1: Interface
  // T2: index
  // T3: receiver
  // T4: flags
  Label notMethod;
  __ move(AT, (1 << ConstantPoolCacheEntry::methodInterface));
  __ andr(AT, T4, AT);
  __ beq(AT, ZERO, notMethod);
  __ delayed()->nop();

  // Special case of invokeinterface called for virtual method of
  // java.lang.Object. See cpCacheOop.cpp for details.
  // This code isn't produced by javac, but could be produced by
  // another compliant java compiler.
  invokevirtual_helper(T7, T3, T4);

  __ bind(notMethod);
  // Get receiver klass into T4 - also a null check
  __ lw(T4, T3, oopDesc::klass_offset_in_bytes());
  __ verify_oop(T4);
  // profile this call
  __ profile_virtual_call(T4, T0, FSR);

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = instanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
  __ lw(AT, T4, instanceKlass::vtable_length_offset() * wordSize);
  __ shl(AT, 2);
  __ add(T0, T4, AT);
  __ addi(T0, T0, base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    __ round_to(T0, BytesPerLong);
  }
  // now T0 is the begin of the itable

  Label entry, search, interface_ok;
  ///__ jmp(entry);
  __ b(entry);
  __ delayed()->nop();

  __ bind(search);
  __ increment(T0, itableOffsetEntry::size() * wordSize);

  __ bind(entry);
  // Check that the entry is non-null. A null entry means that the receiver
  // class doesn't implement the interface, and wasn't the same as the
  // receiver class checked when the interface was resolved.
  __ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  __ bne(AT, ZERO, interface_ok);
  __ delayed()->nop();
  // throw exception
  // the call_VM checks for exception, so we should never return here.
  //__ pop();//FIXME here,
  // pop return address (pushed by prepare_invoke).
  // no need now, we just save the value in RA now
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
  __ should_not_reach_here();

  __ bind(interface_ok);
  //NOTICE here, no pop as x86 do
  //__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
  // Loop until the itable entry's interface matches the one we look for (T1).
  __ bne(AT, T1, search);
  __ delayed()->nop();

  // now we get vtable of the interface
  __ lw(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
  __ addu(T0, T4, T0);
  assert(itableMethodEntry::size() * wordSize == 4, "adjust the scaling in the code below");
  __ sll(AT, T7, 2);
  __ addu(AT, T0, AT);
  // now we get the method
  __ lw(T7, AT, 0);

  // T7: methodOop to call
  // T3: receiver
  // Check for abstract method error
  // Note: This should be done more efficiently via a throw_abstract_method_error
  // interpreter entry point and a conditional jump to it in case of a null
  // method.
  {
    Label L;
    ///__ testl(ebx, ebx);
    ///__ jcc(Assembler::notZero, L);
    __ bne(T7, ZERO, L);
    __ delayed()->nop();
    // throw exception
    // note: must restore interpreter registers to canonical
    // state for exception handling to work correctly!
    ///__ popl(ebx); // pop return address (pushed by prepare_invoke)
    //__ restore_bcp(); // esi must be correct for exception handler
    //(was destroyed)
    //__ restore_locals(); // make sure locals pointer
    //is correct as well (was destroyed)
    ///__ call_VM(noreg, CAST_FROM_FN_PTR(address,
    //InterpreterRuntime::throw_AbstractMethodError));
    __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
    // the call_VM checks for exception, so we should never return here.
    __ should_not_reach_here();
    __ bind(L);
  }
  __ jump_from_interpreted(T7, T9);
}

//----------------------------------------------------------------------------------------------------
// Allocation

// Emits the _new bytecode: fast-path allocation in the TLAB or shared
// Eden (zeroing fields and writing the header inline), with fall-back to
// InterpreterRuntime::_new for unresolved/uninitialized/finalizable
// classes or allocation failure.
// T1 : tags & buffer end & thread
// T2 : object end
// T3 : klass
// T4 : object size
// A1 : cpool
// A2 : cp index
// return object in FSR
void TemplateTable::_new() {
  transition(vtos, atos);
  __ load_two_bytes_from_at_bcp(A2, AT, 1);
  __ huswap(A2);

  Label slow_case;
  Label done;
  Label initialize_header;
  Label initialize_object; // including clearing the fields
  Label allocate_shared;

  // get instanceKlass in T3
  __ get_cpool_and_tags(A1, T1);
  __ sll(AT, A2, 2);
  __ add(AT, A1, AT);
  __ lw(T3, AT, sizeof(constantPoolOopDesc));

  // make sure the class we're about to instantiate has been resolved.
  // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
  __ add(T1, T1, A2);
  __ lb(AT, T1, tags_offset);
  //__ addiu(AT, AT, - (int)JVM_CONSTANT_UnresolvedClass);
  __ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
  //__ beq(AT, ZERO, slow_case);
  __ bne(AT, ZERO, slow_case);
  __ delayed()->nop();

  /*make sure klass is initialized & doesn't have finalizer*/
  // make sure klass is fully initialized
  __ lw(T1, T3, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
  __ addiu(AT, T1, - (int)instanceKlass::fully_initialized);
  __ bne(AT, ZERO, slow_case);
  __ delayed()->nop();

  // has_finalizer
  //__ lw(T1, T3, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
  //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
  //__ andr(AT, T1, AT);
  //FIXME need confirmation and test. aoqi
  __ lw(T1, T3, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
  __ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
  __ bne(AT, ZERO, slow_case);
  __ delayed()->nop();

  // get instance_size in instanceKlass (already aligned) in T4,
  // be sure to preserve this value
  //__ lw(T4, T3, Klass::size_helper_offset_in_bytes() + sizeof(oopDesc));
  //Klass::_size_helper is renamed Klass::_layout_helper. aoqi
  __ lw(T4, T3, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));

  //
  // Allocate the instance
  // 1) Try to allocate in the TLAB
  // 2) if fail and the object is large allocate in the shared Eden
  // 3) if the above fails (or is not applicable), go to a slow case
  // (creates a new TLAB, etc.)
  const bool allow_shared_alloc =
      Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;

  if (UseTLAB) {
#ifndef OPT_THREAD
    const Register thread = T1;
    __ get_thread(thread);
#else
    const Register thread = TREG;
#endif
    // get tlab_top
    __ lw(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
    __ add(T2, FSR, T4);
    // get tlab_end
    __ lw(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
    __ slt(AT, AT, T2);
    // __ bne(AT, ZERO, allocate_shared);
    __ bne(AT, ZERO, allow_shared_alloc ? allocate_shared : slow_case);
    __ delayed()->nop();
    __ sw(T2, thread, in_bytes(JavaThread::tlab_top_offset()));

    if (ZeroTLAB) {
      // the fields have been already cleared
      __ b(initialize_header);
    } else {
      // initialize both the header and fields
      __ b(initialize_object);
    }
    __ delayed()->nop();
    /*
    if (CMSIncrementalMode) {
      // No allocation in shared eden.
      ///__ jmp(slow_case);
      __ b(slow_case);
      __ delayed()->nop();
    }
    */
  }

  // Allocation in the shared Eden , if allowed
  // T4 : instance size in words
  if(allow_shared_alloc){
    __ bind(allocate_shared);
    Label retry;
    Address heap_top(T1, Assembler::split_low((int)Universe::heap()->top_addr()));
    __ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
    __ lw(FSR, heap_top);

    __ bind(retry);
    __ add(T2, FSR, T4);
    __ lui(AT, Assembler::split_high((int)Universe::heap()->end_addr()));
    __ lw(AT, AT, Assembler::split_low((int)Universe::heap()->end_addr()));
    __ slt(AT, AT, T2);
    __ bne(AT, ZERO, slow_case);
    __ delayed()->nop();

    // Compare eax with the top addr, and if still equal, store the new
    // top addr in ebx at the address of the top addr pointer. Sets ZF if was
    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
    //
    // FSR: object begin
    // T2: object end
    // T4: instance size in words
    // if someone beat us on the allocation, try again, otherwise continue
    //__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
    __ cmpxchg(T2, heap_top, FSR);
    __ beq(AT, ZERO, retry);
    __ delayed()->nop();
  }

  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
    // The object is initialized before the header. If the object size is
    // zero, go directly to the header initialization.
    __ bind(initialize_object);
    __ addiu(T4, T4, - sizeof(oopDesc));
    __ beq(T4, ZERO, initialize_header);
    __ delayed()->nop();

    // T4 must have been multiple of 2
#ifdef ASSERT
    // make sure T4 was multiple of 2
    Label L;
    __ andi(AT, T4, 1);
    __ beq(AT, ZERO, L);
    __ delayed()->nop();
    __ stop("object size is not multiple of 2 - adjust this code");
    __ bind(L);
    // edx must be > 0, no extra check needed here
#endif

    // initialize remaining object fields: T4 is a multiple of 2
    {
      Label loop;
      __ add(T1, FSR, T4);
      __ addi(T1, T1, -8);
      // Clear two words per iteration, walking T1 back toward the object start.
      __ bind(loop);
      __ sw(ZERO, T1, sizeof(oopDesc) + 0*oopSize);
      __ sw(ZERO, T1, sizeof(oopDesc) + 1*oopSize);
      __ bne(T1, FSR, loop); //dont clear header
      __ delayed()->addi(T1, T1, -8);
      // actually sizeof(oopDesc)==8, so we can move
      // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1
    }

    //klass in T3,
    // initialize object header only.
    __ bind(initialize_header);
    if (UseBiasedLocking) {
      // __ popl(ecx); // get saved klass back in the register.
      // __ movl(ebx, Address(ecx, Klass::prototype_header_offset_in_bytes()
      // + klassOopDesc::klass_part_offset_in_bytes()));
      __ lw(AT,T3, Klass::prototype_header_offset_in_bytes()
                   + klassOopDesc::klass_part_offset_in_bytes());
      // __ movl(Address(eax, oopDesc::mark_offset_in_bytes ()), ebx);
      __ sw(AT, FSR, oopDesc::mark_offset_in_bytes ());
    } else {
      __ move(AT, (int)markOopDesc::prototype());
      __ sw(AT, FSR, oopDesc::mark_offset_in_bytes());
    }
    __ sw(T3, FSR, oopDesc::klass_offset_in_bytes());

    {
      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
      // Trigger dtrace event for fastpath
      __ push(atos);
      __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
      __ pop(atos);
    }
    __ b(done);
    __ delayed()->nop();
  }

  // slow case
  __ bind(slow_case);
  // call_VM(result, InterpreterRuntime::_new, cpool, index)
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);

  // continue
  __ bind(done);
}

// Allocates a primitive array; type byte and count go to the runtime.
void TemplateTable::newarray() {
  transition(itos, atos);
  __ lbu(A1, at_bcp(1));
  //type, count
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
}

// Allocates an object array; element class is resolved by the runtime
// from the constant pool index.
void TemplateTable::anewarray() {
  transition(itos, atos);
  __ load_two_bytes_from_at_bcp(A2, AT, 1);
  __ huswap(A2);
  __ get_constant_pool(A1);
  // cp, index, count
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
}

// Loads the length field of the array in FSR (null check folded into the
// access at the length offset).
void TemplateTable::arraylength() {
  transition(atos, itos);
  __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
  __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
}

// i use T2 as ebx, T3 as ecx, T4 as edx
// when invoke gen_subtype_check, super in T4, sub in T2, object in FSR(it's always)
// T2 : sub klass
// T3 : cpool
// T4 : super klass
void TemplateTable::checkcast() {
  transition(atos, atos);
  Label done, is_null, ok_is_subtype, quicked, resolved;
  // A null reference always passes checkcast.
  __ beq(FSR, ZERO, is_null);
  __ delayed()->nop();

  __ profile_checkcast(false, T3);

  // Get cpool & tags index
  __ get_cpool_and_tags(T3, T4);
  __ load_two_bytes_from_at_bcp(T2, AT, 1);
  __ huswap(T2);

  // See if bytecode has already been quicked
  __ add(AT, T4, T2);
  __ lb(AT, AT, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
  __ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
  __ beq(AT, ZERO, quicked);
  __ delayed()->nop();

  __ move(TSR, FSR);
  //call_VM blow FSR
  call_VM(T4, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  __ b(resolved);
  // Restore the object pointer in the branch delay slot.
  __ delayed();
  __ move(FSR, TSR);

  // klass already in cp, get superklass in T4
  __ bind(quicked);
  __ sll(AT, T2, 2);
  __ add(AT, T3, AT);
  __ lw(T4, AT, sizeof(constantPoolOopDesc));

  __ bind(resolved);
  // get subklass in T2
  __ lw(T2, FSR, oopDesc::klass_offset_in_bytes());
  /*
  __ move(AT, (int)&jerome1 );
  __ sw(T2, AT, 0);
  __ move(AT, (int)&jerome2 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome3 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome4 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome5 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome6 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome7 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome8 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome9 );
  __ sw(ZERO, AT, 0);
  __ move(AT, (int)&jerome10 );
  __ sw(ZERO, AT, 0);
  __ pushad();
  // __ enter();
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics),
          relocInfo::runtime_call_type);
  __ delayed()->nop();
  // __ leave();
  __ popad();
  */
  // Superklass in T4. Subklass in T2.
  __ gen_subtype_check(T4, T2, ok_is_subtype);

  // Come here on failure
  // object is at FSR
  __ jmp(Interpreter::_throw_ClassCastException_entry);
  __ delayed()->nop();

  // Come here on success
  __ bind(ok_is_subtype);

  // Collect counts on whether this check-cast sees NULLs a lot or not.
  if (ProfileInterpreter) {
    __ b(done);
    __ delayed()->nop();
  }
  __ bind(is_null);
  __ profile_checkcast(true, T3);
  __ bind(done);
}

// i use T3 as cpool, T4 as tags, T2 as index
// object always in FSR, superklass in T4, subklass in T2
void TemplateTable::instanceof() {
  transition(atos, itos);
  Label done, ok_is_subtype, quicked, resolved;
  // A null reference yields 0 (FSR already holds 0 == NULL here).
  __ beq(FSR, ZERO, done);
  __ delayed()->nop();

  // Get cpool & tags index
  __ get_cpool_and_tags(T3, T4);
  // get index
  __ load_two_bytes_from_at_bcp(T2, AT, 1);
  // NOTE(review): this uses hswap (signed) while checkcast uses huswap
  // (unsigned) for the same kind of constant-pool index -- a sign-extended
  // index >= 0x8000 would misbehave here; confirm hswap/huswap semantics.
  __ hswap(T2);

  // See if bytecode has already been quicked
  // quicked
  __ addu(AT, T4, T2);
  __ lb(AT, AT, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
  __ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
  __ beq(AT, ZERO, quicked);
  __ delayed()->nop();

  // get superklass in T4
  //__ move(TSR, FSR);
  // sometimes S2 may be changed during the call,
  // be careful if u use TSR as a saving place
  //__ push(FSR);
  __ push(atos);
  call_VM(T4, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
  //__ lw(FSR, SP, 0);
  __ pop_ptr(FSR);
  __ b(resolved);
  __ delayed()->nop();
  //__ move(FSR, TSR);

  // get superklass in T4, subklass in T2
  __ bind(quicked);
  __ sll(AT, T2, 2);
  __ addu(AT, T3, AT);
  __ lw(T4, AT, sizeof(constantPoolOopDesc));

  __ bind(resolved);
  // get subklass in T2
  __ lw(T2, FSR, oopDesc::klass_offset_in_bytes());
  // Superklass in T4. Subklass in T2.
  __ gen_subtype_check(T4, T2, ok_is_subtype);
  // Come here on failure
  __ b(done);
  // Set the result to 0 in the branch delay slot.
  __ delayed();
  __ move(FSR, ZERO);
  // Come here on success
  __ bind(ok_is_subtype);
  __ move(FSR, 1);
  __ bind(done);
  // FSR = 0: obj == NULL or obj is not an instanceof the specified klass
  // FSR = 1: obj != NULL and obj is an instanceof the specified klass
}

//--------------------------------------------------------
//--------------------------------------------
// Breakpoints
void TemplateTable::_breakpoint() {
  // Note: We get here even if we are single stepping..
  // jbug inists on setting breakpoints at every bytecode
  // even if we are in single step mode.
  transition(vtos, vtos);

  // get the unpatched byte code
  ///__ get_method(ecx);
  ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at)
  //, ecx, esi);
  ///__ movl(ebx, eax);
  __ get_method(A1);
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
             InterpreterRuntime::get_original_bytecode_at), A1, BCP);
  __ move(T2, V0);

  // post the breakpoint event
  ///__ get_method(ecx);
  ///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ecx, esi);
  __ get_method(A1);
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);

  // complete the execution of original bytecode
  __ dispatch_only_normal(vtos);
}

//----------------------------------------------------------------------------------------------------
// Exceptions

// Null-checks the exception object and transfers to the interpreter's
// shared exception-throw entry.
void TemplateTable::athrow() {
  transition(atos, vtos);
  __ null_check(FSR);
  __ jmp(Interpreter::throw_exception_entry());
  __ delayed()->nop();
}

//----------------------------------------------------------------------------------------------------
// Synchronization
//
// Note: monitorenter & exit are symmetric routines; which is reflected
// in the assembly code structure as well
//
// Stack layout:
//
// [expressions ] <--- SP = expression stack top
// ..
// [expressions ]
// [monitor entry] <--- monitor block top = expression stack bot
// ..
// [monitor entry]
// [frame data ] <--- monitor block bot
// ...
// [return addr ] <--- FP

// Emits monitorenter: finds (or allocates, by growing the monitor block
// and shifting the expression stack down) a free BasicObjectLock slot,
// stores the object into it, and locks it.
// we use T2 as monitor entry pointer, T3 as monitor top pointer, T6 as free slot pointer
// object always in FSR
void TemplateTable::monitorenter() {
  transition(atos, vtos);
  // check for NULL object
  __ null_check(FSR);

  const Address monitor_block_top(FP,
      frame::interpreter_frame_monitor_block_top_offset * wordSize);
  const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
  Label allocated;

  // initialize entry pointer
  __ move(T6, ZERO);

  // find a free slot in the monitor block (result in edx)
  {
    Label entry, loop, exit, next;
    __ lw(T2, monitor_block_top);
    __ b(entry);
    // Delay slot computes the block-bottom pointer.
    __ delayed()->addi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);

    // free slot?
    __ bind(loop);
    __ lw(AT, T2, BasicObjectLock::obj_offset_in_bytes());
    __ bne(AT, ZERO, next);
    __ delayed()->nop();
    // Remember this free slot; keep scanning in case the object is
    // already present in a later entry.
    __ move(T6, T2);

    __ bind(next);
    __ beq(FSR, AT, exit);
    __ delayed()->nop();
    __ addi(T2, T2, entry_size);

    __ bind(entry);
    __ bne(T3, T2, loop);
    __ delayed()->nop();
    __ bind(exit);
  }

  __ bne(T6, ZERO, allocated);
  __ delayed()->nop();

  // allocate one if there's no free slot
  {
    Label entry, loop;
    // 1. compute new pointers
    // SP: old expression stack top
    __ lw(T6, monitor_block_top);
    __ addi(SP, SP, - entry_size);
    __ addi(T6, T6, - entry_size);
    __ sw(T6, monitor_block_top);
    __ b(entry);
    // Delay slot seeds the copy cursor with the new stack top.
    __ delayed();
    __ move(T3, SP);

    // 2. move expression stack contents
    __ bind(loop);
    __ lw(AT, T3, entry_size);
    __ sw(AT, T3, 0);
    __ addi(T3, T3, wordSize);
    __ bind(entry);
    __ bne(T3, T6, loop);
    __ delayed()->nop();
  }

  __ bind(allocated);
  // Increment bcp to point to the next bytecode,
  // so exception handling for async. exceptions work correctly.
  // The object has already been popped from the stack, so the
  // expression stack looks correct.
  __ addi(BCP, BCP, 1);
  __ sw(FSR, T6, BasicObjectLock::obj_offset_in_bytes());
  __ lock_object(T6);
  // check to make sure this monitor doesn't cause stack overflow after locking
  __ save_bcp(); // in case of exception
  __ generate_stack_overflow_check(0);
  // The bcp has already been incremented. Just need to dispatch to next instruction.
  __ dispatch_next(vtos);
}

// Emits monitorexit: scans the monitor block for the entry holding this
// object and unlocks it; throws IllegalMonitorStateException when no
// matching entry is found.
// T2 : top
// T6 : entry
void TemplateTable::monitorexit() {
  transition(atos, vtos);
  __ null_check(FSR);

  const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize);
  Label found;

  // find matching slot
  {
    Label entry, loop;
    __ lw(T6, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
    __ b(entry);
    __ delayed()->addiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);

    __ bind(loop);
    __ lw(AT, T6, BasicObjectLock::obj_offset_in_bytes());
    __ beq(FSR, AT, found);
    __ delayed()->nop();
    __ addiu(T6, T6, entry_size);
    __ bind(entry);
    __ bne(T2, T6, loop);
    __ delayed()->nop();
  }

  // error handling. Unlocking was not block-structured
  Label end;
  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
             InterpreterRuntime::throw_illegal_monitor_state_exception));
  __ should_not_reach_here();

  // call run-time routine
  // T6: points to monitor entry
  __ bind(found);
  // Preserve the object across unlock_object (it may clobber FSR).
  __ move(TSR, FSR);
  __ unlock_object(T6);
  __ move(FSR, TSR);
  __ bind(end);
}

//--------------------------------------------------------------------------------------------------
// Wide instructions

// Dispatches through the wide-bytecode entry table using the byte
// following the wide prefix as the index.
void TemplateTable::wide() {
  transition(vtos, vtos);
  // Note: the esi increment step is part of the individual wide bytecode implementations
  __ lbu(T7, at_bcp(1));
  __ sll(AT, T7, 2);
  __ lui(T9, Assembler::split_high(int(Interpreter::_wentry_point)));
  __ add(T9, T9, AT);
  __ lw(T9, T9, Assembler::split_low(int(Interpreter::_wentry_point)));
  __ jr(T9);
  __ delayed()->nop();
}

//--------------------------------------------------------------------------------------------------
// Multi arrays

// Computes the address of the first dimension count on the expression
// stack and calls the runtime; the runtime pops nothing, so the operand
// words are popped here afterwards.
void TemplateTable::multianewarray() {
  transition(vtos, atos);
  // last dim is on top of stack; we want address of first one:
  // first_addr = last_addr + (ndims - 1) * wordSize
  __ lbu(A1, at_bcp(3)); // dimension
  // __ sll(A1, A1, 2);
  __ sll(A1, A1, Interpreter::stackElementScale());
  __ addi(A1, A1, -4);
  __ add(A1, SP, A1); // now A1 pointer to the count array on the stack
  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
  // Pop the dimension counts off the expression stack.
  __ lbu(AT, at_bcp(3));
  // __ sll(AT, AT, 2);
  __ sll(AT, AT, Interpreter::stackElementScale());
  __ add(SP, SP, AT);
}

#endif // !CC_INTERP