Mercurial > hg > openjdk6-mips
view hotspot/src/cpu/mips/vm/assembler_mips.cpp @ 27:b7ec29b378c9
Update codes to support deoptimization.
author | Ao Qi <aoqi@loongson.cn> |
---|---|
date | Thu, 11 Nov 2010 19:59:55 +0800 |
parents | 85b046e5468b |
children | 15f398a44411 |
line wrap: on
line source
/* * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2010 Lemote, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. * */ #include "incls/_precompiled.incl" #include "incls/_assembler_mips.cpp.incl" int MacroAssembler::i[32] = {0,}; float MacroAssembler::f[32] = {0.0,}; void MacroAssembler::print(outputStream *s) { unsigned int k; for(k=0; k<sizeof(i)/sizeof(i[0]); k++) { s->print_cr("i%d = 0x%.16lx", k, i[k]); } s->cr(); for(k=0; k<sizeof(f)/sizeof(f[0]); k++) { s->print_cr("f%d = %f", k, f[k]); } s->cr(); } int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } void MacroAssembler::save_registers(MacroAssembler *masm) { #define __ masm-> for(int k=0; k<32; k++) { __ sw (as_Register(k), A0, i_offset(k)); } for(int k=0; k<32; k++) { __ swc1 (as_FloatRegister(k), A0, f_offset(k)); } #undef __ } void MacroAssembler::restore_registers(MacroAssembler *masm) { #define __ masm-> for(int k=0; k<32; k++) { __ lw (as_Register(k), A0, i_offset(k)); } for(int k=0; k<32; k++) { __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); } #undef __ } // Implementation of AddressLiteral AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { _is_lval = false; _target = target; switch (rtype) { case relocInfo::oop_type: // Oops are a special case. Normally they would be their own section // but in cases like icBuffer they are literals in the code stream that // we don't have a section for. We use none so that we get a literal address // which is always patchable. break; case relocInfo::external_word_type: _rspec = external_word_Relocation::spec(target); break; case relocInfo::internal_word_type: _rspec = internal_word_Relocation::spec(target); break; case relocInfo::opt_virtual_call_type: _rspec = opt_virtual_call_Relocation::spec(); break; case relocInfo::static_call_type: _rspec = static_call_Relocation::spec(); break; case relocInfo::runtime_call_type: _rspec = runtime_call_Relocation::spec(); break; case relocInfo::poll_type: case relocInfo::poll_return_type: _rspec = Relocation::spec_simple(rtype); break; case relocInfo::none: break; default: ShouldNotReachHere(); break; } } // Implementation of Address #ifdef _LP64 Address Address::make_array(ArrayAddress adr) { // Not implementable on 64bit machines // Should have been handled higher up the call chain. ShouldNotReachHere(); return Address(); } // exceedingly dangerous constructor Address::Address(int disp, address loc, relocInfo::relocType rtype) { _base = noreg; _index = noreg; _scale = no_scale; _disp = disp; switch (rtype) { case relocInfo::external_word_type: _rspec = external_word_Relocation::spec(loc); break; case relocInfo::internal_word_type: _rspec = internal_word_Relocation::spec(loc); break; case relocInfo::runtime_call_type: // HMM _rspec = runtime_call_Relocation::spec(); break; case relocInfo::poll_type: case relocInfo::poll_return_type: _rspec = Relocation::spec_simple(rtype); break; case relocInfo::none: break; default: ShouldNotReachHere(); } } #else // LP64 Address Address::make_array(ArrayAddress adr) { AddressLiteral base = adr.base(); Address index = adr.index(); assert(index._disp == 0, "must not have disp"); // maybe it can? Address array(index._base, index._index, index._scale, (intptr_t) base.target()); array._rspec = base._rspec; return array; } // exceedingly dangerous constructor Address::Address(address loc, RelocationHolder spec) { _base = noreg; _index = noreg; _scale = no_scale; _disp = (intptr_t) loc; _rspec = spec; } #endif // _LP64 /* // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert // that to noreg for the Address constructor. Address Address::make_raw(int base, int index, int scale, int disp) { bool valid_index = index != rsp->encoding(); if (valid_index) { Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); return madr; } else { Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); return madr; } } */ // Implementation of Assembler const char *Assembler::ops_name[] = { "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", "daddi", "daddiu", "ldl", "ldr", "", "", "", "", "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", "sc", "swc1", "", "", "scd", "sdc1", "", "sd" }; const char* Assembler::special_name[] = { "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", "jr", "jalr", "", "", "syscall", "break", "", "sync", "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", "add", "addu", "sub", "subu", "and", "or", "xor", "nor", "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" }; const char* Assembler::regimm_name[] = { "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", "bltzal", "bgezal", "bltzall", "bgezall" }; const char* Assembler::float_name[] = { "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w" }; //misleading name, print only branch/jump instruction void Assembler::print_instruction(int inst) { const char *s; switch( opcode(inst) ) { default: s = ops_name[opcode(inst)]; break; case special_op: s = special_name[special(inst)]; break; case regimm_op: s = special_name[rt(inst)]; break; } ::tty->print("%s", s); } //without check, maybe fixed int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) { int v = (dest_pos - inst_pos - 4)>>2; switch(opcode(inst)) { case j_op: case jal_op: assert(false, "should not use j/jal here"); break; default: v = low16(v); inst &= 0xffff0000; break; } return inst | v; } int Assembler::branch_destination(int inst, int pos) { int off; switch(opcode(inst)) { case j_op: case jal_op: assert(false, "should not use j/jal here"); break; default: off = expand(low16(inst), 15); break; } return off ? pos + 4 + (off<<2) : 0; } int AbstractAssembler::code_fill_byte() { return 0x00; // illegal instruction 0x00000000 } // Now the Assembler instruction (identical for 32/64 bits) void Assembler::lb(Register rt, Address src) { lb(rt, src.base(), src.disp()); } void Assembler::lbu(Register rt, Address src) { lbu(rt, src.base(), src.disp()); } void Assembler::ld(Register rt, Address src){ ld(rt, src.base(), src.disp()); } void Assembler::ldl(Register rt, Address src){ ldl(rt, src.base(), src.disp()); } void Assembler::ldr(Register rt, Address src){ ldr(rt, src.base(), src.disp()); } void Assembler::lh(Register rt, Address src){ lh(rt, src.base(), src.disp()); } void Assembler::lhu(Register rt, Address src){ lhu(rt, src.base(), src.disp()); } void Assembler::ll(Register rt, Address src){ ll(rt, src.base(), src.disp()); } void Assembler::lld(Register rt, Address src){ lld(rt, src.base(), src.disp()); } void Assembler::lw(Register rt, Address src){ lw(rt, src.base(), src.disp()); } void Assembler::lea(Register rt, Address src) { addi(rt, src.base(), src.disp()); } void Assembler::lwl(Register rt, Address src){ lwl(rt, src.base(), src.disp()); } void Assembler::lwr(Register rt, Address src){ lwr(rt, src.base(), src.disp()); } void Assembler::lwu(Register rt, Address src){ lwu(rt, src.base(), src.disp()); } void Assembler::sb(Register rt, Address dst) { sb(rt, dst.base(), dst.disp()); } void Assembler::sc(Register rt, Address dst) { sc(rt, dst.base(), dst.disp()); } void Assembler::scd(Register rt, Address dst) { scd(rt, dst.base(), dst.disp()); } void Assembler::sd(Register rt, Address dst) { sd(rt, dst.base(), dst.disp()); } void Assembler::sdl(Register rt, Address dst) { sdl(rt, dst.base(), dst.disp()); } void Assembler::sdr(Register rt, Address dst) { sdr(rt, dst.base(), dst.disp()); } void Assembler::sh(Register rt, Address dst) { sh(rt, dst.base(), dst.disp()); } void Assembler::sw(Register rt, Address dst) { sw(rt, dst.base(), dst.disp()); } void Assembler::swl(Register rt, Address dst) { swl(rt, dst.base(), dst.disp()); } void Assembler::swr(Register rt, Address dst) { swr(rt, dst.base(), dst.disp()); } void Assembler::lwc1(FloatRegister rt, Address src) { lwc1(rt, src.base(), src.disp()); } void Assembler::ldc1(FloatRegister rt, Address src) { ldc1(rt, src.base(), src.disp()); } void Assembler::swc1(FloatRegister rt, Address dst) { swc1(rt, dst.base(), dst.disp()); } void Assembler::sdc1(FloatRegister rt, Address dst) { sdc1(rt, dst.base(), dst.disp()); } void Assembler::j(address entry) { int dest = ((int)entry - (((int)pc() + 4) & 0xf0000000))>>2; emit_long((j_op<<26) | dest); has_delay_slot(); } void Assembler::jal(address entry) { int dest = ((int)entry - (((int)pc() + 4) & 0xf0000000))>>2; emit_long((jal_op<<26) | dest); has_delay_slot(); } // Implementation of MacroAssembler // First all the versions that have distinct versions depending on 32/64 bit // Unless the difference is trivial (1 line or so). //#ifndef _LP64 // 32bit versions void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) { addu_long(AT, base, offset); ld_ptr(rt, 0, AT); } void MacroAssembler::st_ptr(Register rt, Register offset, Register base) { addu_long(AT, base, offset); st_ptr(rt, 0, AT); } void MacroAssembler::ld_long(Register rt, Register offset, Register base) { addu_long(AT, base, offset); ld_long(rt, 0, AT); } void MacroAssembler::st_long(Register rt, Register offset, Register base) { addu_long(AT, base, offset); st_long(rt, 0, AT); } Address MacroAssembler::as_Address(AddressLiteral adr) { return Address(adr.target(), adr.rspec()); } Address MacroAssembler::as_Address(ArrayAddress adr) { return Address::make_array(adr); } int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg, bool swap_reg_contains_mark, Label& done, Label* slow_case, BiasedLockingCounters* counters) { assert(UseBiasedLocking, "why call this otherwise?"); //assert(swap_reg == eax, "swap_reg must be eax for cmpxchg"); assert_different_registers(lock_reg, obj_reg, swap_reg); bool need_tmp_reg = false; if (tmp_reg == noreg) { need_tmp_reg = true; tmp_reg = lock_reg; } else { assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); } assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); Address saved_mark_addr(lock_reg, 0); // Biased locking // See whether the lock is currently biased toward our thread and // whether the epoch is still valid // Note that the runtime guarantees sufficient alignment of JavaThread // pointers to allow age to be placed into low bits // First check to see whether biasing is even enabled for this object Label cas_label; int null_check_offset = -1; if (!swap_reg_contains_mark) { null_check_offset = offset(); // movl(swap_reg, mark_addr); lw(swap_reg, mark_addr); } ///////////////////////////////////////////// //jerome_for_debug /* Label ne; move(AT, 0x00000005); sub(AT, AT,swap_reg); bne(AT, ZERO, ne); delayed()->nop(); move(AT, (int)(&jerome8)); sw(swap_reg, AT, 0); bind(ne); */ ////////////////////////////////////////////// if (need_tmp_reg) { // pushl(tmp_reg); push(tmp_reg); } //movl(tmp_reg, swap_reg); move(tmp_reg, swap_reg); //andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); andi(tmp_reg,tmp_reg, markOopDesc::biased_lock_mask_in_place); //cmpl(tmp_reg, markOopDesc::biased_lock_pattern); addi(AT, ZERO,markOopDesc::biased_lock_pattern); sub(AT, AT, tmp_reg); if (need_tmp_reg) { // popl(tmp_reg); pop(tmp_reg); } //jcc(Assembler::notEqual, cas_label); bne(AT,ZERO,cas_label); delayed()->nop(); // The bias pattern is present in the object's header. Need to check // whether the bias owner and the epoch are both still current. // Note that because there is no current thread register on x86 we // need to store off the mark word we read out of the object to // avoid reloading it and needing to recheck invariants below. This // store is unfortunate but it makes the overall code shorter and // simpler. // movl(saved_mark_addr, swap_reg); /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome1)); move(tmp_reg, 0xeeeeeeee); sw(tmp_reg, AT, 0); pop(tmp_reg); */ sw(swap_reg,saved_mark_addr); if (need_tmp_reg) { //pushl(tmp_reg); push(tmp_reg); } get_thread(tmp_reg); //xorl(swap_reg, tmp_reg); xorr(swap_reg,swap_reg, tmp_reg); if (swap_reg_contains_mark) { null_check_offset = offset(); } // movl(tmp_reg, klass_addr); lw(tmp_reg,klass_addr); // xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() // + klassOopDesc::klass_part_offset_in_bytes())); //xori(swap_reg, swap_reg,Address(tmp_reg, Klass::prototype_header_offset_in_bytes() //+ klassOopDesc::klass_part_offset_in_bytes())); lw(AT, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); xorr(swap_reg,swap_reg,AT); // andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); move(AT, ~((int) markOopDesc::age_mask_in_place)); andr(swap_reg,swap_reg,AT); if (need_tmp_reg) { //popl(tmp_reg); pop(tmp_reg); } if (PrintBiasedLockingStatistics) { //FIXME //cond_incl(ZERO, Address((int) BiasedLocking::biased_lock_entry_count_addr(), //relocInfo::none)); } // jcc(Assembler::equal, done); //FIXME, equal is for what ,there is no cmp or test here? @jerome //beq(tmp_reg,ZERO, done); beq(swap_reg,ZERO, done); delayed()->nop(); /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome2)); move(tmp_reg, 0xdddddddd); sw(tmp_reg, AT, 0); pop(tmp_reg); */ Label try_revoke_bias; Label try_rebias; // At this point we know that the header has the bias pattern and // that we are not the bias owner in the current epoch. We need to // figure out more details about the state of the header in order to // know what operations can be legally performed on the object's // header. // If the low three bits in the xor result aren't clear, that means // the prototype header is no longer biased and we have to revoke // the bias on this object. //testl(swap_reg, markOopDesc::biased_lock_mask_in_place); //jcc(Assembler::notZero, try_revoke_bias); move(AT, markOopDesc::biased_lock_mask_in_place); andr(AT,swap_reg,AT ); bne(AT,ZERO,try_revoke_bias); delayed()->nop(); /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome3)); move(tmp_reg, 0xcccccccc); sw(tmp_reg, AT, 0); pop(tmp_reg); */ // Biasing is still enabled for this data type. See whether the // epoch of the current bias is still valid, meaning that the epoch // bits of the mark word are equal to the epoch bits of the // prototype header. (Note that the prototype header's epoch bits // only change at a safepoint.) If not, attempt to rebias the object // toward the current thread. Note that we must be absolutely sure // that the current epoch is invalid in order to do this because // otherwise the manipulations it performs on the mark word are // illegal. // testl(swap_reg, markOopDesc::epoch_mask_in_place); //jcc(Assembler::notZero, try_rebias); move(AT, markOopDesc::epoch_mask_in_place); andr(AT,swap_reg,AT); bne(AT,ZERO,try_rebias); delayed()->nop(); /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome4)); move(tmp_reg, 0xbbbbbbbb); sw(tmp_reg, AT, 0); pop(tmp_reg); */ // The epoch of the current bias is still valid but we know nothing // about the owner; it might be set or it might be clear. Try to // acquire the bias of the object using an atomic operation. If this // fails we will go in to the runtime to revoke the object's bias. // Note that we first construct the presumed unbiased header so we // don't accidentally blow away another thread's valid bias. //movl(swap_reg, saved_mark_addr); lw(swap_reg, saved_mark_addr); // andl(swap_reg,markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); andr(swap_reg,swap_reg,AT); if (need_tmp_reg) { // pushl(tmp_reg); push(tmp_reg); } get_thread(tmp_reg); //orl(tmp_reg, swap_reg); orr(tmp_reg,tmp_reg, swap_reg); //if (os::is_MP()) { // lock(); //} //cmpxchg(tmp_reg, Address(obj_reg)); // what is store in eax now ? @jerome,see the entry of the func, swap_reg! cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg); if (need_tmp_reg) { //popl(tmp_reg); pop(tmp_reg); } // If the biasing toward our thread failed, this means that // another thread succeeded in biasing it toward itself and we // need to revoke that bias. The revocation will occur in the // interpreter runtime in the slow case. if (PrintBiasedLockingStatistics) { //FIXME // cond_incl(ZERO, Address((int) BiasedLocking::anonymously_biased_lock_entry_count_addr(), relocInfo::none)); } if (slow_case != NULL) { //jcc(Assembler::notZero, *slow_case); beq(AT,ZERO, *slow_case); delayed()->nop(); } //jmp(done); /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome5)); move(tmp_reg, 0xaaaaaaaa); sw(ZERO, AT, 0); pop(tmp_reg); */ b(done); delayed()->nop(); bind(try_rebias); // At this point we know the epoch has expired, meaning that the // current "bias owner", if any, is actually invalid. Under these // circumstances _only_, we are allowed to use the current header's // value as the comparison value when doing the cas to acquire the // bias in the current epoch. In other words, we allow transfer of // the bias from one thread to another directly in this situation. // // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. if (need_tmp_reg) { // pushl(tmp_reg); push(tmp_reg); } /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome6)); move(tmp_reg, 0x99999999); sw(tmp_reg, AT, 0); pop(tmp_reg); */ get_thread(tmp_reg); //movl(swap_reg, klass_addr); lw(swap_reg, klass_addr); // orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() // + klassOopDesc::klass_part_offset_in_bytes())); lw(AT,Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); orr(tmp_reg,tmp_reg,AT); // movl(swap_reg, saved_mark_addr); lw(swap_reg, saved_mark_addr); // if (os::is_MP()) { // lock(); //} // cmpxchg(tmp_reg, Address(obj_reg)); cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg); if (need_tmp_reg) { // popl(tmp_reg); pop(tmp_reg); } // If the biasing toward our thread failed, then another thread // succeeded in biasing it toward itself and we need to revoke that // bias. The revocation will occur in the runtime in the slow case. if (PrintBiasedLockingStatistics) { //FIXME //cond_incl(ZERO, Address((int) BiasedLocking::rebiased_lock_entry_count_addr(), //relocInfo::none)); } if (slow_case != NULL) { //jcc(Assembler::notZero, *slow_case); beq(AT,ZERO, *slow_case); delayed()->nop(); } //jmp(done); b(done); delayed()->nop(); bind(try_revoke_bias); // The prototype mark in the klass doesn't have the bias bit set any // more, indicating that objects of this data type are not supposed // to be biased any more. We are going to try to reset the mark of // this object to the prototype value and fall through to the // CAS-based locking scheme. Note that if our CAS fails, it means // that another thread raced us for the privilege of revoking the // bias of this particular object, so it's okay to continue in the // normal locking code. // // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. // movl(swap_reg, saved_mark_addr); lw(swap_reg, saved_mark_addr); if (need_tmp_reg) { //pushl(tmp_reg); push(tmp_reg); } /* // jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome7)); move(tmp_reg, 0x88888888); sw(tmp_reg, AT, 0); pop(tmp_reg); */ //movl(tmp_reg, klass_addr); lw(tmp_reg, klass_addr); //movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); lw(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); //if (os::is_MP()) { // lock(); //} //cmpxchg(tmp_reg, Address(obj_reg)); cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg); if (need_tmp_reg) { //popl(tmp_reg); pop(tmp_reg); } // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in // removing the bias bit from the object's header. if (PrintBiasedLockingStatistics) { //FIXME //cond_incl(ZERO, Address((int) BiasedLocking::revoked_lock_entry_count_addr(), relocInfo::none)); } bind(cas_label); /*// jerome_for_debug push(tmp_reg); move(AT, (int)(&jerome8)); move(tmp_reg, 0x77777777); sw(tmp_reg, AT, 0); pop(tmp_reg); */ return null_check_offset; } void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { assert(UseBiasedLocking, "why call this otherwise?"); // Check for biased locking unlock case, which is a no-op // Note: we do not have to check the thread ID for two reasons. // First, the interpreter checks for IllegalMonitorStateException at // a higher level. Second, if the bias was revoked while we held the // lock, the object could not be rebiased toward another thread, so // the bias bit would be clear. //movl(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); //andl(temp_reg, markOopDesc::biased_lock_mask_in_place); andi(temp_reg, temp_reg,markOopDesc::biased_lock_mask_in_place); // cmpl(temp_reg, markOopDesc::biased_lock_pattern); addi(AT,ZERO,markOopDesc::biased_lock_pattern); //jcc(Assembler::equal, done); beq(AT,temp_reg,done); delayed()->nop(); } // NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME. // by yjl 6/27/2005 // the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf // by yjl 7/11/2005 // this method will handle the stack problem, you need not to preserve the stack space for the argument now // by yjl 8/1/2005 void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { //call(RuntimeAddress(entry_point)); //increment(rsp, number_of_arguments * wordSize); #ifndef OPT_THREAD Register java_thread = T8; get_thread(java_thread); #else Register java_thread = TREG; #endif // save stack pointer assert(number_of_arguments <= 4, "just check"); sw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); if (number_of_arguments) addi(SP, SP, - number_of_arguments * wordSize); move(AT, -8); andr(SP, SP, AT); call(entry_point, relocInfo::runtime_call_type); delayed()->nop(); #ifndef OPT_THREAD get_thread(java_thread); #endif lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); } // FIXME: i'm not sure of which register to use for jr. i use AT now. // by yjl 6/27/2005 void MacroAssembler::jmp(address entry) { //if (fit_in_jal((entry - pc() - 4))/4) { // j(entry); //} else { move(T9, (int)entry); jr(T9); //} } // FIXME: i'm not sure of which register to use for jr. i use AT now. // maybe should use T9 instead // by yjl 6/27/2005 void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { switch (rtype) { case relocInfo::runtime_call_type: case relocInfo::none: jmp(entry); break; default: { InstructionMark im(this); relocate(rtype); //move(T9, (int)entry); lui(T9, Assembler::split_high((int)entry)); addiu(T9, T9, Assembler::split_low((int)entry)); jr(T9); } break; } } void MacroAssembler::call(address entry) { // c/c++ code assume T9 is it's entry point, so we just always move entry to t9 // maybe there is some more graceful method to handle this. FIXME // by yjl 6/27/2005 move(T9, (int)entry); jalr(); } void MacroAssembler::call(address entry, relocInfo::relocType rtype) { switch (rtype) { case relocInfo::runtime_call_type: case relocInfo::none: // call(entry); move(T9, (int)entry); jalr(); break; default: { InstructionMark im(this); relocate(rtype); //move(T9, (int)entry); lui(T9, Assembler::split_high((int)entry)); addiu(T9, T9, Assembler::split_low((int)entry)); jalr(); } break; } } void MacroAssembler::call(address entry, RelocationHolder& rh) { switch (rh.type()) { case relocInfo::runtime_call_type: case relocInfo::none: call(entry); break; default: { InstructionMark im(this); relocate(rh); //move(T9, (int)entry); lui(T9, Assembler::split_high((int)entry)); addiu(T9, T9, Assembler::split_low((int)entry)); jalr(); } break; } } void MacroAssembler::c2bool(Register r) { Label L; Assembler::beq(r, ZERO, L); delayed()->nop(); move(r, 1); bind(L); } static void pass_arg0(MacroAssembler* masm, Register arg) { masm->push(arg); } static void pass_arg1(MacroAssembler* masm, Register arg) { masm->push(arg); } static void pass_arg2(MacroAssembler* masm, Register arg) { masm->push(arg); } static void pass_arg3(MacroAssembler* masm, Register arg) { masm->push(arg); } #ifndef PRODUCT extern "C" void findpc(intptr_t x); #endif void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { // In order to get locks to work, we need to fake a in_VM state JavaThread* thread = JavaThread::current(); JavaThreadState saved_state = thread->thread_state(); thread->set_thread_state(_thread_in_vm); if (ShowMessageBoxOnError) { JavaThread* thread = JavaThread::current(); JavaThreadState saved_state = thread->thread_state(); thread->set_thread_state(_thread_in_vm); if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ttyLocker ttyl; BytecodeCounter::print(); } // To see where a verify_oop failed, get $ebx+40/X for this frame. // This is the value of eip which points to where verify_oop will return. if (os::message_box(msg, "Execution stopped, print registers?")) { ttyLocker ttyl; tty->print_cr("eip = 0x%08x", eip); #ifndef PRODUCT tty->cr(); findpc(eip); tty->cr(); #endif tty->print_cr("rax, = 0x%08x", rax); tty->print_cr("rbx, = 0x%08x", rbx); tty->print_cr("rcx = 0x%08x", rcx); tty->print_cr("rdx = 0x%08x", rdx); tty->print_cr("rdi = 0x%08x", rdi); tty->print_cr("rsi = 0x%08x", rsi); tty->print_cr("rbp, = 0x%08x", rbp); tty->print_cr("rsp = 0x%08x", rsp); BREAKPOINT; } } else { ttyLocker ttyl; ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); assert(false, "DEBUG MESSAGE"); } ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); } void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { if ( ShowMessageBoxOnError ) { JavaThreadState saved_state = JavaThread::current()->thread_state(); JavaThread::current()->set_thread_state(_thread_in_vm); { // In order to get locks work, we need to fake a in_VM state ttyLocker ttyl; ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { BytecodeCounter::print(); } // if (os::message_box(msg, "Execution stopped, print registers?")) // regs->print(::tty); } ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); } else ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); } void MacroAssembler::stop(const char* msg) { move(A0, (int)msg); //reserver space for argument. added by yjl 7/10/2005 addiu(SP, SP, - 1 * wordSize); call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); delayed()->nop(); //restore space for argument addiu(SP, SP, 1 * wordSize); brk(17); } void MacroAssembler::warn(const char* msg) { /* push_CPU_state(); ExternalAddress message((address) msg); // push address of message pushptr(message.addr()); call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); addl(rsp, wordSize); // discard argument pop_CPU_state(); */ //save_registers(this); pushad(); addi(SP, SP, -4); sw(A0, SP, -1 * wordSize); move(A0, (int)msg); addi(SP, SP, -1 * wordSize); call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); delayed()->nop(); addi(SP, SP, 1 * wordSize); lw(A0, SP, -1 * wordSize); addi(SP, SP, 4); popad(); //restore_registers(this); } #ifndef PRODUCT //FIXME cannot print sp void MacroAssembler::print_reg(Register reg) { pushad(); push(FP); char* a = new char[50]; sprintf(a, "%s: 0x",reg->name()); move(A0, (int)a); call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); delayed()->nop(); pop(FP); popad(); pushad(); push(FP); move(A0, reg); call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type); delayed()->nop(); pop(FP); popad(); pushad(); push(FP); char* b = new char[50]; sprintf(b, " pc: %p\n",pc()); move(A0, (int)b); call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); delayed()->nop(); pop(FP); popad(); } #endif void MacroAssembler::increment(Register reg, int imm) { if (!imm) return; if (is_simm16(imm)) { addiu(reg, reg, imm); } else { move(AT, imm); addu(reg, reg, AT); } } void MacroAssembler::decrement(Register reg, int imm) { increment(reg, -imm); } void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { call_VM_helper(oop_result, entry_point, 0, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { /* Label C, E; call(C, relocInfo::none); jmp(E); bind(C); pass_arg1(this, arg_1); call_VM_helper(oop_result, entry_point, 1, check_exceptions); ret(0); bind(E); */ if (arg_1!=A1) move(A1, arg_1); call_VM_helper(oop_result, entry_point, 1, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { if (arg_1!=A1) move(A1, arg_1); if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); call_VM_helper(oop_result, entry_point, 2, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { if (arg_1!=A1) move(A1, arg_1); if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); call_VM_helper(oop_result, entry_point, 3, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { //Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); //call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { if (arg_1!=A1) move(A1, arg_1); call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { if (arg_1!=A1) move(A1, arg_1); if (arg_2!=A2) move(A2, arg_2); //assert(arg_2 != O1, "smashed argument"); call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { if (arg_1!=A1) move(A1, arg_1); if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); } void MacroAssembler::call_VM_base(Register oop_result, Register java_thread, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { address before_call_pc; // determine java_thread register if (!java_thread->is_valid()) { #ifndef OPT_THREAD java_thread = T2; get_thread(java_thread); #else java_thread = TREG; #endif } // determine last_java_sp register if (!last_java_sp->is_valid()) { last_java_sp = SP; } // debugging support assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp"); // set last Java frame before call before_call_pc = (address)pc(); set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); // do the call move(A0, java_thread); call(entry_point, relocInfo::runtime_call_type); delayed()->nop(); // restore the thread (cannot use the pushed argument since arguments // may be overwritten by C code generated by an optimizing compiler); // however can use the register value directly if it is callee saved. #ifndef OPT_THREAD if (java_thread >=S0 && java_thread <=S7) { #ifdef ASSERT { Label L; get_thread(AT); beq(java_thread, AT, L); delayed()->nop(); stop("MacroAssembler::call_VM_base: edi not callee saved?"); bind(L); } #endif } else { get_thread(java_thread); } #endif // discard thread and arguments lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); // reset last Java frame reset_last_Java_frame(java_thread, false, true); check_and_handle_popframe(java_thread); check_and_handle_earlyret(java_thread); if (check_exceptions) { // check for pending exceptions (java_thread is set upon return) Label L; lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); beq(AT, ZERO, L); delayed()->nop(); move(AT, (int)before_call_pc); push(AT); jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); delayed()->nop(); bind(L); } // get oop result if there is one and reset the value in the thread if (oop_result->is_valid()) { lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); sw(ZERO, java_thread, in_bytes(JavaThread::vm_result_offset())); verify_oop(oop_result); } } void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { move(V0, SP); //we also reserve space for java_thread here addi(SP, SP, (1 + number_of_arguments) * (- wordSize)); move(AT, 0xfffffff8); andr(SP, SP, AT); call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); } void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { call_VM_leaf_base(entry_point, number_of_arguments); } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { //pass_arg0(this, arg_0); //call_VM_leaf(entry_point, 1); if (arg_0!=A0) move(A0, arg_0); call_VM_leaf(entry_point, 1); } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { /* LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); pass_arg1(this, arg_1); pass_arg0(this, arg_0); call_VM_leaf(entry_point, 2); */ if (arg_0 != A0) move(A0, arg_0); if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A1, "smashed argument"); call_VM_leaf(entry_point, 2); } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { if (arg_0 != A0) move(A0, arg_0); if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A1, "smashed argument"); if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1 && arg_2 != A2, "smashed argument"); call_VM_leaf(entry_point, 3); } void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } void MacroAssembler::check_and_handle_popframe(Register java_thread) { } void MacroAssembler::null_check(Register reg, int offset) { if (needs_explicit_null_check(offset)) { // provoke OS NULL exception if reg = NULL by // accessing M[reg] w/o changing any (non-CC) registers // NOTE: cmpl is plenty here to provoke a segv lw(AT, reg, 0); nop(); nop(); nop(); // Note: should probably use testl(rax, Address(reg, 0)); // may be shorter code (however, this version of // testl needs to be implemented first) } else { // nothing to do, (later) access of M[reg + offset] // will provoke OS NULL exception if reg = NULL } } void MacroAssembler::enter() { push2(RA, FP); move(FP, SP); } void MacroAssembler::leave() { //move(SP, FP); //pop2(FP, RA); addi(SP, FP, 2 * wordSize); lw(RA, SP, - 1 * wordSize); lw(FP, SP, - 2 * wordSize); } /* void MacroAssembler::os_breakpoint() { // instead of directly emitting a breakpoint, call os:breakpoint for better debugability // (e.g., MSVC can't call ps() otherwise) call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); } */ void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { // determine java_thread register if (!java_thread->is_valid()) { #ifndef OPT_THREAD java_thread = T1; get_thread(java_thread); #else java_thread = TREG; #endif } // we must set sp to zero to clear frame sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); // must clear fp, so that compiled frames are not confused; it is possible // that we need it only for debugging if(clear_fp) sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); if (clear_pc) sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); } // Write serialization page so VM thread can do a pseudo remote membar. // We use the current thread pointer to calculate a thread specific // offset to write to within the page. This minimizes bus traffic // due to cache line collision. void MacroAssembler::serialize_memory(Register thread, Register tmp) { /* movl(tmp, thread); shrl(tmp, os::get_serialize_page_shift_count()); andl(tmp, (os::vm_page_size() - sizeof(int))); Address index(noreg, tmp, Address::times_1); ExternalAddress page(os::get_memory_serialize_page()); movptr(ArrayAddress(page, index), tmp); */ move(tmp, thread); srl(tmp, tmp,os::get_serialize_page_shift_count()); move(AT, (os::vm_page_size() - sizeof(int))); andr(tmp, tmp,AT); sw(tmp,Address(tmp, (int)os::get_memory_serialize_page())); } // Calls to C land // // When entering C land, the rbp, & rsp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. void MacroAssembler::set_last_Java_frame(Register java_thread, Register last_java_sp, Register last_java_fp, address last_java_pc) { // determine java_thread register if (!java_thread->is_valid()) { #ifndef OPT_THREAD java_thread = T2; get_thread(java_thread); #else java_thread = TREG; #endif } // determine last_java_sp register if (!last_java_sp->is_valid()) { last_java_sp = SP; } // last_java_fp is optional if (last_java_fp->is_valid()) { sw(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); } // last_java_pc is optional if (last_java_pc != NULL) { relocate(relocInfo::internal_pc_type); lui(AT, split_high((int)last_java_pc)); addiu(AT, AT, split_low((int)last_java_pc)); sw(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); } sw(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); } ////////////////////////////////////////////////////////////////////////////////// #ifndef SERIALGC /* void MacroAssembler::g1_write_barrier_pre(Register obj, #ifndef _LP64 Register thread, #endif Register tmp, Register tmp2, bool tosca_live) { LP64_ONLY(Register thread = r15_thread;) Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active())); Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index())); Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf())); Label done; Label runtime; // if (!marking_in_progress) goto done; if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { cmpl(in_progress, 0); } else { assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); cmpb(in_progress, 0); } jcc(Assembler::equal, done); // if (x.f == NULL) goto done; cmpptr(Address(obj, 0), NULL_WORD); jcc(Assembler::equal, done); // Can we store original value in the thread's buffer? LP64_ONLY(movslq(tmp, index);) movptr(tmp2, Address(obj, 0)); #ifdef _LP64 cmpq(tmp, 0); #else cmpl(index, 0); #endif jcc(Assembler::equal, runtime); #ifdef _LP64 subq(tmp, wordSize); movl(index, tmp); addq(tmp, buffer); #else subl(index, wordSize); movl(tmp, buffer); addl(tmp, index); #endif movptr(Address(tmp, 0), tmp2); jmp(done); bind(runtime); // save the live input values if(tosca_live) push(rax); push(obj); #ifdef _LP64 movq(c_rarg0, Address(obj, 0)); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread); #else push(thread); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); pop(thread); #endif pop(obj); if(tosca_live) pop(rax); bind(done); } void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, #ifndef _LP64 Register thread, #endif Register tmp, Register tmp2) { LP64_ONLY(Register thread = r15_thread;) Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index())); Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf())); BarrierSet* bs = Universe::heap()->barrier_set(); CardTableModRefBS* ct = (CardTableModRefBS*)bs; Label done; Label runtime; // Does store cross heap regions? movptr(tmp, store_addr); xorptr(tmp, new_val); shrptr(tmp, HeapRegion::LogOfHRGrainBytes); jcc(Assembler::equal, done); // crosses regions, storing NULL? cmpptr(new_val, (int32_t) NULL_WORD); jcc(Assembler::equal, done); // storing region crossing non-NULL, is card already dirty? ExternalAddress cardtable((address) ct->byte_map_base); assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); #ifdef _LP64 const Register card_addr = tmp; movq(card_addr, store_addr); shrq(card_addr, CardTableModRefBS::card_shift); lea(tmp2, cardtable); // get the address of the card addq(card_addr, tmp2); #else const Register card_index = tmp; movl(card_index, store_addr); shrl(card_index, CardTableModRefBS::card_shift); Address index(noreg, card_index, Address::times_1); const Register card_addr = tmp; lea(card_addr, as_Address(ArrayAddress(cardtable, index))); #endif cmpb(Address(card_addr, 0), 0); jcc(Assembler::equal, done); // storing a region crossing, non-NULL oop, card is clean. // dirty card and log. movb(Address(card_addr, 0), 0); cmpl(queue_index, 0); jcc(Assembler::equal, runtime); subl(queue_index, wordSize); movptr(tmp2, buffer); #ifdef _LP64 movslq(rscratch1, queue_index); addq(tmp2, rscratch1); movq(Address(tmp2, 0), card_addr); #else addl(tmp2, queue_index); movl(Address(tmp2, 0), card_index); #endif jmp(done); bind(runtime); // save the live input values push(store_addr); push(new_val); #ifdef _LP64 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); #else push(thread); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); pop(thread); #endif pop(new_val); pop(store_addr); bind(done); } */ #endif // SERIALGC ////////////////////////////////////////////////////////////////////////////////// void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. store_check_part_1(obj); store_check_part_2(obj); } void MacroAssembler::store_check(Register obj, Address dst) { store_check(obj); } // split the store check operation so that other instructions can be scheduled inbetween void MacroAssembler::store_check_part_1(Register obj) { BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); shr(obj, CardTableModRefBS::card_shift); } void MacroAssembler::store_check_part_2(Register obj) { BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); CardTableModRefBS* ct = (CardTableModRefBS*)bs; assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); move(AT, (int)ct->byte_map_base); add(AT, AT, obj); sb(ZERO, AT, 0); /* // The calculation for byte_map_base is as follows: // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); // So this essentially converts an address to a displacement and // it will never need to be relocated. On 64bit however the value may be too // large for a 32bit displacement intptr_t disp = (intptr_t) ct->byte_map_base; if (is_simm32(disp)) { Address cardtable(noreg, obj, Address::times_1, disp); movb(cardtable, 0); } else { // By doing it as an ExternalAddress disp could be converted to a rip-relative // displacement and done in a single instruction given favorable mapping and // a smarter version of as_Address. Worst case it is two instructions which // is no worse off then loading disp into a register and doing as a simple // Address() as above. // We can't do as ExternalAddress as the only style since if disp == 0 we'll // assert since NULL isn't acceptable in a reloci (see 6644928). In any case // in some cases we'll get a single instruction version. ExternalAddress cardtable((address)disp); Address index(noreg, obj, Address::times_1); movb(as_Address(ArrayAddress(cardtable, index)), 0); } */ } /* void MacroAssembler::subptr(Register dst, int32_t imm32) { LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); } void MacroAssembler::subptr(Register dst, Register src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } void MacroAssembler::test32(Register src1, AddressLiteral src2) { // src2 must be rval if (reachable(src2)) { testl(src1, as_Address(src2)); } else { lea(rscratch1, src2); testl(src1, Address(rscratch1, 0)); } } // C++ bool manipulation void MacroAssembler::testbool(Register dst) { if(sizeof(bool) == 1) testb(dst, 0xff); else if(sizeof(bool) == 2) { // testw implementation needed for two byte bools ShouldNotReachHere(); } else if(sizeof(bool) == 4) testl(dst, dst); else // unsupported ShouldNotReachHere(); } void MacroAssembler::testptr(Register dst, Register src) { LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); } */ // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); Register end = t2; Register thread = t1; verify_tlab(t1, t2); //blows t1&t2 get_thread(thread); lw(obj, thread, in_bytes(JavaThread::tlab_top_offset())); if (var_size_in_bytes == NOREG) { // i dont think we need move con_size_in_bytes to a register first. // by yjl 8/17/2005 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); addi(end, obj, con_size_in_bytes); } else { add(end, obj, var_size_in_bytes); } lw(AT, thread, in_bytes(JavaThread::tlab_end_offset())); sltu(AT, AT, end); bne(AT, ZERO, slow_case); delayed()->nop(); // update the tlab top pointer sw(end, thread, in_bytes(JavaThread::tlab_top_offset())); // recover var_size_in_bytes if necessary /*if (var_size_in_bytes == end) { sub(var_size_in_bytes, end, obj); }*/ verify_tlab(t1, t2); } // Defines obj, preserves var_size_in_bytes void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { assert_different_registers(obj, var_size_in_bytes, t1, AT); if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq // No allocation in the shared eden. b(slow_case); delayed()->nop(); } else { Address heap_top(t1, Assembler::split_low((int)Universe::heap()->top_addr())); lui(t1, split_high((int)Universe::heap()->top_addr())); lw(obj, heap_top); Register end = t2; Label retry; bind(retry); if (var_size_in_bytes == NOREG) { // i dont think we need move con_size_in_bytes to a register first. // by yjl 8/17/2005 assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); addi(end, obj, con_size_in_bytes); } else { add(end, obj, var_size_in_bytes); } // if end < obj then we wrapped around => object too long => slow case sltu(AT, end, obj); bne(AT, ZERO, slow_case); delayed()->nop(); lui(AT, split_high((int)Universe::heap()->end_addr())); lw(AT, AT, split_low((int)Universe::heap()->end_addr())); sltu(AT, AT, end); bne(AT, ZERO, slow_case); delayed()->nop(); // Compare obj with the top addr, and if still equal, store the new top addr in // end at the address of the top addr pointer. Sets ZF if was equal, and clears // it otherwise. Use lock prefix for atomicity on MPs. if (os::is_MP()) { ///lock(); } // if someone beat us on the allocation, try again, otherwise continue cmpxchg(end, heap_top, obj); beq(AT, ZERO, retry); //by yyq delayed()->nop(); } } void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { Register top = T0; Register t1 = T1; Register t2 = T5; Register t3 = T6; Register thread_reg = T3; Label do_refill, discard_tlab; if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq // No allocation in the shared eden. b(slow_case); delayed()->nop(); } get_thread(thread_reg); lw(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); lw(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); // calculate amount of free space sub(t1, t1, top); shr(t1, LogHeapWordSize); // Retain tlab and allocate object in shared space if // the amount free in the tlab is too large to discard. lw(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); slt(AT, t2, t1); beq(AT, ZERO, discard_tlab); delayed()->nop(); // Retain move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); add(t2, t2, AT); sw(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); if (TLABStats) { // increment number of slow_allocations lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); addiu(AT, AT, 1); sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); } b(try_eden); delayed()->nop(); bind(discard_tlab); if (TLABStats) { // increment number of refills lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); addi(AT, AT, 1); sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); // accumulate wastage -- t1 is amount free in tlab lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); add(AT, AT, t1); sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); } // if tlab is currently allocated (top or end != null) then // fill [top, end + alignment_reserve) with array object beq(top, ZERO, do_refill); delayed()->nop(); // set up the mark word move(AT, (int)markOopDesc::prototype()->copy_set_hash(0x2)); sw(AT, top, oopDesc::mark_offset_in_bytes()); // set the length to the remaining space addi(t1, t1, - typeArrayOopDesc::header_size(T_INT)); addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); sw(t1, top, arrayOopDesc::length_offset_in_bytes()); // set klass to intArrayKlass lui(AT, split_high((int)Universe::intArrayKlassObj_addr())); lw(t1, AT, split_low((int)Universe::intArrayKlassObj_addr())); sw(t1, top, oopDesc::klass_offset_in_bytes()); // refill the tlab with an eden allocation bind(do_refill); lw(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); shl(t1, LogHeapWordSize); // add object_size ?? eden_allocate(top, t1, 0, t2, t3, slow_case); // Check that t1 was preserved in eden_allocate. #ifdef ASSERT if (UseTLAB) { Label ok; assert_different_registers(thread_reg, t1); lw(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset())); shl(AT, LogHeapWordSize); beq(AT, t1, ok); delayed()->nop(); stop("assert(t1 != tlab size)"); should_not_reach_here(); bind(ok); } #endif sw(top, thread_reg, in_bytes(JavaThread::tlab_start_offset())); sw(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); add(top, top, t1); addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); sw(top, thread_reg, in_bytes(JavaThread::tlab_end_offset())); verify_tlab(t1, t2); b(retry); delayed()->nop(); } static const double pi_4 = 0.7853981633974483; // the x86 version is to clumsy, i dont think we need that fuss. maybe i'm wrong, FIXME // must get argument(a double) in F12/F13 //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { //We need to preseve the register which maybe modified during the Call @Jerome void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { //save all modified register here // if (preserve_cpu_regs) { // } //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9 pushad(); //we should preserve the stack space before we call addi(SP, SP, -wordSize * 2); switch (trig){ case 's' : call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); delayed()->nop(); break; case 'c': call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); delayed()->nop(); break; case 't': call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); delayed()->nop(); break; default:assert (false, "bad intrinsic") break; } addi(SP, SP, wordSize * 2); popad(); // if (preserve_cpu_regs) { // } } /* void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { ucomisd(dst, as_Address(src)); } void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { ucomiss(dst, as_Address(src)); } void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { xorpd(dst, as_Address(src)); } else { lea(rscratch1, src); xorpd(dst, Address(rscratch1, 0)); } } void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { xorps(dst, as_Address(src)); } else { lea(rscratch1, src); xorps(dst, Address(rscratch1, 0)); } } */ void MacroAssembler::move(Register reg, int imm) { if (is_simm16(imm)) { addiu(reg, ZERO, imm); } else { lui(reg, split_high(imm)); if (split_low(imm)) addiu(reg, reg, split_low(imm)); } } // NOTE: i dont push eax as i486. // the x86 save eax for it use eax as the jump register void MacroAssembler::verify_oop(Register reg, const char* s) { /* if (!VerifyOops) return; // Pass register number to verify_oop_subroutine char* b = new char[strlen(s) + 50]; sprintf(b, "verify_oop: %s: %s", reg->name(), s); push(rax); // save rax, push(reg); // pass register argument ExternalAddress buffer((address) b); // avoid using pushptr, as it modifies scratch registers // and our contract is not to modify anything movptr(rax, buffer.addr()); push(rax); // call indirectly to solve generation ordering problem movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); call(rax); */ if (!VerifyOops) return; // Pass register number to verify_oop_subroutine char* b = new char[strlen(s) + 50]; sprintf(b, "verify_oop: %s: %s", reg->name(), s); sw(T5, SP, - wordSize); sw(T6, SP, - 2*wordSize); sw(RA, SP, - 3*wordSize); sw(A0, SP ,- 4*wordSize); sw(A1, SP ,- 5*wordSize); sw(AT, SP ,- 6*wordSize); sw(T9, SP ,- 7*wordSize); addiu(SP, SP, - 7 * wordSize); move(A0, (int)b); move(A1, reg); // call indirectly to solve generation ordering problem move(AT, (int)StubRoutines::verify_oop_subroutine_entry_address()); lw(T9, AT, 0); jalr(T9); delayed()->nop(); lw(T5, SP, 6* wordSize); lw(T6, SP, 5* wordSize); lw(RA, SP, 4* wordSize); lw(A0, SP, 3* wordSize); lw(A1, SP, 2* wordSize); lw(AT, SP, 1* wordSize); lw(T9, SP, 0* wordSize); addiu(SP, SP, 7 * wordSize); } void MacroAssembler::verify_oop_addr(Address addr, const char* s) { if (!VerifyOops) { nop(); return; } // Pass register number to verify_oop_subroutine char* b = new char[strlen(s) + 50]; sprintf(b, "verify_oop_addr: %s", s); sw(T5, SP, - wordSize); sw(T6, SP, - 2*wordSize); sw(RA, SP, - 3*wordSize); sw(A0, SP, - 4*wordSize); sw(A1, SP, - 5*wordSize); sw(AT, SP, - 6*wordSize); sw(T9, SP, - 7*wordSize); lw(A1, addr); // addr may use SP, so load from it before change SP addiu(SP, SP, - 7 * wordSize); move(A0, (int)b); // call indirectly to solve generation ordering problem move(AT, (int)StubRoutines::verify_oop_subroutine_entry_address()); lw(T9, AT, 0); jalr(T9); delayed()->nop(); lw(T5, SP, 6* wordSize); lw(T6, SP, 5* wordSize); lw(RA, SP, 4* wordSize); lw(A0, SP, 3* wordSize); lw(A1, SP, 2* wordSize); lw(AT, SP, 1* wordSize); lw(T9, SP, 0* wordSize); addiu(SP, SP, 7 * wordSize); } // used registers : T5, T6 void MacroAssembler::verify_oop_subroutine() { // RA: ra // A0: char* error message // A1: oop object to verify Label exit, error; // increment counter move(T5, (int)StubRoutines::verify_oop_count_addr()); lw(AT, T5, 0); addi(AT, AT, 1); sw(AT, T5, 0); // make sure object is 'reasonable' beq(A1, ZERO, exit); // if obj is NULL it is ok delayed()->nop(); // Check if the oop is in the right area of memory const int oop_mask = Universe::verify_oop_mask(); const int oop_bits = Universe::verify_oop_bits(); move(AT, oop_mask); andr(T5, A1, AT); move(AT, oop_bits); bne(T5, AT, error); delayed()->nop(); // make sure klass is 'reasonable' lw(T5, A1, oopDesc::klass_offset_in_bytes()); // get klass beq(T5, ZERO, error); // if klass is NULL it is broken delayed()->nop(); // Check if the klass is in the right area of memory const int klass_mask = Universe::verify_klass_mask(); const int klass_bits = Universe::verify_klass_bits(); move(AT, klass_mask); andr(T6, T5, AT); move(AT, klass_bits); bne(T6, AT, error); delayed()->nop(); // make sure klass' klass is 'reasonable' lw(T5, T5, oopDesc::klass_offset_in_bytes()); // get klass' klass beq(T5, ZERO, error); // if klass' klass is NULL it is broken delayed()->nop(); move(AT, klass_mask); andr(T6, T5, AT); move(AT, klass_bits); bne(T6, AT, error); delayed()->nop(); // if klass not in right area of memory it is broken too. // return if everything seems ok bind(exit); jr(RA); delayed()->nop(); // handle errors bind(error); pushad(); addi(SP, SP, (-1) * wordSize); call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); delayed()->nop(); addiu(SP, SP, 1 * wordSize); popad(); jr(RA); delayed()->nop(); } void MacroAssembler::verify_tlab(Register t1, Register t2) { #ifdef ASSERT assert_different_registers(t1, t2, AT); if (UseTLAB && VerifyOops) { Label next, ok; get_thread(t1); lw(t2, t1, in_bytes(JavaThread::tlab_top_offset())); lw(AT, t1, in_bytes(JavaThread::tlab_start_offset())); sltu(AT, t2, AT); beq(AT, ZERO, next); delayed()->nop(); stop("assert(top >= start)"); bind(next); lw(AT, t1, in_bytes(JavaThread::tlab_end_offset())); sltu(AT, AT, t2); beq(AT, ZERO, ok); delayed()->nop(); stop("assert(top <= end)"); bind(ok); /* Label next, ok; Register t1 = rsi; Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); push(t1); NOT_LP64(push(thread_reg)); NOT_LP64(get_thread(thread_reg)); movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); jcc(Assembler::aboveEqual, next); stop("assert(top >= start)"); should_not_reach_here(); bind(next); movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); jcc(Assembler::aboveEqual, ok); stop("assert(top <= end)"); should_not_reach_here(); bind(ok); NOT_LP64(pop(thread_reg)); pop(t1); */ } #endif } void MacroAssembler::hswap(Register reg) { //andi(reg, reg, 0xffff); srl(AT, reg, 8); sll(reg, reg, 24); sra(reg, reg, 16); orr(reg, reg, AT); } void MacroAssembler::huswap(Register reg) { //andi(reg, reg, 0xffff); srl(AT, reg, 8); sll(reg, reg, 24); srl(reg, reg, 16); orr(reg, reg, AT); } // something funny to do this will only one more register AT // by yjl 6/29/2005 void MacroAssembler::swap(Register reg) { srl(AT, reg, 8); sll(reg, reg, 24); orr(reg, reg, AT); //reg : 4 1 2 3 srl(AT, AT, 16); xorr(AT, AT, reg); andi(AT, AT, 0xff); //AT : 0 0 0 1^3); xorr(reg, reg, AT); //reg : 4 1 2 1 sll(AT, AT, 16); xorr(reg, reg, AT); //reg : 4 3 2 1 } void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { Label done, again, nequal; bind(again); ll(AT, dest); bne(AT, c_reg, nequal); delayed()->nop(); move(AT, x_reg); sc(AT, dest); beq(AT, ZERO, again); delayed()->nop(); b(done); delayed()->nop(); // not xchged bind(nequal); move(c_reg, AT); move(AT, ZERO); bind(done); } void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { Label done, again, nequal; Register x_reg = x_regLo; dsll32(x_regHi, x_regHi, 0); dsll32(x_regLo, x_regLo, 0); dsrl32(x_regLo, x_regLo, 0); orr(x_reg, x_regLo, x_regHi); Register c_reg = c_regLo; dsll32(c_regHi, c_regHi, 0); dsll32(c_regLo, c_regLo, 0); dsrl32(c_regLo, c_regLo, 0); orr(c_reg, c_regLo, c_regHi); bind(again); lld(AT, dest); bne(AT, c_reg, nequal); delayed()->nop(); //move(AT, x_reg); dadd(AT, x_reg, ZERO); scd(AT, dest); beq(AT, ZERO, again); delayed()->nop(); b(done); delayed()->nop(); // not xchged bind(nequal); //move(c_reg, AT); //move(AT, ZERO); dadd(c_reg, AT, ZERO); dadd(AT, ZERO, ZERO); bind(done); } // be sure the three register is different void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { assert_different_registers(tmp, fs, ft); div_s(tmp, fs, ft); trunc_l_s(tmp, tmp); cvt_s_l(tmp, tmp); mul_s(tmp, tmp, ft); sub_s(fd, fs, tmp); } // be sure the three register is different void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { assert_different_registers(tmp, fs, ft); div_d(tmp, fs, ft); trunc_l_d(tmp, tmp); cvt_d_l(tmp, tmp); mul_d(tmp, tmp, ft); sub_d(fd, fs, tmp); } class ControlWord { public: int32_t _value; int rounding_control() const { return (_value >> 10) & 3 ; } int precision_control() const { return (_value >> 8) & 3 ; } bool precision() const { return ((_value >> 5) & 1) != 0; } bool underflow() const { return ((_value >> 4) & 1) != 0; } bool overflow() const { return ((_value >> 3) & 1) != 0; } bool zero_divide() const { return ((_value >> 2) & 1) != 0; } bool denormalized() const { return ((_value >> 1) & 1) != 0; } bool invalid() const { return ((_value >> 0) & 1) != 0; } void print() const { // rounding control const char* rc; switch (rounding_control()) { case 0: rc = "round near"; break; case 1: rc = "round down"; break; case 2: rc = "round up "; break; case 3: rc = "chop "; break; }; // precision control const char* pc; switch (precision_control()) { case 0: pc = "24 bits "; break; case 1: pc = "reserved"; break; case 2: pc = "53 bits "; break; case 3: pc = "64 bits "; break; }; // flags char f[9]; f[0] = ' '; f[1] = ' '; f[2] = (precision ()) ? 'P' : 'p'; f[3] = (underflow ()) ? 'U' : 'u'; f[4] = (overflow ()) ? 'O' : 'o'; f[5] = (zero_divide ()) ? 'Z' : 'z'; f[6] = (denormalized()) ? 'D' : 'd'; f[7] = (invalid ()) ? 'I' : 'i'; f[8] = '\x0'; // output printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); } }; class StatusWord { public: int32_t _value; bool busy() const { return ((_value >> 15) & 1) != 0; } bool C3() const { return ((_value >> 14) & 1) != 0; } bool C2() const { return ((_value >> 10) & 1) != 0; } bool C1() const { return ((_value >> 9) & 1) != 0; } bool C0() const { return ((_value >> 8) & 1) != 0; } int top() const { return (_value >> 11) & 7 ; } bool error_status() const { return ((_value >> 7) & 1) != 0; } bool stack_fault() const { return ((_value >> 6) & 1) != 0; } bool precision() const { return ((_value >> 5) & 1) != 0; } bool underflow() const { return ((_value >> 4) & 1) != 0; } bool overflow() const { return ((_value >> 3) & 1) != 0; } bool zero_divide() const { return ((_value >> 2) & 1) != 0; } bool denormalized() const { return ((_value >> 1) & 1) != 0; } bool invalid() const { return ((_value >> 0) & 1) != 0; } void print() const { // condition codes char c[5]; c[0] = (C3()) ? '3' : '-'; c[1] = (C2()) ? '2' : '-'; c[2] = (C1()) ? '1' : '-'; c[3] = (C0()) ? '0' : '-'; c[4] = '\x0'; // flags char f[9]; f[0] = (error_status()) ? 'E' : '-'; f[1] = (stack_fault ()) ? 'S' : '-'; f[2] = (precision ()) ? 'P' : '-'; f[3] = (underflow ()) ? 'U' : '-'; f[4] = (overflow ()) ? 'O' : '-'; f[5] = (zero_divide ()) ? 'Z' : '-'; f[6] = (denormalized()) ? 'D' : '-'; f[7] = (invalid ()) ? 'I' : '-'; f[8] = '\x0'; // output printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); } }; class TagWord { public: int32_t _value; int tag_at(int i) const { return (_value >> (i*2)) & 3; } void print() const { printf("%04x", _value & 0xFFFF); } }; class FPU_Register { public: int32_t _m0; int32_t _m1; int16_t _ex; bool is_indefinite() const { return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; } void print() const { char sign = (_ex < 0) ? '-' : '+'; const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); }; }; class FPU_State { public: enum { register_size = 10, number_of_registers = 8, register_mask = 7 }; ControlWord _control_word; StatusWord _status_word; TagWord _tag_word; int32_t _error_offset; int32_t _error_selector; int32_t _data_offset; int32_t _data_selector; int8_t _register[register_size * number_of_registers]; int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } const char* tag_as_string(int tag) const { switch (tag) { case 0: return "valid"; case 1: return "zero"; case 2: return "special"; case 3: return "empty"; } ShouldNotReachHere() return NULL; } void print() const { // print computation registers { int t = _status_word.top(); for (int i = 0; i < number_of_registers; i++) { int j = (i - t) & register_mask; printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); st(j)->print(); printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); } } printf("\n"); // print control registers printf("ctrl = "); _control_word.print(); printf("\n"); printf("stat = "); _status_word .print(); printf("\n"); printf("tags = "); _tag_word .print(); printf("\n"); } }; class Flag_Register { public: int32_t _value; bool overflow() const { return ((_value >> 11) & 1) != 0; } bool direction() const { return ((_value >> 10) & 1) != 0; } bool sign() const { return ((_value >> 7) & 1) != 0; } bool zero() const { return ((_value >> 6) & 1) != 0; } bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } bool parity() const { return ((_value >> 2) & 1) != 0; } bool carry() const { return ((_value >> 0) & 1) != 0; } void print() const { // flags char f[8]; f[0] = (overflow ()) ? 'O' : '-'; f[1] = (direction ()) ? 'D' : '-'; f[2] = (sign ()) ? 'S' : '-'; f[3] = (zero ()) ? 'Z' : '-'; f[4] = (auxiliary_carry()) ? 'A' : '-'; f[5] = (parity ()) ? 'P' : '-'; f[6] = (carry ()) ? 'C' : '-'; f[7] = '\x0'; // output printf("%08x flags = %s", _value, f); } }; class IU_Register { public: int32_t _value; void print() const { printf("%08x %11d", _value, _value); } }; class IU_State { public: Flag_Register _eflags; IU_Register _rdi; IU_Register _rsi; IU_Register _rbp; IU_Register _rsp; IU_Register _rbx; IU_Register _rdx; IU_Register _rcx; IU_Register _rax; void print() const { // computation registers printf("rax, = "); _rax.print(); printf("\n"); printf("rbx, = "); _rbx.print(); printf("\n"); printf("rcx = "); _rcx.print(); printf("\n"); printf("rdx = "); _rdx.print(); printf("\n"); printf("rdi = "); _rdi.print(); printf("\n"); printf("rsi = "); _rsi.print(); printf("\n"); printf("rbp, = "); _rbp.print(); printf("\n"); printf("rsp = "); _rsp.print(); printf("\n"); printf("\n"); // control registers printf("flgs = "); _eflags.print(); printf("\n"); } }; class CPU_State { public: FPU_State _fpu_state; IU_State _iu_state; void print() const { printf("--------------------------------------------------\n"); _iu_state .print(); printf("\n"); _fpu_state.print(); printf("--------------------------------------------------\n"); } }; static void _print_CPU_state(CPU_State* state) { state->print(); }; /* void MacroAssembler::print_CPU_state() { push_CPU_state(); push(rsp); // pass CPU state call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); addptr(rsp, wordSize); // discard argument pop_CPU_state(); } */ void MacroAssembler::align(int modulus) { while (offset() % modulus != 0) nop(); } static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { static int counter = 0; FPU_State* fs = &state->_fpu_state; counter++; // For leaf calls, only verify that the top few elements remain empty. // We only need 1 empty at the top for C2 code. if( stack_depth < 0 ) { if( fs->tag_for_st(7) != 3 ) { printf("FPR7 not empty\n"); state->print(); assert(false, "error"); return false; } return true; // All other stack states do not matter } assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, "bad FPU control word"); // compute stack depth int i = 0; while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; int d = i; while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; // verify findings if (i != FPU_State::number_of_registers) { // stack not contiguous printf("%s: stack not contiguous at ST%d\n", s, i); state->print(); assert(false, "error"); return false; } // check if computed stack depth corresponds to expected stack depth if (stack_depth < 0) { // expected stack depth is -stack_depth or less if (d > -stack_depth) { // too many elements on the stack printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); state->print(); assert(false, "error"); return false; } } else { // expected stack depth is stack_depth if (d != stack_depth) { // wrong stack depth printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); state->print(); assert(false, "error"); return false; } } // everything is cool return true; } void MacroAssembler::verify_FPU(int stack_depth, const char* s) { //FIXME aoqi // %%%%% need to implement this //Unimplemented(); /* if (!VerifyFPU) return; push_CPU_state(); push(rsp); // pass CPU state ExternalAddress msg((address) s); // pass message string s pushptr(msg.addr()); push(stack_depth); // pass stack depth call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); addptr(rsp, 3 * wordSize); // discard arguments // check for error { Label L; testl(rax, rax); jcc(Assembler::notZero, L); int3(); // break if error condition bind(L); } pop_CPU_state(); */ } //We preserve all caller-saved register void MacroAssembler::pushad(){ push(AT); push(A0); push(A1); push(A2); push(A3); push(V0); push(V1); push(T0); push(T1); push(T2); push(T3); push(T4); push(T5); push(T6); push(T7); push(T8); push(T9); push(GP); push(RA); push(FP); }; void MacroAssembler::popad(){ pop(FP); pop(RA); pop(GP); pop(T9); pop(T8); pop(T7); pop(T6); pop(T5); pop(T4); pop(T3); pop(T2); pop(T1); pop(T0); pop(V1); pop(V0); pop(A3); pop(A2); pop(A1); pop(A0); pop(AT); }; void MacroAssembler::push2(Register reg1, Register reg2) { addi(SP, SP, -8); sw(reg2, SP, 0); sw(reg1, SP, 4); } void MacroAssembler::pop2(Register reg1, Register reg2) { lw(reg1, SP, 0); lw(reg2, SP, 4); addi(SP, SP, 8); } void MacroAssembler::load_two_bytes_from_at_bcp(Register reg, Register tmp, int offset) { if(offset & 1){ lbu(reg, BCP, offset+1); lbu(tmp, BCP, offset); sll(reg, reg, 8); addu(reg, tmp, reg); } else lhu(reg, BCP, offset); } void MacroAssembler::store_two_byts_to_at_bcp(Register reg, Register tmp, int offset) { if(offset & 1){ sb(reg, BCP, offset); srl(reg, reg, 8); sb(reg, BCP, offset + 1); } else sh(reg, BCP, offset); } /* void MacroAssembler::load_klass(Register dst, Register src) { #ifdef _LP64 if (UseCompressedOops) { movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); decode_heap_oop_not_null(dst); } else #endif movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); } void MacroAssembler::load_prototype_header(Register dst, Register src) { #ifdef _LP64 if (UseCompressedOops) { movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); } else #endif { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); } } void MacroAssembler::store_klass(Register dst, Register src) { #ifdef _LP64 if (UseCompressedOops) { encode_heap_oop_not_null(src); movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); } else #endif movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); } #ifdef _LP64 void MacroAssembler::store_klass_gap(Register dst, Register src) { if (UseCompressedOops) { // Store to klass gap in destination movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); } } void MacroAssembler::load_heap_oop(Register dst, Address src) { if (UseCompressedOops) { movl(dst, src); decode_heap_oop(dst); } else { movq(dst, src); } } void MacroAssembler::store_heap_oop(Address dst, Register src) { if (UseCompressedOops) { assert(!dst.uses(src), "not enough registers"); encode_heap_oop(src); movl(dst, src); } else { movq(dst, src); } } // Algorithm must match oop.inline.hpp encode_heap_oop. void MacroAssembler::encode_heap_oop(Register r) { assert (UseCompressedOops, "should be compressed"); #ifdef ASSERT if (CheckCompressedOops) { Label ok; push(rscratch1); // cmpptr trashes rscratch1 cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); jcc(Assembler::equal, ok); stop("MacroAssembler::encode_heap_oop: heap base corrupted?"); bind(ok); pop(rscratch1); } #endif verify_oop(r, "broken oop in encode_heap_oop"); testq(r, r); cmovq(Assembler::equal, r, r12_heapbase); subq(r, r12_heapbase); shrq(r, LogMinObjAlignmentInBytes); } void MacroAssembler::encode_heap_oop_not_null(Register r) { assert (UseCompressedOops, "should be compressed"); #ifdef ASSERT if (CheckCompressedOops) { Label ok; testq(r, r); jcc(Assembler::notEqual, ok); stop("null oop passed to encode_heap_oop_not_null"); bind(ok); } #endif verify_oop(r, "broken oop in encode_heap_oop_not_null"); subq(r, r12_heapbase); shrq(r, LogMinObjAlignmentInBytes); } void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { assert (UseCompressedOops, "should be compressed"); #ifdef ASSERT if (CheckCompressedOops) { Label ok; testq(src, src); jcc(Assembler::notEqual, ok); stop("null oop passed to encode_heap_oop_not_null2"); bind(ok); } #endif verify_oop(src, "broken oop in encode_heap_oop_not_null2"); if (dst != src) { movq(dst, src); } subq(dst, r12_heapbase); shrq(dst, LogMinObjAlignmentInBytes); } void MacroAssembler::decode_heap_oop(Register r) { assert (UseCompressedOops, "should be compressed"); #ifdef ASSERT if (CheckCompressedOops) { Label ok; push(rscratch1); cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); jcc(Assembler::equal, ok); stop("MacroAssembler::decode_heap_oop: heap base corrupted?"); bind(ok); pop(rscratch1); } #endif Label done; shlq(r, LogMinObjAlignmentInBytes); jccb(Assembler::equal, done); addq(r, r12_heapbase); #if 0 // alternate decoding probably a wash. testq(r, r); jccb(Assembler::equal, done); leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); #endif bind(done); verify_oop(r, "broken oop in decode_heap_oop"); } void MacroAssembler::decode_heap_oop_not_null(Register r) { assert (UseCompressedOops, "should only be used for compressed headers"); // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); } void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { assert (UseCompressedOops, "should only be used for compressed headers"); // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); } void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); int oop_index = oop_recorder()->find_index(obj); RelocationHolder rspec = oop_Relocation::spec(oop_index); mov_literal32(dst, oop_index, rspec, narrow_oop_operand); } void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); } } #endif // _LP64 */ SkipIfEqual::SkipIfEqual( MacroAssembler* masm, const bool* flag_addr, bool value) { _masm = masm; _masm->move(AT, (int32_t)flag_addr); _masm->lb(AT,AT,0); _masm->addi(AT,AT,-value); _masm->beq(AT,ZERO,_label); _masm->delayed()->nop(); } SkipIfEqual::~SkipIfEqual() { _masm->bind(_label); }