view hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp @ 23:388ae1bd0bdd

Fix 2 bugs related to patching and make some code more readable. 1. On MIPS, the oop table used for relocation must be updated accordingly when patching. 2. Allocate enough space for patching. 3. Make NativeInstructions more readable. NativeCall's size is 16 bytes instead of 12; if 12 were used, we would have to fix it up by explicitly adding 4.
author YANG Yongqiang <yangyongqiang@loongson.cn>
date Thu, 04 Nov 2010 11:15:53 +0800
parents 7a9f890eafef
children da31f361800f
line wrap: on
line source

#/*
 * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * Copyright 2010 Lemote, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_c1_LIRAssembler_mips.cpp.incl"

#define __ _masm->

// Make sure neither temp register aliases 'preserve'.  A colliding temp
// is replaced by 'extra'; on return, 'preserve', 'tmp1' and 'tmp2' are
// guaranteed to be pairwise distinct.
static void select_different_registers(Register preserve,
		Register extra,
		Register &tmp1,
		Register &tmp2) {
	const bool tmp1_collides = (tmp1 == preserve);
	const bool tmp2_collides = (tmp2 == preserve);
	if (tmp1_collides) {
		assert_different_registers(tmp1, tmp2, extra);
		tmp1 = extra;
	} else if (tmp2_collides) {
		assert_different_registers(tmp1, tmp2, extra);
		tmp2 = extra;
	}
	assert_different_registers(preserve, tmp1, tmp2);
}



// Three-temp variant: replace whichever temp collides with 'preserve'
// by 'extra', so that on return all four registers are pairwise
// distinct.
static void select_different_registers(Register preserve,
		Register extra,
		Register &tmp1,
		Register &tmp2,
		Register &tmp3) {
	if (tmp1 == preserve) {
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp1 = extra;
	} else if (tmp2 == preserve) {
		// sanity check was missing in this branch only; keep it
		// consistent with the tmp1/tmp3 branches and the two-temp
		// overload above
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp2 = extra;
	} else if (tmp3 == preserve) {
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp3 = extra;
	}
	assert_different_registers(preserve, tmp1, tmp2, tmp3);
}

// need add method Assembler::is_simm16 in assembler_gs2.hpp
// Returns true when 'opr' is an integer constant that fits in a signed
// 16-bit immediate (MIPS I-type instructions encode simm16 operands).
// Any non-constant, or a constant of any type other than T_INT, is not
// considered small.
bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
	if (!opr->is_constant()) {
		return false;
	}
	LIR_Const* constant = opr->as_constant_ptr();
	if (constant->type() != T_INT) {
		return false;
	}
	return Assembler::is_simm16(constant->as_jint());
}
//FIXME, which register should be used?
// Operand holding the incoming receiver ('this'); T0 in this MIPS port.
LIR_Opr LIR_Assembler::receiverOpr() {
	return FrameMap::_t0_oop_opr;
}

// The receiver arrives in the same register it is kept in: see receiverOpr().
LIR_Opr LIR_Assembler::incomingReceiverOpr() {
	return receiverOpr();
}

// Register that carries the OSR buffer pointer on entry (see osr_entry()).
LIR_Opr LIR_Assembler::osrBufferPointer() {
	//return FrameMap::ecx_opr;
//	return FrameMap::_v1_opr;
	return FrameMap::_t0_opr;
}

//--------------fpu register translations-----------------------
// FIXME:I do not know what's to do for mips fpu

// Emit 'f' into the constant section and return its address.  If the
// constant section overflows, bail out of the compilation and return
// the start of the section as a harmless placeholder address.
address LIR_Assembler::float_constant(float f) {
	address const_addr = __ float_constant(f);
	if (const_addr != NULL) {
		return const_addr;
	}
	bailout("const section overflow");
	return __ code()->consts()->start();
}


// Emit 'd' into the constant section and return its address.  If the
// constant section overflows, bail out of the compilation and return
// the start of the section as a harmless placeholder address.
address LIR_Assembler::double_constant(double d) {
	address const_addr = __ double_constant(d);
	if (const_addr != NULL) {
		return const_addr;
	}
	bailout("const section overflow");
	return __ code()->consts()->start();
}





// x87-style FPU state reset; no MIPS equivalent has been implemented.
void LIR_Assembler::reset_FPU() {
	Unimplemented();
}


// x87-specific precision-control setting; no MIPS equivalent has been
// implemented.
void LIR_Assembler::set_24bit_FPU() {
	Unimplemented();
}

//FIXME.
// The following four hooks manipulate the x87 FPU register stack on
// x86.  MIPS has a flat FPU register file, so they are all no-ops here.
void LIR_Assembler::fpop() {
	// do nothing
}
void LIR_Assembler::fxch(int i) {
	// do nothing
}
void LIR_Assembler::fld(int i) {
	// do nothing
}
void LIR_Assembler::ffree(int i) {
	// do nothing
}

// Emit a MIPS break instruction (code 17) to trap into the debugger.
void LIR_Assembler::breakpoint() {
  __ brk(17);
}
//FIXME, opr can not be float?
// Push 'opr' onto the machine stack.  Handles single/double cpu
// registers, stack slots and object/int constants; anything else
// (including float operands -- see FIXME above) is unreachable.
void LIR_Assembler::push(LIR_Opr opr) {
	if (opr->is_single_cpu()) {
		__ push_reg(opr->as_register());
	} else if (opr->is_double_cpu()) {
		// push hi first so lo ends up at the lower address
		__ push_reg(opr->as_register_hi());
		__ push_reg(opr->as_register_lo());
	} else if (opr->is_stack()) {
		__ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
	} else if (opr->is_constant()) {
		LIR_Const* const_opr = opr->as_constant_ptr();
		if (const_opr->type() == T_OBJECT) {
			__ push_oop(const_opr->as_jobject());
		} else if (const_opr->type() == T_INT) {
			__ push_jint(const_opr->as_jint());
		} else {
			ShouldNotReachHere();
		}
	} else {
		ShouldNotReachHere();
	}
}

// Pop the top of the machine stack into 'opr'.  Only single-word cpu
// registers are supported here.
void LIR_Assembler::pop(LIR_Opr opr) {
	if (opr->is_single_cpu() ) { 
		__ pop(opr->as_register());
	} else {
		assert(false, "Must be single word register or floating-point register");
	}
}


// Convert a LIR_Address into an assembler Address (base register +
// displacement).  Note: any index component of 'addr' is ignored here.
Address LIR_Assembler::as_Address(LIR_Address* addr) {
	Register reg = addr->base()->as_register();
	// now we need this for parameter pass
	return Address(reg, addr->disp());
}


// Address of the low word of a two-word operand (same as the base address).
Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
	return as_Address(addr);
}


// Address of the high word of a two-word operand: base address plus
// half a long (4 bytes on this 32-bit port).
Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
	Register reg = addr->base()->as_register();
	return Address(reg, addr->disp()+longSize/2);
}


//void LIR_Assembler::osr_entry(IRScope* scope, int number_of_locks, Label* continuation, int osr_bci) { 
// Emit the OSR (on-stack replacement) entry point: record the entry
// offset, build the compiled frame, and copy the monitors out of the
// OSR buffer (T0) into the compiled activation's monitor slots.
void LIR_Assembler::osr_entry() { 
	//  assert(scope->is_top_scope(), "inlined OSR not yet implemented");
	offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
	BlockBegin* osr_entry = compilation()->hir()->osr_entry();
	ValueStack* entry_state = osr_entry->state();
	int number_of_locks = entry_state->locks_size();

	// we jump here if osr happens with the interpreter
	// state set up to continue at the beginning of the
	// loop that triggered osr - in particular, we have
	// the following registers setup:
	//
	// S7: interpreter locals pointer
	// V1: interpreter locks pointer
	// RA: return address
	//T0: OSR buffer
	// build frame
	// ciMethod* m = scope->method();
	// NOTE(review): 'm' is not used below -- candidate for removal
	ciMethod* m = compilation()->method();
	__ build_frame(initial_frame_size_in_bytes());

  // OSR buffer is
  //
  // locals[nlocals-1..0]
  // monitors[0..number_of_locks]
  //
  // locals is a direct copy of the interpreter frame so in the osr buffer
  // so first slot in the local array is the last local from the interpreter
  // and last slot is local[0] (receiver) from the interpreter
  //
  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
  // in the interpreter frame (the method lock if a sync method)

  // Initialize monitors in the compiled activation.
  //   T0: pointer to osr buffer
  //
  // All other registers are dead at this point and the locals will be
  // copied into place by code emitted in the IR.

  Register OSR_buf = osrBufferPointer()->as_register();

  
  // note: we do osr only if the expression stack at the loop beginning is empty,
  //       in which case the spill area is empty too and we don't have to setup
  //       spilled locals
  //
  // copy monitors
  // V1: pointer to locks
  { 
	  assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
	  // monitors sit after the locals in the OSR buffer; iterate from the
	  // highest-addressed slot downwards
	  int monitor_offset = BytesPerWord * method()->max_locals()+
		  (BasicObjectLock::size() * BytesPerWord) * (number_of_locks - 1);
	  for (int i = 0; i < number_of_locks; i++) {
		  int slot_offset =monitor_offset - (i * BasicObjectLock::size())*BytesPerWord;
#ifdef ASSERT
		  { 
			  Label L;
			  //__ lw(AT, V1, slot_offset * BytesPerWord + BasicObjectLock::obj_offset_in_bytes());
			  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
			  __ bne(AT, ZERO, L);
			  __ delayed()->nop();
			  __ stop("locked object is NULL");
			  __ bind(L);
		  }
#endif
		  // copy the BasicLock and the object reference into this frame's
		  // monitor slots
		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::lock_offset_in_bytes());
		  __ sw(AT, frame_map()->address_for_monitor_lock(i));
		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
		  __ sw(AT, frame_map()->address_for_monitor_object(i));
	  }
  }
}


// Emit the inline-cache check on the receiver at the method's verified
// entry.  Returns the code offset at which the check starts.
int LIR_Assembler::check_icache() {
	Register receiver = FrameMap::receiver_opr->as_register();
	Register ic_klass = IC_Klass;

	int offset = __ offset();
	// pass the local alias: 'ic_klass' was previously declared but
	// unused while IC_Klass was referenced directly
	__ inline_cache_check(receiver, ic_klass);
	__ align(CodeEntryAlignment);
	return offset;
}

// Load a yet-unknown oop into 'reg' via a PatchingStub: emit a lui/addiu
// pair materializing a NULL placeholder, recorded with an oop relocation
// so the patching code can later rewrite both the instructions and the
// oop table entry.  Do not reorder: the relocation must immediately
// precede the lui.
void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
	jobject o = NULL;
	PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
	int oop_index = __ oop_recorder()->allocate_index(o);
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(reg, Assembler::split_high((int)o));
	__ addiu(reg, reg, Assembler::split_low((int)o));
	// patching_epilog(patch, LIR_Op1::patch_normal, noreg, info);
	patching_epilog(patch, lir_patch_normal, reg, info);
}


// Unlock monitor 'monitor_no' held on 'obj_opr'.  If 'exception' is a
// valid register its value is saved across the unlock (at [SP+8], above
// the two slots used by the slow-path stub) and restored afterwards.
void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register unused, int monitor_no, Register exception) {

	if (exception->is_valid()) {
		// preserve exception
		// note: the monitor_exit runtime call is a leaf routine
		//       and cannot block => no GC can happen
		// The slow case (MonitorAccessStub) uses the first two stack slots
		// ([SP+0] and [SP+4]), therefore we store the exception at [esp+8]
		__ sw(exception, SP, 2 * wordSize);
	}

	Register obj_reg  = obj_opr->as_register();
	Register lock_reg = lock_opr->as_register();

	// compute pointer to BasicLock
	//Address lock_addr = frame_map()->address_for_monitor_lock_index(monitor_no);
	Address lock_addr = frame_map()->address_for_monitor_lock(monitor_no);
	__ lea(lock_reg, lock_addr);
	// unlock object
	MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, true, monitor_no);
	// temporary fix: must be created after exceptionhandler, therefore as call stub
	_slow_case_stubs->append(slow_case);
	if (UseFastLocking) {
		// try inlined fast unlocking first, revert to slow locking if it fails
		// note: lock_reg points to the displaced header since the displaced header offset is 0!
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
		__ unlock_object(NOREG, obj_reg, lock_reg, *slow_case->entry());
	} else {
		// always do slow unlocking
		// note: the slow unlocking code could be inlined here, however if we use
		//       slow unlocking, speed doesn't matter anyway and this solution is
		//       simpler and requires less duplicated code - additionally, the
		//       slow unlocking code is the same in either case which simplifies
		//       debugging
		__ b(*slow_case->entry());
		__ delayed()->nop();
	}
	// done
	__ bind(*slow_case->continuation());

	if (exception->is_valid()) {
		// restore exception
		__ lw(exception, SP, 2 * wordSize);
	}
}

// This specifies the esp decrement needed to build the frame
// This specifies the esp decrement needed to build the frame
int LIR_Assembler::initial_frame_size_in_bytes() {
	// if rounding, must let FrameMap know!
	return (frame_map()->framesize() - 2)  * BytesPerWord; // subtract two words to account for return address and link
}

// Emit the method's exception handler as a separate stub: search for a
// handler (V0: exception oop, V1: throwing pc), unlock the receiver for
// synchronized methods, then unwind and forward to the caller.
void LIR_Assembler::emit_exception_handler() { 
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  // Lazy deopt bug 4932387. If last instruction is a call then we
  // need an area to patch where we won't overwrite the exception
  // handler. This means we need 5 bytes. Could use a fat_nop 
  // but since this never gets executed it doesn't really make
  // much difference.
  // 
	// pad with one word more than a NativeCall so lazy-deopt patching
	// cannot overwrite the handler
	for (int i = 0; i < (NativeCall::instruction_size/4 + 1) ; i++ ) {
		__ nop();
	}

  // generate code for exception handler
	address handler_base = __ start_a_stub(exception_handler_size);
	if (handler_base == NULL) {
		//no enough space 
		bailout("exception handler overflow");
		return;
	}



	compilation()->offsets()->set_value(CodeOffsets::Exceptions, code_offset()); 
	// if the method does not have an exception handler, then there is
	// no reason to search for one
	if (compilation()->has_exception_handlers() || JvmtiExport::can_post_exceptions()) {
		// the exception oop and pc are in V0 and V1
		// no other registers need to be preserved, so invalidate them
		// check that there is really an exception
		__ verify_not_null_oop(V0);

		// search an exception handler (V0: exception oop, V1: throwing pc)
		__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
				relocInfo::runtime_call_type);
		__ delayed()->nop();
    // if the call returns here, then the exception handler for particular
    // exception doesn't exist -> unwind activation and forward exception to caller
  }

	// the exception oop is in V0
	// no other registers need to be preserved, so invalidate them
	// check that there is really an exception
	__ verify_not_null_oop(V0);

	// unlock the receiver/klass if necessary
	// V0: exception
	ciMethod* method = compilation()->method();
	if (method->is_synchronized() && GenerateSynchronizationCode) {
		monitorexit(FrameMap::_t0_oop_opr, FrameMap::_t6_opr, NOREG, 0, V0);
	}

	// unwind activation and forward exception to caller
	// V0: exception
	__ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), 
				relocInfo::runtime_call_type);
	__ delayed()->nop();
	__ end_a_stub();
}

// Emit the deoptimization handler stub and record its offset.
void LIR_Assembler::emit_deopt_handler() {
	// if the last instruction is a call (typically to do a throw which
	// is coming at the end after block reordering) the return address
 	// must still point into the code area in order to avoid assertion
 	// failures when searching for the corresponding bci => add a nop
 	// (was bug 5/14/1999 - gri)
 	
 	__ nop();
	
 	// generate code for exception handler
	address handler_base = __ start_a_stub(deopt_handler_size);
	if (handler_base == NULL) {
		// not enough space left for the handler
		bailout("deopt handler overflow");
		return;
	}
	#ifdef ASSERT
	int offset = code_offset();
	#endif // ASSERT
 
	compilation()->offsets()->set_value(CodeOffsets::Deopt, code_offset());
 
	// push the current pc so the deopt blob can find the deopt site
	InternalAddress here(__ pc());
	//FIXE:: may be wrong, Address_Literal
	__ lw(AT, __ as_Address(here) );
	__ push(AT);
	assert(code_offset() - offset <= deopt_handler_size, "overflow");
 	__ end_a_stub();

}


// Optimized Library calls
// This is the fast version of java.lang.String.compare; it has not
// OSR-entry and therefore, we generate a slow version for OSR's
//void LIR_Assembler::emit_string_compare(IRScope* scope) {
// Intrinsic for java.lang.String.compareTo: receiver in T0, argument
// string in arg1.  Computes the first differing char (or the length
// difference) into V0 and returns.  NOTE(review): bare '__ delayed();'
// calls below deliberately let the *next* emitted instruction fill the
// branch delay slot -- do not reorder.
void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
	// get two string object in T0&T1
	//receiver already in T0
	__ lw(T1, arg1->as_register());
	__ lw (T2, T0, java_lang_String::value_offset_in_bytes());	//value, T_CHAR array
	__ lw (AT, T0, java_lang_String::offset_offset_in_bytes());	//offset
	__ shl(AT, 1);
	__ add(T2, T2, AT);
	__ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_CHAR));
	// Now T2 is the address of the first char in first string(T0)

	add_debug_info_for_null_check_here(info);
	__ lw (T3, T1, java_lang_String::value_offset_in_bytes());
	__ lw (AT, T1, java_lang_String::offset_offset_in_bytes());
	__ shl(AT, 1);
	__ add(T3, T3, AT);
	__ addi(T3, T3, arrayOopDesc::base_offset_in_bytes(T_CHAR));
	// Now T3 is the address of the first char in second string(T1)

	// compute minimum length (in T4) and difference of lengths (V0)
	Label L;
	__ lw (T4, Address(T0, java_lang_String::count_offset_in_bytes())); 
	// the length of the first string(T0)
	__ lw (T5, Address(T1, java_lang_String::count_offset_in_bytes()));	
	// the length of the second string(T1)

	__ subu(V0, T4, T5);
	__ blez(V0, L);
	__ delayed()->nop();
	__ move (T4, T5);
	__ bind (L);

	Label Loop, haveResult, LoopEnd;
	__ bind(Loop);
	__ beq(T4, ZERO, LoopEnd);
	__ delayed();

	// fills the delay slot of the beq above
	__ addi(T2, T2, 2);

	// compare current character
	__ lhu(T5, T2, -2);
	__ lhu(T6, T3, 0);
	__ bne(T5, T6, haveResult);
	__ delayed();

	// fills the delay slot of the bne above
	__ addi(T3, T3, 2);

	__ b(Loop);
	__ delayed()->addi(T4, T4, -1);

	__ bind(haveResult);
	__ subu(V0, T5, T6);

	__ bind(LoopEnd);
	return_op(FrameMap::_v0_opr);
}


// Emit the method epilogue: tear down the frame, touch the safepoint
// polling page (relocated as poll_return so the VM can find it) and
// return.  The relocation must immediately precede the polling load.
void LIR_Assembler::return_op(LIR_Opr result) {
	assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, "word returns are in V0");
	// Pop the stack before the safepoint code
	__ leave();
	__ lui(AT, Assembler::split_high((intptr_t)os::get_polling_page() 
			+ (SafepointPollOffset % os::vm_page_size())));
	__ relocate(relocInfo::poll_return_type);
	__ lw(AT, AT, Assembler::split_low((intptr_t)os::get_polling_page() 
			+ (SafepointPollOffset % os::vm_page_size())));

	__ jr(RA);
	__ delayed()->nop();
}

//read protect mem to ZERO won't cause the exception only in godson-2e, So I modify ZERO to AT .@jerome,11/25,2006
// Emit a safepoint poll: load from the polling page (relocated as
// poll_type) with debug info attached.  Returns the offset of the
// poll sequence.  The load target is AT rather than ZERO -- see the
// godson-2e note above.
int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
  assert(info != NULL, "info must not be null for safepoint poll");
	int offset = __ offset();
  Register r = tmp->as_register();
	__ lui(r, Assembler::split_high((intptr_t)os::get_polling_page() 
				+ (SafepointPollOffset % os::vm_page_size())));
  add_debug_info_for_branch(info);
	__ relocate(relocInfo::poll_type);
	__ lw(AT, r, Assembler::split_low((intptr_t)os::get_polling_page() 
				+ (SafepointPollOffset % os::vm_page_size())));
	return offset; 
}

// Emit a register-to-register move; elided when source and destination
// are the same register.
void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
	if (from_reg != to_reg) __ move(to_reg, from_reg);
}


// Exchange the contents of two registers using the xor trick (needs no
// scratch register).  Guard against a == b: xor-swapping a register
// with itself would clear it instead of leaving it unchanged.
void LIR_Assembler::swap_reg(Register a, Register b) {
	if (a == b) {
		return; // nothing to swap; emitting the xors would zero the register
	}
	__ xorr(a, a, b);
	__ xorr(b, a, b);
	__ xorr(a, a, b);
}

// Materialize a constant into a register (cpu or fpu).  Float/double
// constants go through the constant table; each lui/addiu address
// sequence is preceded by its relocation and must not be reordered.
void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
 	assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();
	switch (c->type()) {
    case T_INT:
    	{
    		jint con = c->as_jint();
    		if (dest->is_single_cpu()) {
    			assert(patch_code == lir_patch_none, "no patching handled here");
    			__ move(dest->as_register(), con);
    		} else {
    			assert(dest->is_single_fpu(), "wrong register kind");
    			__ move(AT, con);
    			__ mtc1(AT, dest->as_float_reg());
    		}
    	}
    	break;
    
    case T_LONG:
    	{
    		// split the 64-bit constant into two 32-bit words
    		// NOTE(review): pointer arithmetic assumes little-endian word
    		// order in memory -- confirm for the target
    		jlong con = c->as_jlong();
    		jint* conhi = (jint*)&con + 1;
    		jint* conlow = (jint*)&con;
    
    		if (dest->is_double_cpu()) {
    			__ move(dest->as_register_lo(), *conlow);
    			__ move(dest->as_register_hi(), *conhi);
    		} else {
    		//	assert(dest->is_double(), "wrong register kind");
    			__ move(AT, *conlow);
    			__ mtc1(AT, dest->as_double_reg());
    			__ move(AT, *conhi);
    			__ mtc1(AT, dest->as_double_reg()+1);
    		}
    	}
    	break;
    
    case T_OBJECT:
    	{
    		if (patch_code == lir_patch_none) {
          jobject2reg(c->as_jobject(), dest->as_register());
    		} else {
    			jobject2reg_with_patching(dest->as_register(), info);
    		}
    	}
    	break;
    
    case T_FLOAT:
    	{
    		address const_addr = float_constant(c->as_jfloat());
    		assert (const_addr != NULL, "must create float constant in the constant table");
    
    		if (dest->is_single_fpu()) {
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lwc1(dest->as_float_reg(), AT, 0);
    
    		} else {
    			assert(dest->is_single_cpu(), "Must be a cpu register.");
    			assert(dest->as_register() != AT, "AT can not be allocated.");
    
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lw(dest->as_register(), AT, 0); 
    		}
    	}
    	break;
    
    case T_DOUBLE:
    	{
    		address const_addr = double_constant(c->as_jdouble());
    		assert (const_addr != NULL, "must create double constant in the constant table");
    		
    		if (dest->is_double_fpu()) {
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lwc1(dest->as_double_reg(), AT, 0);
    			__ lwc1(dest->as_double_reg()+1, AT, 4);					
    		} else {
    			assert(dest->as_register_lo() != AT, "AT can not be allocated.");
    			assert(dest->as_register_hi() != AT, "AT can not be allocated.");
    
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lw(dest->as_register_lo(), AT, 0);
    			__ lw(dest->as_register_hi(), AT, 4);
    		}
    	}
    	break;
    
    default:
    	ShouldNotReachHere();
	}
}


// Store a constant into a stack slot.  Object constants are recorded in
// the oop table and emitted with an oop relocation; the relocation must
// immediately precede the lui/addiu pair.
void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
	assert(src->is_constant(), "should not call otherwise");
 	assert(dest->is_stack(), "should not call otherwise");
   	LIR_Const* c = src->as_constant_ptr();
	switch (c->type()) {
  	case T_INT:  // fall through
  	case T_FLOAT:
		  __ move(AT, c->as_jint_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
		  break;

  	case T_OBJECT:
    	if (c->as_jobject() == NULL) {
			  __ sw(ZERO, frame_map()->address_for_slot(dest->single_stack_ix()));
		  } else {
			  int oop_index = __ oop_recorder()->find_index(c->as_jobject());
			  RelocationHolder rspec = oop_Relocation::spec(oop_index);
			  __ relocate(rspec);
			  __ lui(AT, Assembler::split_high((int)c->as_jobject()));
			  __ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
			  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
			}
		  break;
  	case T_LONG:  // fall through
  	case T_DOUBLE:
      __ move(AT, c->as_jint_lo_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
						lo_word_offset_in_bytes));
 		  __ move(AT, c->as_jint_hi_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
						hi_word_offset_in_bytes));
		  break;
  	default:
  		ShouldNotReachHere();
  }
}

// Store a constant to memory.  Debug info for an implicit null check,
// if any, is attached before the first store.
void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info ) {
	assert(src->is_constant(), "should not call otherwise");
	assert(dest->is_address(), "should not call otherwise");
	LIR_Const* c = src->as_constant_ptr();
	LIR_Address* addr = dest->as_address_ptr();

	if (info != NULL) add_debug_info_for_null_check_here(info);
	switch (type) {
		case T_LONG: // fall through
		case T_DOUBLE:
			// two 32-bit stores; hi word first
			__ move(AT, c->as_jint_hi_bits());
			__ sw(AT, as_Address_hi(addr));
			__ move(AT, c->as_jint_lo_bits());
			__ sw(AT, as_Address_lo(addr));
			break; 
		case T_OBJECT:  // fall through
		case T_ARRAY:   
			if (c->as_jobject() == NULL){
				__ sw(ZERO, as_Address(addr));
			} else {
				// record the oop and emit its relocation right before the
				// lui/addiu pair materializing the address
				int oop_index = __ oop_recorder()->find_index(c->as_jobject());
				RelocationHolder rspec = oop_Relocation::spec(oop_index);
				__ relocate(rspec);
				__ lui(AT, Assembler::split_high((int)c->as_jobject()));
				__ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
				__ sw(AT, as_Address(addr));
			}
			break;
		case T_INT:     // fall through
		case T_FLOAT:  
			__ move(AT, c->as_jint_bits());
			__ sw(AT, as_Address(addr));
			break;
		case T_BOOLEAN: // fall through
		case T_BYTE:    
			__ move(AT, c->as_jint());
			__ sb(AT, as_Address(addr));
			break;
		case T_CHAR:    // fall through
		case T_SHORT:   
			__ move(AT, c->as_jint());
			__ sh(AT, as_Address(addr));
			break;
		default: ShouldNotReachHere();
	};
}

// Move between registers: fpu<->fpu, cpu<->cpu (including register
// pairs, ordering the two word-moves to avoid clobbering overlapping
// pairs), and cpu<->fpu transfers via mfc1/mtc1.
void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  if (dest->is_float_kind() && src->is_float_kind()) {
		if (dest->is_single_fpu()) {
			assert(src->is_single_fpu(), "must both be float");
			 __ mov_s(dest->as_float_reg(), src->as_float_reg());
		} else {
			assert(src->is_double_fpu(), "must bothe be double");
			__ mov_d( dest->as_double_reg(),src->as_double_reg());
		}
  } else if (!dest->is_float_kind() && !src->is_float_kind()) {
	  if (dest->is_single_cpu()) {
		  assert(src->is_single_cpu(), "must match");
		  move_regs(src->as_register(), dest->as_register());
	  } else if (dest->is_double_cpu()) {
		  //      assert(src->is_double_cpu() && !src->overlaps(dest), "must match and not overlap");
		  assert(src->is_double_cpu(),"must match and not overlap");
      // order the two moves so an overlapping word is not overwritten
      // before it has been read; if both orders would clobber, the
      // pairs are exactly swapped and swap_reg handles it
      if (src->as_register_hi() != dest->as_register_lo()) {
		    move_regs(src->as_register_lo(), dest->as_register_lo());
		    move_regs(src->as_register_hi(), dest->as_register_hi());
      } else if (src->as_register_lo() != dest->as_register_hi()) {
	  	  move_regs(src->as_register_hi(), dest->as_register_hi());     
  		  move_regs(src->as_register_lo(), dest->as_register_lo());
      } else {
        swap_reg(src->as_register_lo(), src->as_register_hi());
      }

	  }
  } else {
	  // float to int or int to float moves
	  if (dest->is_double_cpu()) {
		  assert(src->is_double_fpu(), "must match");
		  __ mfc1(dest->as_register_lo(), src->as_double_reg());
		  __ mfc1(dest->as_register_hi(), src->as_double_reg() + 1);
	  } else if (dest->is_single_cpu()) {
		  assert(src->is_single_fpu(), "must match");
		  __ mfc1(dest->as_register(), src->as_float_reg());
	  } else if (dest->is_double_fpu()) {
		  assert(src->is_double_cpu(), "must match");
		  __ mtc1(src->as_register_lo(), dest->as_double_reg());
		  __ mtc1(src->as_register_hi(), dest->as_double_reg() + 1);
	  } else if (dest->is_single_fpu()) {
		  assert(src->is_single_cpu(), "must match");
		  __ mtc1(src->as_register(), dest->as_float_reg());
	  }
  }
}


// Spill a register (cpu pair or fpu) into a stack slot; oops are
// verified before the store.
void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type,bool pop_fpu_stack) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");

  if (src->is_single_cpu()) {
    Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
    if (type == T_OBJECT || type == T_ARRAY) {
      __ verify_oop(src->as_register());
    }
    __ sw(src->as_register(),dst);  
  } else if (src->is_double_cpu()) {
    Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
    Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
     __ sw(src->as_register_lo(),dstLO);
     __ sw(src->as_register_hi(),dstHI);
  }else if (src->is_single_fpu()) {
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    __ swc1(src->as_float_reg(), dst_addr);

  } else if (src->is_double_fpu()) {
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    // store the two halves of the double as consecutive words
    __ swc1(src->as_double_reg(), dst_addr);
    __ swc1(src->as_double_reg() + 1, dst_addr.base(), dst_addr.disp() + 4);

  } else {
    ShouldNotReachHere();
  }
}

//FIXME
// Store a register to memory, handling field-access patching.  When
// patching is needed, the displacement is materialized into AT via a
// patchable lui/addiu pair; the T_LONG case may need two PatchingStubs
// because hi and lo words are stored by separate instructions.
void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info,bool pop_fpu_stack, bool/*unaliged*/) {
  LIR_Address* to_addr = dest->as_address_ptr();
  Register dest_reg = to_addr->base()->as_register();	
	PatchingStub* patch = NULL;
	bool needs_patching = (patch_code != lir_patch_none);
  Register disp_reg = NOREG;
	int disp_value = to_addr->disp();

	if (type == T_ARRAY || type == T_OBJECT) {
    __ verify_oop(src->as_register());
  }

	if (needs_patching) {
		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
		assert(!src->is_double_cpu() || 
				patch_code == lir_patch_none || 
				patch_code == lir_patch_normal, 
				"patching doesn't match register");
	}
        
	if (info != NULL) {
     	add_debug_info_for_null_check_here(info);
  }
	if (needs_patching) {
		// patchable displacement: always the full lui/addiu pair so the
		// patcher has room to rewrite both halves
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
		__ addiu(AT, AT, Assembler::split_low(disp_value));
	} else if (!Assembler::is_simm16(disp_value)) { 
		// displacement too large for a 16-bit immediate: high part into
		// AT, low part folded into the store below
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
	}
	int offset = code_offset();

	switch(type) {
	case T_DOUBLE:
		assert(src->is_double_fpu(), "just check");
		if (disp_reg == noreg) {
			__ swc1(src->as_double_reg(), dest_reg, disp_value);
			__ swc1(src->as_double_reg()+1, dest_reg, disp_value+4);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_double_reg(), AT, 0);
			__ swc1(src->as_double_reg()+1, AT, 4);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_double_reg(), AT, Assembler::split_low(disp_value));
			__ swc1(src->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
		}
		break;

	case T_FLOAT:
	//	assert(src->is_single_cpu(), "just check");

		if (disp_reg == noreg) {
			__ swc1(src->as_float_reg(), dest_reg, disp_value);
		} else if(needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_float_reg(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_float_reg(), AT, Assembler::split_low(disp_value));
		}
		break;
		
	case T_LONG: {
    Register from_lo = src->as_register_lo();
  	Register from_hi = src->as_register_hi();
  	Register base = to_addr->base()->as_register();
   	Register index = noreg;
    if (to_addr->index()->is_register()) {
   	  index = to_addr->index()->as_register();
  	}
  	// order the two word stores so the address registers are not
  	// clobbered before both stores are done; each word may need its
  	// own PatchingStub
  	if (base == from_lo || index == from_lo) {
      assert(base != from_hi, "can't be");
      assert(index == noreg || (index != base && index != from_hi), "can't handle this");
      __ sw(from_hi,as_Address_hi(to_addr));  
		  if (patch != NULL) {
        patching_epilog(patch, lir_patch_high, base, info);
        patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        patch_code = lir_patch_low;
      }
      __ sw(from_lo,as_Address_lo(to_addr)); 
		} else {
      assert(index == noreg || (index != base && index != from_lo), "can't handle this");
    	__ sw(from_lo,as_Address_lo(to_addr)); 
		  if (patch != NULL) {
        patching_epilog(patch, lir_patch_low, base, info);
        patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        patch_code = lir_patch_high;
      }
      __ sw(from_hi,as_Address_hi(to_addr));  
    }
		break;
  }
	case T_ADDRESS:
	case T_ARRAY:
	case T_OBJECT:
	case T_INT:
		//assert(from_reg.is_word(), "just check");
		if (disp_reg == noreg) {
			__ sw(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sw(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sw(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	case T_CHAR:
	case T_SHORT:
//		assert(from_reg.is_word(), "just check");

		if (disp_reg == noreg) {
			__ sh(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sh(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sh(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	case T_BYTE:
	case T_BOOLEAN:
		assert(src->is_single_cpu(), "just check");

		if (disp_reg == noreg) {
			__ sb(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sb(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sb(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	default:
		ShouldNotReachHere();
	}

  if (needs_patching) {
    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
  }
}



// Load a value from a stack slot (src) into a register (dest).
// Single-word values go through lw/lwc1; two-word values (long/double) are
// loaded as two 32-bit halves.  Oop results are verified after the load.
void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
  assert(src->is_stack(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  if (dest->is_single_cpu()) {
    __ lw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
    if (type == T_ARRAY || type == T_OBJECT) {
      __ verify_oop(dest->as_register());
    }
  } else if (dest->is_double_cpu()) {
    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
    Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
    __ lw(dest->as_register_lo(), src_addr_LO);
    __ lw(dest->as_register_hi(), src_addr_HI);
  } else if (dest->is_single_fpu()) {
    Address addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ lwc1(dest->as_float_reg(), addr);
  } else if (dest->is_double_fpu()) {
    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
    Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
    __ lwc1(dest->as_double_reg(), src_addr_LO);
    __ lwc1(dest->as_double_reg()+1, src_addr_HI);
  } else {
    // NOTE(review): this branch is unreachable -- single-cpu destinations are
    // already taken by the first case above, so the assert below can never
    // hold if we get here.  Kept for parity with the original code.
    assert(dest->is_single_cpu(), "cannot be anything else but a single cpu");
    assert(type != T_ILLEGAL, "Bad type in stack2reg");  // fixed: missing ';' broke ASSERT builds
    Address addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ lw(dest->as_register(), addr);
  }
}

// Copy one stack slot (or a slot pair for two-word values) to another stack
// location.  Each 32-bit word is bounced through the AT scratch register.
void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
  if (src->is_single_stack()) {
    // single 32-bit word
    Address from = frame_map()->address_for_slot(src->single_stack_ix());
    Address to   = frame_map()->address_for_slot(dest->single_stack_ix());
    __ lw(AT, from);
    __ sw(AT, to);
  } else if (src->is_double_stack()) {
    // two 32-bit words, moved one at a time
    int src_ix  = src->double_stack_ix();
    int dest_ix = dest->double_stack_ix();
    __ lw(AT, frame_map()->address_for_slot(src_ix));
    __ sw(AT, frame_map()->address_for_slot(dest_ix));
    __ lw(AT, frame_map()->address_for_slot(src_ix, 4));
    __ sw(AT, frame_map()->address_for_slot(dest_ix, 4));
  } else {
    ShouldNotReachHere();
  }
}



// if patching needed, be sure the instruction at offset is a MoveMemReg
void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool) {
	assert(src->is_address(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);
	
	Register src_reg = addr->base()->as_register();
	Register disp_reg = noreg;
	int disp_value = addr->disp();
	bool needs_patching = (patch_code != lir_patch_none);

	PatchingStub* patch = NULL;
	if (needs_patching) {
		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
//		assert(!to_reg.is_long() || patch_code == LIR_Op1::patch_low || patch_code == LIR_Op1::patch_high, "patching doesn't match register");
	}

	// we must use lui&addiu, 
	if (needs_patching) {
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
		__ addiu(AT, AT, Assembler::split_low(disp_value));
	} else if (!Assembler::is_simm16(disp_value)) {
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
	}

	// remember the offset of the load.  The patching_epilog must be done
	// before the call to add_debug_info, otherwise the PcDescs don't get
	// entered in increasing order.
	int offset = code_offset();

	switch(type) {
    case T_BOOLEAN:
    case T_BYTE: {
    	//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lb(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lb(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lb(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_CHAR: {
    		//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lhu(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lhu(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lhu(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_SHORT: {
    	//	assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lh(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lh(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lh(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_INT:
    case T_OBJECT:
    case T_ARRAY: {
    		//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lw(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lw(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lw(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_LONG: {	
    	Register to_lo = dest->as_register_lo();
      Register to_hi = dest->as_register_hi();
      Register base = addr->base()->as_register();
      Register index = noreg;
      if (addr->index()->is_register()) {
      	index = addr->index()->as_register();
      }
      if ((base == to_lo && index == to_hi) ||(base == to_hi && index == to_lo)) {
        // addresses with 2 registers are only formed as a result of
        // array access so this code will never have to deal with
        // patches or null checks.
        assert(info == NULL && patch == NULL, "must be");
        __ lea(to_hi, as_Address(addr));
        __ lw(to_lo, Address(to_hi));
        __ lw(to_hi, Address(to_hi, BytesPerWord));
      } else if (base == to_lo || index == to_lo) {
        assert(base != to_hi, "can't be");
        assert(index == noreg || (index != base && index != to_hi), "can't handle this");
        __ lw(to_hi, as_Address_hi(addr));
        if (patch != NULL) {
        	patching_epilog(patch, lir_patch_high, base, info);
        	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        	patch_code = lir_patch_low;
        }
        __ lw(to_lo, as_Address_lo(addr));
      } else {
        assert(index == noreg || (index != base && index != to_lo), "can't handle this");
        __ lw(to_lo, as_Address_lo(addr));
        if (patch != NULL) {
        	patching_epilog(patch, lir_patch_low, base, info);
        	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        	patch_code = lir_patch_high;
        }
        __ lw(to_hi, as_Address_hi(addr));
      }
    }
    break;

    case T_FLOAT: {
    	//assert(to_reg.is_float(), "just check");
    	if (disp_reg == noreg) {
    		__ lwc1(dest->as_float_reg(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_float_reg(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_float_reg(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_DOUBLE: {
    	//assert(to_reg.is_double(), "just check");
    
    	if (disp_reg == noreg) {
    		__ lwc1(dest->as_double_reg(), src_reg, disp_value);
    		__ lwc1(dest->as_double_reg()+1, src_reg, disp_value+4);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_double_reg(), AT, 0);
    		__ lwc1(dest->as_double_reg()+1, AT, 4);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_double_reg(), AT, Assembler::split_low(disp_value));
    		__ lwc1(dest->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
    	}
    }
    break;
    	
    default:
    	ShouldNotReachHere();
	}

	if (needs_patching) {
		patching_epilog(patch, patch_code, src_reg, info);
	}

	if (info != NULL) add_debug_info_for_null_check(offset, info);
}


// Read-prefetch hint: no prefetch instruction is emitted on this port.
// The address is only formed (exercising as_Address) and then discarded.
void LIR_Assembler::prefetchr(LIR_Opr src) {
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);
}


// Write-prefetch hint: intentionally a no-op on this port.
void LIR_Assembler::prefetchw(LIR_Opr src) {
}

NEEDS_CLEANUP; // This could be static? 
// Map the element size (in bytes) of a basic type to the scale factor used
// when forming array-element addresses.
Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
  const int size_in_bytes = type2aelembytes(type);
  if (size_in_bytes == 1) return Address::times_1;
  if (size_in_bytes == 2) return Address::times_2;
  if (size_in_bytes == 4) return Address::times_4;
  if (size_in_bytes == 8) return Address::times_8;
  ShouldNotReachHere();
  return Address::no_scale;
}


// Dispatch a three-operand LIR op to the matching arithmetic emitter.
void LIR_Assembler::emit_op3(LIR_Op3* op) {
  LIR_Code code = op->code();
  switch (code) {
    case lir_frem:
      // floating-point remainder
      arithmetic_frem(code, op->in_opr1(), op->in_opr2(), op->in_opr3(),
                      op->result_opr(), op->info());
      break;

    case lir_idiv:  // fall through
    case lir_irem:
      // integer divide and remainder share one emitter
      arithmetic_idiv(code, op->in_opr1(), op->in_opr2(), op->in_opr3(),
                      op->result_opr(), op->info());
      break;

    default:
      ShouldNotReachHere();
      break;
  }
}

// Emit a compare-and-branch for a LIR_OpBranch.
//
// MIPS has no condition codes for integers, so comparisons are synthesized
// with slt/sltu into AT followed by beq/bne against ZERO.  Branches have a
// delay slot: `__ delayed()->nop()` fills it with a nop, while a bare
// `__ delayed()` deliberately lets the NEXT emitted instruction occupy the
// slot -- do not reorder statements around those calls.  Floating-point
// compares set the FPU condition flag and branch with bc1t/bc1f; when the
// branch target equals the unordered-target (un_jump) the "unordered"
// compare variants (c_ueq/c_ult/c_ule) are chosen so NaN takes the branch.
void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
	LIR_Opr opr1 = op->left();
	LIR_Opr opr2 = op->right();
	LIR_Condition condition = op->cond();
#ifdef ASSERT
	assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
	if (op->block() != NULL)  _branch_target_blocks.append(op->block());
	if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
#endif	
	// unconditional branch: no compare needed
	if (op->cond() == lir_cond_always) {
		__ b(*op->label());
		__ delayed()->nop();
		return;
	}
  // 32-bit integer register on the left
  if (opr1->is_single_cpu()) {
		Register reg_op1 = opr1->as_register();
		if (opr2->is_single_cpu()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
			Register reg_op2 = opr2->as_register();
			switch (condition) {
		    case lir_cond_equal:
		    	__ beq(reg_op1, reg_op2, *op->label());
		    	break;
		    case lir_cond_notEqual:
		    	__ bne(reg_op1, reg_op2, *op->label());
		    	break;
		    case lir_cond_less:
		    	// AT = 1 TRUE
		    	__ slt(AT, reg_op1, reg_op2);
		    	__ bne(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_lessEqual:
		    	// AT = 0 TRUE
		    	__ slt(AT, reg_op2, reg_op1);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_belowEqual:
		    	// AT = 0 TRUE
		    	__ sltu(AT, reg_op2, reg_op1);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_greaterEqual:
		    	// AT = 0 TRUE
		    	__ slt(AT, reg_op1, reg_op2);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_aboveEqual:
		    	// AT = 0 TRUE
		    	__ sltu(AT, reg_op1, reg_op2);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_greater:
		    	// AT = 1 TRUE
		    	__ slt(AT, reg_op2, reg_op1);
		    	__ bne(AT, ZERO, *op->label());
		    	break;				
		    default: ShouldNotReachHere();
			}
		// register vs constant (int or oop); oop constants are recorded in
		// the oop table via a relocation before being materialized in AT
		} else if (opr2->is_constant()) {
			jint temp_value;
			bool is_object = false;
			if (opr2->pointer()->as_constant()->type() == T_INT) {
				temp_value = (jint)(opr2->as_jint());
			} else if (opr2->pointer()->as_constant()->type() == T_OBJECT) {
				is_object = true;
				temp_value = (jint)(opr2->as_jobject());
			} else {
				ShouldNotReachHere();
			}

			switch (condition) {
			  case lir_cond_equal:
#ifdef OPT_RANGECHECK
			  assert(!op->check(), "just check");
#endif
			  if (temp_value) {
				  if (is_object) {
				    int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
				    RelocationHolder rspec = oop_Relocation::spec(oop_index);
				    __ relocate(rspec);
    			}
    			__ move(AT, temp_value);
    			__ beq(reg_op1, AT, *op->label());
    		} else {
    			__ beq(reg_op1, ZERO, *op->label());
    		}
    		break;
    			
    		case lir_cond_notEqual:
#ifdef OPT_RANGECHECK
	    		assert(!op->check(), "just check");
#endif
			    if (temp_value) {
      			if (is_object) {
      				int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
      				RelocationHolder rspec = oop_Relocation::spec(oop_index);
      				__ relocate(rspec);
      			}
      			__ move(AT, temp_value);
      			__ bne(reg_op1, AT, *op->label());
      		} else {
      			__ bne(reg_op1, ZERO, *op->label());
      		}
      		break;
      			
      	case lir_cond_less:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
	    		// AT = 1 TRUE
	    		if (Assembler::is_simm16(temp_value)) {
	    			__ slti(AT, reg_op1, temp_value);
	    		} else {
	    			__ move(AT, temp_value);
	    			__ slt(AT, reg_op1, AT);
	    		}
	    		__ bne(AT, ZERO, *op->label());
	    		break;
				
		  	case lir_cond_lessEqual:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			__ move(AT, temp_value);
    			__ slt(AT, AT, reg_op1);
    			__ beq(AT, ZERO, *op->label());
    			break;
    			
    		case lir_cond_belowEqual:
    			// AT = 0 TRUE
#ifdef OPT_RANGECHECK
    			if (op->check()) {
    				__ move(AT, temp_value);
    				add_debug_info_for_range_check_here(op->info(), temp_value);
    				__ tgeu(AT, reg_op1, 29);
    			} else {
#endif
	    			__ move(AT, temp_value);
	    			__ sltu(AT, AT, reg_op1);
	    			__ beq(AT, ZERO, *op->label());
#ifdef OPT_RANGECHECK
		  		}
#endif
				  break;
				
			  case lir_cond_greaterEqual:
#ifdef OPT_RANGECHECK
  			  assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			if (Assembler::is_simm16(temp_value)) {
    				__ slti(AT, reg_op1, temp_value);
    			} else {
    				__ move(AT, temp_value);
    				__ slt(AT, reg_op1, AT);
    			}
    			__ beq(AT, ZERO, *op->label());
    			break;
				
	  		case lir_cond_aboveEqual:
#ifdef OPT_RANGECHECK
		    	assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			if (Assembler::is_simm16(temp_value)) {
    				__ sltiu(AT, reg_op1, temp_value);
    			} else {
    				__ move(AT, temp_value);
    				__ sltu(AT, reg_op1, AT);
    			}
    			__ beq(AT, ZERO, *op->label());
    			break;
    			
    		case lir_cond_greater:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
    			// AT = 1 TRUE
    			__ move(AT, temp_value);
    			__ slt(AT, AT, reg_op1);
    			__ bne(AT, ZERO, *op->label());
    			break;				
    
    		default: ShouldNotReachHere();
    		}
    
    	// register vs memory/stack operand: load the right side into AT first
    	} else {
    		if (opr2->is_address()) {
    			__ lw(AT, as_Address(opr2->pointer()->as_address()));
    		} else if (opr2->is_stack()) {
    			__ lw(AT, frame_map()->address_for_slot(opr2->single_stack_ix()));
    		} else {
    			ShouldNotReachHere();
    		}
    		switch (condition) {
    			case lir_cond_equal:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  __ beq(reg_op1, AT, *op->label());
					  break;
				  case lir_cond_notEqual:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  __ bne(reg_op1, AT, *op->label());
					  break;
				  case lir_cond_less:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
    				// AT = 1 TRUE
    				__ slt(AT, reg_op1, AT);
    				__ bne(AT, ZERO, *op->label());
    				break;
    			case lir_cond_lessEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ slt(AT, AT, reg_op1);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_belowEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ sltu(AT, AT, reg_op1);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_greaterEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ slt(AT, reg_op1, AT);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_aboveEqual:
    				// AT = 0 TRUE
#ifdef OPT_RANGECHECK
    				if (op->check()) {
    					add_debug_info_for_range_check_here(op->info(), opr1->rinfo());
    					__ tgeu(reg_op1, AT, 29);
    				} else {
#endif
	  				  __ sltu(AT, reg_op1, AT);
	  				  __ beq(AT, ZERO, *op->label());
#ifdef OPT_RANGECHECK
					  }
#endif
					  break;
				  case lir_cond_greater:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  // AT = 1 TRUE
					  __ slt(AT, AT, reg_op1);
					  __ bne(AT, ZERO, *op->label());
					  break;				
				  default: ShouldNotReachHere();
			  }
		  }
		// single shared delay-slot nop for all single-cpu branches above
#ifdef OPT_RANGECHECK
		  if (!op->check())
#endif
		  __ delayed()->nop();

    // left operand in memory or on the stack, right operand a constant
    } else if(opr1->is_address() || opr1->is_stack()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
      if (opr2->is_constant()) {
			  jint temp_value;
        if (opr2->as_constant_ptr()->type() == T_INT) {
          temp_value = (jint)opr2->as_constant_ptr()->as_jint();
        } else if (opr2->as_constant_ptr()->type() == T_OBJECT) {
          temp_value = (jint)opr2->as_constant_ptr()->as_jobject();
        } else {
    			ShouldNotReachHere();
    		}
   
    		if (Assembler::is_simm16(temp_value)) {
    			if (opr1->is_address()) {
    				__ lw(AT, as_Address(opr1->pointer()->as_address()));
    			} else {
    				__ lw(AT, frame_map()->address_for_slot(opr1->single_stack_ix()));
    			}
   
    			switch(condition) {
    			
    		    case lir_cond_equal:
    		    	__ addi(AT, AT, -(int)temp_value);	
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_notEqual:
    		    	__ addi(AT, AT, -(int)temp_value);	
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_less:
    		    	// AT = 1 TRUE
    		    	__ slti(AT, AT, temp_value);
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_lessEqual:
    		    	// AT = 0 TRUE
    		    	__ addi(AT, AT, -temp_value);	
    		    	__ slt(AT, ZERO, AT);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_belowEqual:
    		    	// AT = 0 TRUE
    		    	__ addiu(AT, AT, -temp_value);	
    		    	__ sltu(AT, ZERO, AT);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_greaterEqual:
    		    	// AT = 0 TRUE
    		    	__ slti(AT, AT, temp_value);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_aboveEqual:
    		    	// AT = 0 TRUE
    		    	__ sltiu(AT, AT, temp_value);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_greater:
    		    	// AT = 1 TRUE
    		    	__ addi(AT, AT, -temp_value);		
    		    	__ slt(AT, ZERO, AT);
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;				
    		    
    		    default:
    		    	Unimplemented();
    			}
    		} else {
          Unimplemented();
        }
      } else {
        Unimplemented();
      }
    	__ delayed()->nop();
   
    // 64-bit integer compare: hi words decide, lo words (unsigned) break ties
    } else if(opr1->is_double_cpu()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
	    Register opr1_lo = opr1->as_register_lo();
	    Register opr1_hi = opr1->as_register_hi();

	    if (opr2->is_double_cpu()) {
	    	Register opr2_lo = opr2->as_register_lo();
	    	Register opr2_hi = opr2->as_register_hi();
	    	switch (condition) {
	        case lir_cond_equal: {
	        	Label L;
	        	__ bne(opr1_lo, opr2_lo, L);
	        	__ delayed()->nop();
	        	__ beq(opr1_hi, opr2_hi, *op->label());
	        	__ delayed()->nop();
	        	__ bind(L);
	        }
	        break;

	        case lir_cond_notEqual:
	        	__ bne(opr1_lo, opr2_lo, *op->label());
	        	__ delayed()->nop();
	        	__ bne(opr1_hi, opr2_hi, *op->label());
	        	__ delayed()->nop();
	        	break;

	        case lir_cond_less: { 
	        	Label L;
	        	
	        	// if hi less then jump
	        	__ slt(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();
	        	
	        	// if hi great then fail
	        	__ bne(opr1_hi, opr2_hi, L);
	        	__ delayed();
	        
	        	// now just comp lo as unsigned
	        	// (the sltu below sits in the branch delay slot above)
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_lessEqual: {
	        	Label L;
	        	
	        	// if hi great then fail
	        	__ slt(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi less then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_belowEqual: {
	          Label L;
	        		
	        	// if hi great then fail
	        	__ sltu(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi less then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_greaterEqual: {
	        	Label L;
	        	
	        	// if hi less then fail
	        	__ slt(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi great then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_aboveEqual: {
	        	Label L;
	        	
	        	// if hi less then fail
	        	__ sltu(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi great then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_greater: {
	        	Label L;
	        	
	        	// if hi great then jump
	        	__ slt(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();
	        	
	        	// if hi less then fail
	        	__ bne(opr2_hi, opr1_hi, L);
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;				
	        	
	        default: ShouldNotReachHere();
	      }
	    	
	    // 64-bit register vs 64-bit constant; zero gets shorter sequences,
	    // non-zero constants are materialized half-at-a-time in T8
	    } else if(opr2->is_constant()) {
	    	jlong lv = opr2->as_jlong();
	    	jint iv_lo = (jint)lv;
	    	jint iv_hi = (jint)(lv>>32);
	    	bool is_zero = (lv==0);

	    	switch (condition) {
	        case lir_cond_equal: 
	        	if (is_zero) {
	        		__ orr(AT, opr1_lo, opr1_hi);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		__ move(T8, iv_lo);
	        		__ bne(opr1_lo, T8, L);
	        		__ delayed();
	        		__ move(T8, iv_hi);
	        		__ beq(opr1_hi, T8, *op->label());
	        		__ delayed()->nop();
	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_notEqual:
	        	if (is_zero) {
	        		__ orr(AT, opr1_lo, opr1_hi);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		__ move(T8, iv_lo);
	        		__ bne(opr1_lo, T8, *op->label());
	        		__ delayed();
	        		__ move(T8, iv_hi);
	        		__ bne(opr1_hi, T8, *op->label());
	        		__ delayed()->nop();
	        	}
	        	break;

	        case lir_cond_less:
	        	if (is_zero) {
	        		__ bltz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        	} else { 
	        		Label L;
	        		
	        		// if hi less then jump
	        		__ move(T8, iv_hi);
	        		__ slt(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        		
	        		// if hi great then fail
	        		__ bne(opr1_hi, T8, L);
	        		__ delayed();
	        	
	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_lessEqual:
	        	if (is_zero) {
	        		__ bltz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        		__ orr(AT, opr1_hi, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed();
	        	} else {
	        		Label L;
	        		
	        		// if hi great then fail
	        		__ move(T8, iv_hi);
	        		__ slt(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi less then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_belowEqual:
	        	if (is_zero) {
	        		__ orr(AT, opr1_hi, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi great then fail
	        		__ move(T8, iv_hi);
	        		__ sltu(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi less then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_greaterEqual:
	        	if (is_zero) {
	        		__ bgez(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi less then fail
	        		__ move(T8, iv_hi);
	        		__ slt(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi great then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_aboveEqual:
	        	if (is_zero) {
	        		// unsigned x >= 0 is always true
	        		__ b(*op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi less then fail
	        		__ move(T8, iv_hi);
	        		__ sltu(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi great then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_greater:
	        	if (is_zero) {
	        		Label L;
	        		__ bgtz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        		__ bne(opr1_hi, ZERO, L);
	        		__ delayed()->nop();
	        		__ bne(opr1_lo, ZERO, *op->label());
	        		__ delayed()->nop();
	        		__ bind(L);
	        	} else {
	        		Label L;
	        		
	        		// if hi great then jump
	        		__ move(T8, iv_hi);
	        		__ slt(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        		
	        		// if hi less then fail
	        		__ bne(T8, opr1_hi, L);
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;				
	        	
	        default: 
	        	ShouldNotReachHere();
	      }
	    } else {
	    	Unimplemented();
	    }
    // single-precision float compare; see header comment for un_jump
    } else if (opr1->is_single_fpu()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
	    assert(opr2->is_single_fpu(), "change the code");
	    
	    FloatRegister reg_op1 = opr1->as_float_reg();
	    FloatRegister reg_op2 = opr2->as_float_reg();
	    //	bool un_ls
	    bool un_jump = (op->ublock()->label()==op->label());
               	
	    Label& L = *op->label();
	    
	    switch (condition) {
	      case lir_cond_equal:
	      	if (un_jump)
	      		__ c_ueq_s(reg_op1, reg_op2);
	      	else 
	      		__ c_eq_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;

	      case lir_cond_notEqual:
	      	if (un_jump)
	      		__ c_eq_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ueq_s(reg_op1, reg_op2);
	      	__ bc1f(L);

	      	break;

	      case lir_cond_less:
	      	if (un_jump)
	      		__ c_ult_s(reg_op1, reg_op2);
	      	else
	      		__ c_olt_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;

	      case lir_cond_lessEqual:
	      case lir_cond_belowEqual:
	      	if (un_jump)
	      		__ c_ule_s(reg_op1, reg_op2);
	      	else
	      		__ c_ole_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;
	      	
	      case lir_cond_greaterEqual:
	      case lir_cond_aboveEqual:
	      	if (un_jump)
	      		__ c_olt_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ult_s(reg_op1, reg_op2);
	      	__ bc1f(L);
	      	
	      	break;
	      	
	      case lir_cond_greater:
	      	if (un_jump)
	      		__ c_ole_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ule_s(reg_op1, reg_op2);
	      	__ bc1f(L);
	      	
	      	break;				
	      	
	      default: 
	      	ShouldNotReachHere();
	      }
	      __ delayed()->nop();
      // double-precision float compare; mirrors the single-precision case
      } else if (opr1->is_double_fpu()) {
#ifdef OPT_RANGECHECK
			  assert(!op->check(), "just check");
#endif
      	assert(opr2->is_double_fpu(), "change the code");
      
      	FloatRegister reg_op1 = opr1->as_double_reg();
      	FloatRegister reg_op2 = opr2->as_double_reg();
      	bool un_jump = (op->ublock()->label()==op->label());
      	Label& L = *op->label();
      	
      	switch (condition) {
      	case lir_cond_equal:
      		if (un_jump)
      			__ c_ueq_d(reg_op1, reg_op2);
      		else 
      			__ c_eq_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      
      	case lir_cond_notEqual:
      		if (un_jump)
      			__ c_eq_d(reg_op1, reg_op2);
      		else 
      			__ c_ueq_d(reg_op1, reg_op2);
      		__ bc1f(L);
      
      		break;
      
      	case lir_cond_less:
      		if (un_jump)
      			__ c_ult_d(reg_op1, reg_op2);
      		else
      			__ c_olt_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      
      	case lir_cond_lessEqual:
      	case lir_cond_belowEqual:
      		if (un_jump)
      			__ c_ule_d(reg_op1, reg_op2);
      		else
      			__ c_ole_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      		
      	case lir_cond_greaterEqual:
      	case lir_cond_aboveEqual:
      		if (un_jump)
      			__ c_olt_d(reg_op1, reg_op2);
      		else 
      			__ c_ult_d(reg_op1, reg_op2);
      		__ bc1f(L);
      		
      		break;
      		
      	case lir_cond_greater:
      		if (un_jump)
      			__ c_ole_d(reg_op1, reg_op2);
      		else 
      			__ c_ule_d(reg_op1, reg_op2);
      		__ bc1f(L);
      		
      		break;				
      		
      	default: 
      		ShouldNotReachHere();
      	}
      	__ delayed()->nop();
      } else {
    Unimplemented();
	}
}


// Emit code for a LIR_OpConvert: Java primitive conversions between
// int/long/float/double plus the narrowing i2b/i2c/i2s forms.
// The truncating f2i/d2i/f2l/d2l conversions use F30/F31 as float
// scratch and map NaN inputs to zero, as the JLS requires.
// (Previously a second local `value` aliased `src` — both were
// op->in_opr() — and the two were used interchangeably; unified on
// `src` for clarity, no behavior change.)
void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
  LIR_Opr src  = op->in_opr();
  LIR_Opr dest = op->result_opr();
  Bytecodes::Code code = op->bytecode();

  switch (code) {
    case Bytecodes::_i2l:
      // low word = src, high word = sign extension of the low word
      move_regs(src->as_register(), dest->as_register_lo());
      __ sra(dest->as_register_hi(), dest->as_register_lo(), 31);
      break;

    case Bytecodes::_l2i:
      move_regs(src->as_register_lo(), dest->as_register());
      break;

    case Bytecodes::_i2b:
      move_regs(src->as_register(), dest->as_register());
      __ sign_extend_byte(dest->as_register());
      break;

    case Bytecodes::_i2c:
      // char is unsigned 16-bit: zero-extend
      __ andi(dest->as_register(), src->as_register(), 0xFFFF);
      break;

    case Bytecodes::_i2s:
      move_regs(src->as_register(), dest->as_register());
      __ sign_extend_short(dest->as_register());
      break;

    case Bytecodes::_f2d:
      __ cvt_d_s(dest->as_double_reg(), src->as_float_reg());
      break;

    case Bytecodes::_d2f:
      __ cvt_s_d(dest->as_float_reg(), src->as_double_reg());
      break;

    case Bytecodes::_i2f: {
      FloatRegister df = dest->as_float_reg();
      if (src->is_single_cpu()) {
        __ mtc1(src->as_register(), df);
        __ cvt_s_w(df, df);
      } else if (src->is_stack()) {
        Address src_addr = src->is_single_stack()
          ? frame_map()->address_for_slot(src->single_stack_ix())
          : frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, df);
        __ cvt_s_w(df, df);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_i2d: {
      FloatRegister dd = dest->as_double_reg();
      if (src->is_single_cpu()) {
        __ mtc1(src->as_register(), dd);
        __ cvt_d_w(dd, dd);
      } else if (src->is_stack()) {
        Address src_addr = src->is_single_stack()
          ? frame_map()->address_for_slot(src->single_stack_ix())
          : frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, dd);
        __ cvt_d_w(dd, dd);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_f2i: {
      FloatRegister fval = src->as_float_reg();
      Register dreg = dest->as_register();

      Label L;
      __ c_un_s(fval, fval);    // NaN? -> result is 0
      __ bc1t(L);
      __ delayed();
      __ move(dreg, ZERO);      // executed in the branch delay slot

      __ trunc_w_s(F30, fval);
      __ mfc1(dreg, F30);
      __ bind(L);
      break;
    }

    case Bytecodes::_d2i: {
      FloatRegister dval = src->as_double_reg();
      Register dreg = dest->as_register();

      Label L;
      __ c_un_d(dval, dval);    // NaN? -> result is 0
      __ bc1t(L);
      __ delayed();
      __ move(dreg, ZERO);      // executed in the branch delay slot

      __ trunc_w_d(F30, dval);
      __ mfc1(dreg, F30);
      __ bind(L);
      break;
    }

    case Bytecodes::_l2f: {
      FloatRegister ldf = dest->as_float_reg();
      if (src->is_double_cpu()) {
        __ mtc1(src->as_register_lo(), ldf);
        __ mtc1(src->as_register_hi(), ldf + 1);
        __ cvt_s_l(ldf, ldf);
      } else if (src->is_double_stack()) {
        Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, ldf);
        __ lw(AT, src_addr.base(), src_addr.disp() + 4);
        __ mtc1(AT, ldf + 1);
        __ cvt_s_l(ldf, ldf);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_l2d: {
      FloatRegister ldd = dest->as_double_reg();
      if (src->is_double_cpu()) {
        __ mtc1(src->as_register_lo(), ldd);
        __ mtc1(src->as_register_hi(), ldd + 1);
        __ cvt_d_l(ldd, ldd);
      } else if (src->is_double_stack()) {
        Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, ldd);
        __ lw(AT, src_addr.base(), src_addr.disp() + 4);
        __ mtc1(AT, ldd + 1);
        __ cvt_d_l(ldd, ldd);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_f2l: {
      FloatRegister fval = src->as_float_reg();
      Register dlo = dest->as_register_lo();
      Register dhi = dest->as_register_hi();

      Label L;
      __ move(dhi, ZERO);
      __ c_un_s(fval, fval);    // NaN? -> result is 0L
      __ bc1t(L);
      __ delayed();
      __ move(dlo, ZERO);       // executed in the branch delay slot

      __ trunc_l_s(F30, fval);
      // the 64-bit truncation result occupies the F30/F31 pair
      __ mfc1(dlo, F30);
      __ mfc1(dhi, F31);
      __ bind(L);
      break;
    }

    case Bytecodes::_d2l: {
      FloatRegister dval = src->as_double_reg();
      Register dlo = dest->as_register_lo();
      Register dhi = dest->as_register_hi();

      Label L;
      __ move(dhi, ZERO);
      __ c_un_d(dval, dval);    // NaN? -> result is 0L
      __ bc1t(L);
      __ delayed();
      __ move(dlo, ZERO);       // executed in the branch delay slot

      __ trunc_l_d(F30, dval);
      __ mfc1(dlo, F30);
      __ mfc1(dhi, F31);
      __ bind(L);
      break;
    }

    default: ShouldNotReachHere();
  }
}

// Emit an object allocation: an optional init-state check on the klass
// (branching to the slow path while the class is not fully initialized),
// then the inline fast-path allocation with the stub as fallback.
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
  Register klass_reg = op->klass()->as_register();
  CodeStub* stub = op->stub();

  if (op->init_check()) {
    // reading the init state may fault on an invalid klass
    add_debug_info_for_null_check_here(stub->info());
    __ lw(AT, Address(klass_reg,
          instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)));
    __ addi(AT, AT, -instanceKlass::fully_initialized);
    __ bne(AT, ZERO, *stub->entry());
    __ delayed()->nop();
  }

  __ allocate_object(op->obj()->as_register(),
                     op->tmp1()->as_register(),
                     op->tmp2()->as_register(),
                     op->header_size(),
                     op->object_size(),
                     klass_reg,
                     *stub->entry());

  __ bind(*stub->continuation());
}

// Emit an array allocation.  Falls back entirely to the slow-path stub
// when fast allocation is disabled for this element kind; otherwise
// emits the inline fast path with the stub as overflow/failure target.
void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
  BasicType elem_type = op->type();
  bool is_obj_array   = (elem_type == T_OBJECT || elem_type == T_ARRAY);
  bool slow_path_only = UseSlowPath ||
                        (is_obj_array  && !UseFastNewObjectArray) ||
                        (!is_obj_array && !UseFastNewTypeArray);

  if (slow_path_only) {
    __ b(*op->stub()->entry());
    __ delayed()->nop();
  } else {
    __ allocate_array(op->obj()->as_register(),
                      op->len()->as_register(),
                      op->tmp1()->as_register(),
                      op->tmp2()->as_register(),
                      op->tmp3()->as_register(),
                      arrayOopDesc::header_size(elem_type),
                      array_element_size(elem_type),
                      op->klass()->as_register(),
                      *op->stub()->entry());
  }
  __ bind(*op->stub()->continuation());
}



// Emit a type check: lir_store_check (aastore element type check),
// lir_checkcast, or lir_instanceof.  The slow cases scan the
// secondary-supers array using T8 (element pointer) and T9 (remaining
// count) as scratch and update the secondary-super cache on a hit.
// Fixes vs. previous version: the instanceof not-loaded loop used the
// trapping `addi` for its counter decrement (every other scan loop
// here uses `addiu`); also removed an unused local (`int the_pc`).
void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
  LIR_Code code = op->code();
  if (code == lir_store_check) {
    Register value = op->object()->as_register();
    Register array = op->array()->as_register();
    Register k_RInfo = op->tmp1()->as_register();
    Register klass_RInfo = op->tmp2()->as_register();

    CodeStub* stub = op->stub();
    Label done;

    // storing null is always allowed
    __ beq(value, ZERO, done);
    __ delayed()->nop();
    add_debug_info_for_null_check_here(op->info_for_exception());

    __ lw(k_RInfo, array, oopDesc::klass_offset_in_bytes());
    __ lw(klass_RInfo, value, oopDesc::klass_offset_in_bytes());

    // k_RInfo := element klass of the array
    __ lw(k_RInfo, k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc));
    // get super_check_offset
    // for SIGBUS, FIXME, Jerome
    __ nop();
    __ nop();
    __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());

    // See if we get an immediate positive hit
    __ add(AT, klass_RInfo, T9);
    __ lw(AT, AT, 0);
    __ beq(k_RInfo, AT, done);
    __ delayed()->nop();

    // check for immediate negative hit
    __ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
    __ bne(T9, AT, *stub->entry());    // fail
    __ delayed()->nop();

    // check for self
    __ beq(klass_RInfo, k_RInfo, done);
    __ delayed()->nop();

    // secondary-supers array
    __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
    // length
    __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
    // base
    __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

    Label miss, hit, loop;
    // T9: count, T8: base, k_RInfo: super klass
    __ bind(loop);
    __ beq(T9, ZERO, miss);
    __ delayed()->lw(AT, T8, 0);
    __ beq(AT, k_RInfo, hit);
    __ delayed();
    __ addiu(T9, T9, -1);
    __ b(loop);
    __ delayed();
    __ addi(T8, T8, 1 * wordSize);

    __ bind(miss);
    __ b(*stub->entry());
    __ delayed()->nop();

    __ bind(hit);
    // remember the successful lookup in the secondary-super cache
    __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc)
        + Klass::secondary_super_cache_offset_in_bytes());

    __ bind(done);
  } else if (op->code() == lir_checkcast) {
    // we always need a stub for the failure case.
    CodeStub* stub = op->stub();
    Register obj = op->object()->as_register();
    Register k_RInfo = op->tmp1()->as_register();
    Register klass_RInfo = op->tmp2()->as_register();
    Register dst = op->result_opr()->as_register();
    ciKlass* k = op->klass();
    Register Rtmp1 = noreg;
    Label done;
    // make sure the temporaries do not alias obj
    if (obj == k_RInfo) {
      k_RInfo = dst;
    } else if (obj == klass_RInfo) {
      klass_RInfo = dst;
    }
    if (k->is_loaded()) {
      select_different_registers(obj, dst, k_RInfo, klass_RInfo);
    } else {
      Rtmp1 = op->tmp3()->as_register();
      select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
    }
    assert_different_registers(obj, k_RInfo, klass_RInfo);

    // patching may screw with our temporaries on sparc,
    // so let's do it before loading the class
    if (!k->is_loaded()) {
      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
    } else {
      jobject2reg(k->encoding(), k_RInfo);
    }
    assert(obj != k_RInfo, "must be different");

    if (op->profiled_method() != NULL) {
      ciMethod* method = op->profiled_method();
      int bci          = op->profiled_bci();

      Label profile_done;
      __ bne(obj, ZERO, profile_done);
      __ delayed()->nop();

      // Object is null; update methodDataOop
      ciMethodData* md = method->method_data();
      if (md == NULL) {
        bailout("out of memory building methodDataOop");
        return;
      }
      ciProfileData* data = md->bci_to_data(bci);
      assert(data != NULL,       "need data for checkcast");
      assert(data->is_BitData(), "need BitData for checkcast");
      Register mdo  = klass_RInfo;
      int oop_index = __ oop_recorder()->find_index(md->encoding());
      RelocationHolder rspec = oop_Relocation::spec(oop_index);
      __ relocate(rspec);
      __ lui(mdo, Assembler::split_high((int)md->encoding()));
      __ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));

      Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
      // set the null_seen flag bits in the header slot; MIPS has no
      // or-to-memory instruction, so this takes a load/or/store triple
      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
      __ lw(AT, data_addr);
      __ ori(AT, AT, header_bits);
      __ sw(AT, data_addr);
      __ b(done);
      __ delayed()->nop();
      __ bind(profile_done);
    } else {
      // casting null always succeeds
      __ beq(obj, ZERO, done);
      __ delayed()->nop();
    }
    __ verify_oop(obj);

    if (op->fast_check()) {
      // get object class
      // not a safepoint as obj null check happens earlier
      __ lw(AT, obj, oopDesc::klass_offset_in_bytes());
      __ bne(AT, k_RInfo, *stub->entry());
      __ delayed()->nop();
      __ bind(done);
    } else {
      // get object class
      // not a safepoint as obj null check happens earlier
      __ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
      if (k->is_loaded()) {
        __ lw(AT, klass_RInfo, k->super_check_offset());
        // See if we get an immediate positive hit
        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
          __ bne(AT, k_RInfo, *stub->entry());
          __ delayed()->nop();
        } else {
          // See if we get an immediate positive hit
          __ beq(AT, k_RInfo, done);
          __ delayed()->nop();
          // check for self
          __ beq(klass_RInfo, k_RInfo, done);
          __ delayed()->nop();

          // secondary-supers array
          __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
          // length
          __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
          // base
          __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

          Label miss, hit, loop;
          // T9: count, T8: base, k_RInfo: super klass
          __ bind(loop);
          __ beq(T9, ZERO, miss);
          __ delayed()->lw(AT, T8, 0);
          __ beq(AT, k_RInfo, hit);
          __ delayed();
          __ addiu(T9, T9, -1);
          __ b(loop);
          __ delayed();
          __ addi(T8, T8, 1 * wordSize);

          __ bind(miss);
          __ b(*stub->entry());
          __ delayed()->nop();

          __ bind(hit);
          __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        }
        __ bind(done);
      } else {
        // super_check_offset
        __ lw(Rtmp1, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
        // See if we get an immediate positive hit
        __ add(AT, klass_RInfo, Rtmp1);
        __ lw(AT, AT, 0);
        __ beq(k_RInfo, AT, done);
        __ delayed()->nop();
        // check for immediate negative hit
        __ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        __ bne(Rtmp1, AT, *stub->entry());
        __ delayed()->nop();
        // check for self
        __ beq(klass_RInfo, k_RInfo, done);
        __ delayed()->nop();

        // secondary-supers array
        __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
        // length
        __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
        // base
        __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

        Label miss, hit, loop;
        // T9: count, T8: base, k_RInfo: super klass
        __ bind(loop);
        __ beq(T9, ZERO, miss);
        __ delayed()->lw(AT, T8, 0);
        __ beq(AT, k_RInfo, hit);
        __ delayed();
        __ addiu(T9, T9, -1);
        __ b(loop);
        __ delayed();
        __ addi(T8, T8, 1 * wordSize);

        __ bind(miss);
        __ b(*stub->entry());
        __ delayed()->nop();

        __ bind(hit);
        __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        __ bind(done);
      }
    }
    if (dst != obj) __ move(dst, obj);

  } else if (code == lir_instanceof) {
    Register obj = op->object()->as_register();
    Register k_RInfo = op->tmp1()->as_register();
    Register klass_RInfo = op->tmp2()->as_register();
    Register dst = op->result_opr()->as_register();
    ciKlass* k = op->klass();

    Label done;
    Label zero;
    Label one;

    if (obj == k_RInfo) {
      k_RInfo = klass_RInfo;
      klass_RInfo = obj;
    }

    // patching may screw with our temporaries on sparc,
    // so let's do it before loading the class
    if (!k->is_loaded()) {
      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
    } else {
      jobject2reg(k->encoding(), k_RInfo);
    }

    assert(obj != k_RInfo, "must be different");

    // null instanceof anything is false
    __ verify_oop(obj);
    __ beq(obj, ZERO, zero);
    __ delayed()->nop();

    if (op->fast_check()) {
      // get object class
      // not a safepoint as obj null check happens earlier
      __ lw(AT, obj, oopDesc::klass_offset_in_bytes());
      __ beq(AT, k_RInfo, one);
      __ delayed()->nop();
    } else {
      // get object class
      // not a safepoint as obj null check happens earlier
      __ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
      if (k->is_loaded()) {
        // See if we get an immediate positive hit
        __ lw(AT, klass_RInfo, k->super_check_offset());
        __ beq(AT, k_RInfo, one);
        __ delayed()->nop();
        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() == k->super_check_offset()) {
          // check for self
          jobject2reg(k->encoding(), AT);
          __ beq(klass_RInfo, k_RInfo, one);
          __ delayed()->nop();

          // secondary-supers array
          __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
          // length
          __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
          // base
          __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

          Label loop, hit;
          // T9: count, T8: base, k_RInfo: super klass
          __ bind(loop);
          __ beq(T9, ZERO, zero);
          __ delayed()->lw(AT, T8, 0);
          __ beq(AT, k_RInfo, hit);
          __ delayed();
          __ addiu(T9, T9, -1);
          __ b(loop);
          __ delayed();
          __ addi(T8, T8, 1 * wordSize);

          __ bind(hit);
          __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
          __ b(one);
          __ delayed()->nop();
        }
      } else {
        assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");

        __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
        __ add(AT, klass_RInfo, T9);
        __ lw(AT, AT, 0);
        __ beq(k_RInfo, AT, one);
        __ delayed()->nop();
        // check for immediate negative hit
        __ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        __ bne(AT, T9, zero);
        __ delayed()->nop();
        // check for self
        __ beq(klass_RInfo, k_RInfo, one);
        __ delayed()->nop();

        // secondary-supers array
        __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
        // length
        __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
        // base
        __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

        Label loop, hit;
        // T9: count, T8: base, k_RInfo: super klass
        __ bind(loop);
        __ beq(T9, ZERO, zero);
        __ delayed()->lw(AT, T8, 0);
        __ beq(AT, k_RInfo, hit);
        __ delayed();
        // non-trapping addiu (was: addi), consistent with the other
        // secondary-supers scan loops in this function
        __ addiu(T9, T9, -1);
        __ b(loop);
        __ delayed();
        __ addi(T8, T8, 1 * wordSize);

        __ bind(hit);
        __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        __ b(one);
        __ delayed()->nop();
      }
    }
    __ bind(zero);
    __ move(dst, ZERO);
    __ b(done);
    __ delayed()->nop();

    __ bind(one);
    __ move(dst, 1);

    __ bind(done);
  } else {
    ShouldNotReachHere();
  }
}


// Emit an atomic compare-and-swap: 64-bit (lir_cas_long) via cmpxchg8,
// 32-bit int/oop via cmpxchg.  The os::is_MP() checks mirror the
// original structure; they emit nothing extra on this port.
void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
  LIR_Code code = op->code();

  if (code == lir_cas_long) {
    Register addr = op->addr()->as_register();
    if (os::is_MP()) {
      // nothing extra emitted on MP here (cmpxchg8 handles atomicity)
    }
    __ cmpxchg8(op->new_value()->as_register_lo(),
                op->new_value()->as_register_hi(),
                addr,
                op->cmp_value()->as_register_lo(),
                op->cmp_value()->as_register_hi());
  } else if (code == lir_cas_int || code == lir_cas_obj) {
    Register addr   = op->addr()->as_register();
    Register newval = op->new_value()->as_register();
    Register cmpval = op->cmp_value()->as_register();
    assert(newval != NULL, "new val must be register");
    assert(cmpval != newval, "cmp and new values must be in different registers");
    assert(cmpval != addr, "cmp and addr must be in different registers");
    assert(newval != addr, "new value and addr must be in different registers");
    if (os::is_MP()) {
      // nothing extra emitted on MP here (cmpxchg handles atomicity)
    }
    __ cmpxchg(newval, addr, cmpval);
  } else {
    Unimplemented();
  }
}

// Conditional move: intentionally a no-op on this MIPS port.
// NOTE(review): presumably the MIPS LIRGenerator never emits lir_cmove,
// so this is never reached -- confirm before relying on that.
void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result) {
}

// Emit a two-operand arithmetic operation: long add/sub/mul on register
// pairs, int add/sub/mul with a register or constant right operand,
// float/double add/sub/mul/div, and add/sub applied in place to a
// stack-slot or memory left operand.
// Fix: the in-memory branch used the trapping add/sub/addi forms; Java
// integer arithmetic must wrap on overflow, so it now uses the
// non-trapping addu/subu/addiu like every register path in this function.
void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
  assert(info == NULL || ((code == lir_rem || code == lir_div || code == lir_sub) && right->is_double_cpu()), "info is only for ldiv/lrem");
  if (left->is_double_cpu()) {
    // 64-bit integer arithmetic on lo/hi register pairs
    assert(right->is_double_cpu(),"right must be long");
    assert(dest->is_double_cpu(), "dest must be long");

    Register op1_lo = left->as_register_lo();
    Register op1_hi = left->as_register_hi();
    Register op2_lo = right->as_register_lo();
    Register op2_hi = right->as_register_hi();
    Register dst_lo = dest->as_register_lo();
    Register dst_hi = dest->as_register_hi();

    switch (code) {
      case lir_add:
        // 64-bit add; carry out of the low word is computed in AT
        __ addu(dst_lo, op1_lo, op2_lo);
        __ sltu(AT, dst_lo, op2_lo);
        __ addu(dst_hi, op1_hi, op2_hi);
        __ addu(dst_hi, dst_hi, AT);
        break;

      case lir_sub:
        // 64-bit subtract; borrow out of the low word is computed in AT
        __ subu(dst_lo, op1_lo, op2_lo);
        __ sltu(AT, op1_lo, dst_lo);
        __ subu(dst_hi, op1_hi, op2_hi);
        __ subu(dst_hi, dst_hi, AT);
        break;

      case lir_mul:
        {
          Label zero, quick, done;

          // both low words zero? then the product is zero
          __ orr(AT, op2_lo, op1_lo);
          __ beq(AT, ZERO, zero);
          __ delayed();
          __ move(dst_hi, ZERO);

          // both high words zero? a single 32x32->64 multiply suffices
          __ orr(AT, op2_hi, op1_hi);
          __ beq(AT, ZERO, quick);
          __ delayed()->nop();

          // cross products contribute only to the high word
          __ multu(op2_lo, op1_hi);
          __ nop();
          __ nop();
          __ mflo(dst_hi);
          __ multu(op2_hi, op1_lo);
          __ nop();
          __ nop();
          __ mflo(AT);

          __ bind(quick);
          __ multu(op2_lo, op1_lo);
          __ addu(dst_hi, dst_hi, AT);
          __ nop();
          __ mflo(dst_lo);
          __ mfhi(AT);
          __ b(done);
          __ delayed()->addu(dst_hi, dst_hi, AT);

          __ bind(zero);
          __ move(dst_lo, ZERO);
          __ bind(done);
        }
        break;

      default:
        ShouldNotReachHere();
    }

  } else if (left->is_single_cpu()) {
    Register lreg = left->as_register();
    Register res = dest->as_register();

    if (right->is_single_cpu()) {
      // int <op> int, all in registers
      Register rreg = right->as_register();
      switch (code) {
        case lir_add:
          __ addu(res, lreg, rreg);
          break;

        case lir_mul:
          __ mult(lreg, rreg);
          __ nop();
          __ nop();
          __ mflo(res);
          break;

        case lir_sub:
          __ subu(res, lreg, rreg);
          break;

        default:
          ShouldNotReachHere();
      }
    } else if (right->is_constant()) {
      jint c = right->as_constant_ptr()->as_jint();

      switch (code) {
        case lir_mul_strictfp:
        case lir_mul:
          __ move(AT, c);
          __ mult(lreg, AT);
          __ nop();
          __ nop();
          __ mflo(res);
          break;

        case lir_add:
          // one-instruction form when the constant fits in simm16
          if (Assembler::is_simm16(c)) {
            __ addiu(res, lreg, c);
          } else {
            __ move(AT, c);
            __ addu(res, lreg, AT);
          }
          break;

        case lir_sub:
          if (Assembler::is_simm16(-c)) {
            __ addiu(res, lreg, -c);
          } else {
            __ move(AT, c);
            __ subu(res, lreg, AT);
          }
          break;

        default:
          ShouldNotReachHere();
      }

    } else {
      ShouldNotReachHere();
    }
  } else if (left->is_single_fpu()) {
    assert(right->is_single_fpu(),"right must be float");
    assert(dest->is_single_fpu(), "dest must be float");

    FloatRegister lreg = left->as_float_reg();
    FloatRegister rreg = right->as_float_reg();
    FloatRegister res = dest->as_float_reg();

    switch (code) {
      case lir_add:
        __ add_s(res, lreg, rreg);
        break;
      case lir_sub:
        __ sub_s(res, lreg, rreg);
        break;
      case lir_mul:
      case lir_mul_strictfp:
        // FIXME: assumes strictfp needs no special handling here
        __ mul_s(res, lreg, rreg);
        break;
      case lir_div:
      case lir_div_strictfp:
        __ div_s(res, lreg, rreg);
        break;
      default: ShouldNotReachHere();
    }
  } else if (left->is_double_fpu()) {
    assert(right->is_double_fpu(),"right must be double");
    assert(dest->is_double_fpu(), "dest must be double");

    FloatRegister lreg = left->as_double_reg();
    FloatRegister rreg = right->as_double_reg();
    FloatRegister res = dest->as_double_reg();

    switch (code) {
      case lir_add:
        __ add_d(res, lreg, rreg);
        break;
      case lir_sub:
        __ sub_d(res, lreg, rreg);
        break;
      case lir_mul:
      case lir_mul_strictfp:
        // FIXME: assumes strictfp needs no special handling here
        __ mul_d(res, lreg, rreg);
        break;
      case lir_div:
      case lir_div_strictfp:
        __ div_d(res, lreg, rreg);
        break;
      default: ShouldNotReachHere();
    }
  } else if (left->is_single_stack() || left->is_address()) {
    // left operand is in memory: load, operate, store back in place
    assert(left == dest, "left and dest must be equal");
    Address laddr = left->is_single_stack()
      ? frame_map()->address_for_slot(left->single_stack_ix())
      : as_Address(left->as_address_ptr());

    if (right->is_single_cpu()) {
      Register rreg = right->as_register();
      switch (code) {
        case lir_add:
          __ lw(AT, laddr);
          // non-trapping addu (was: add): Java int add wraps on overflow
          __ addu(AT, AT, rreg);
          __ sw(AT, laddr);
          break;
        case lir_sub:
          __ lw(AT, laddr);
          // non-trapping subu (was: sub), see above
          __ subu(AT, AT, rreg);
          __ sw(AT, laddr);
          break;
        default: ShouldNotReachHere();
      }
    } else if (right->is_constant()) {
      jint c = right->as_constant_ptr()->as_jint();
      switch (code) {
        case lir_add: {
          __ lw(AT, laddr);
          // non-trapping addiu (was: addi); assumes c fits in simm16 --
          // the assembler asserts if it does not
          __ addiu(AT, AT, c);
          __ sw(AT, laddr);
          break;
        }
        case lir_sub: {
          __ lw(AT, laddr);
          __ addiu(AT, AT, -c);
          __ sw(AT, laddr);
          break;
        }
        default: ShouldNotReachHere();
      }
    } else {
      ShouldNotReachHere();
    }
  } else {
    ShouldNotReachHere();
  }
}

// Emit a one-operand math intrinsic on a double operand
// (abs/sqrt inline; sin/cos/tan via the trigfunc helper;
// log/log10 are not implemented on this port).
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op *op) {
  if (!value->is_double_fpu()) {
    Unimplemented();
    return;
  }

  switch (code) {
    case lir_log:
    case lir_log10:
      Unimplemented();
      break;

    case lir_abs:
      __ abs_d(dest->as_double_reg(), value->as_double_reg());
      break;

    case lir_sqrt:
      __ sqrt_d(dest->as_double_reg(), value->as_double_reg());
      break;

    case lir_sin:
      __ trigfunc('s', 0);
      break;

    case lir_cos:
      __ trigfunc('c', 0);
      break;

    case lir_tan:
      __ trigfunc('t', 0);
      break;

    default:
      ShouldNotReachHere();
  }
}
// Emit bitwise and/or/xor for 32-bit single registers or 64-bit
// lo/hi register pairs; the right operand may be a register, a
// constant, or (32-bit case only) a stack slot.
// Fix: the stack-operand branch used to write its result into the LEFT
// register instead of the destination register; that only happened to
// work when the register allocator picked dst == left.  The result now
// goes to dstreg, consistent with the constant and register branches.
void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
  if (left->is_single_cpu()) {
    Register dstreg = dst->as_register();
    Register reg = left->as_register();
    if (right->is_constant()) {
      // materialize the constant in AT, then operate register-register
      int val = right->as_constant_ptr()->as_jint();
      __ move(AT, val);
      switch (code) {
        case lir_logic_and:
          __ andr(dstreg, reg, AT);
          break;
        case lir_logic_or:
          __ orr(dstreg, reg, AT);
          break;
        case lir_logic_xor:
          __ xorr(dstreg, reg, AT);
          break;
        default: ShouldNotReachHere();
      }
    } else if (right->is_stack()) {
      // stack operand: load it into AT first
      Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
      switch (code) {
        case lir_logic_and:
          __ lw(AT, raddr);
          __ andr(dstreg, reg, AT);
          break;
        case lir_logic_or:
          __ lw(AT, raddr);
          __ orr(dstreg, reg, AT);
          break;
        case lir_logic_xor:
          __ lw(AT, raddr);
          __ xorr(dstreg, reg, AT);
          break;
        default: ShouldNotReachHere();
      }
    } else {
      Register rright = right->as_register();
      switch (code) {
        case lir_logic_and: __ andr (dstreg, reg, rright); break;
        case lir_logic_or : __ orr  (dstreg, reg, rright); break;
        case lir_logic_xor: __ xorr (dstreg, reg, rright); break;
        default: ShouldNotReachHere();
      }
    }
  } else {
    // 64-bit: operate on the lo and hi halves independently
    Register l_lo = left->as_register_lo();
    Register l_hi = left->as_register_hi();
    Register dst_lo = dst->as_register_lo();
    Register dst_hi = dst->as_register_hi();

    if (right->is_constant()) {
      int r_lo = right->as_constant_ptr()->as_jint_lo();
      int r_hi = right->as_constant_ptr()->as_jint_hi();

      switch (code) {
        case lir_logic_and:
          __ move(AT, r_lo);
          __ andr(dst_lo, l_lo, AT);
          __ move(AT, r_hi);
          __ andr(dst_hi, l_hi, AT);
          break;

        case lir_logic_or:
          __ move(AT, r_lo);
          __ orr(dst_lo, l_lo, AT);
          __ move(AT, r_hi);
          __ orr(dst_hi, l_hi, AT);
          break;

        case lir_logic_xor:
          __ move(AT, r_lo);
          __ xorr(dst_lo, l_lo, AT);
          __ move(AT, r_hi);
          __ xorr(dst_hi, l_hi, AT);
          break;

        default: ShouldNotReachHere();
      }

    } else {
      Register r_lo = right->as_register_lo();
      Register r_hi = right->as_register_hi();

      switch (code) {
        case lir_logic_and:
          __ andr(dst_lo, l_lo, r_lo);
          __ andr(dst_hi, l_hi, r_hi);
          break;
        case lir_logic_or:
          __ orr(dst_lo, l_lo, r_lo);
          __ orr(dst_hi, l_hi, r_hi);
          break;
        case lir_logic_xor:
          __ xorr(dst_lo, l_lo, r_lo);
          __ xorr(dst_hi, l_hi, r_hi);
          break;
        default: ShouldNotReachHere();
      }
    }
  }
}

// Emit integer division/remainder.  MIPS `div` leaves the quotient in
// LO and the remainder in HI; the two nops cover the hi/lo latency.
// Debug info for an implicit divide-by-zero exception is attached in
// the register-divisor case only (constant divisors are asserted
// nonzero at compile time).
void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {

  assert(left->is_single_cpu(),   "left must be register");
  assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
  assert(result->is_single_cpu(), "result must be register");

  Register dividend = left->as_register();
  Register res      = result->as_register();

  if (right->is_constant()) {
    jint divisor = right->as_constant_ptr()->as_jint();
    assert(divisor!=0, "must be nonzero");
    __ move(AT, divisor);
    __ div(dividend, AT);
    __ nop();
    __ nop();
  } else {
    // record the pc of the div so an implicit trap maps to debug info
    int div0_offset = code_offset();
    __ div(dividend, right->as_register());
    __ nop();
    __ nop();
    add_debug_info_for_div0(div0_offset, info);
  }

  // fetch the requested half of the result
  switch (code) {
    case lir_irem: __ mfhi(res); break;
    case lir_idiv: __ mflo(res); break;
    default:       ShouldNotReachHere();
  }
}

void LIR_Assembler::arithmetic_frem(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
  // Floating point remainder: dispatch on operand width and emit the
  // corresponding remainder helper sequence (rem_s / rem_d), which uses
  // 'temp' as an FPU scratch register.
  if (left->is_single_fpu()) {
    assert(right->is_single_fpu(),  "right must be float");
    assert(result->is_single_fpu(), "dest must be float");
    assert(temp->is_single_fpu(),   "dest must be float");

    switch (code) {
      case lir_frem:
        __ rem_s(result->as_float_reg(), left->as_float_reg(),
                 right->as_float_reg(), temp->as_float_reg());
        break;
      default:
        ShouldNotReachHere();
    }
  } else if (left->is_double_fpu()) {
    assert(right->is_double_fpu(),  "right must be double");
    assert(result->is_double_fpu(), "dest must be double");
    assert(temp->is_double_fpu(),   "dest must be double");

    switch (code) {
      case lir_frem:
        __ rem_d(result->as_double_reg(), left->as_double_reg(),
                 right->as_double_reg(), temp->as_double_reg());
        break;
      default:
        ShouldNotReachHere();
    }
  }
}

void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst,LIR_Op2 * op) {	
	// Compare two floats/doubles (lir_cmp_fd2i / lir_ucmp_fd2i) or two
	// longs (lir_cmp_l2i) and set dst to -1 / 0 / 1 for less / equal /
	// greater.  In each FP sequence the move sits in the bc1t delay slot,
	// so it executes whether or not the branch is taken; the value that
	// sticks is the last one written before reaching 'done'.
	// NOTE(review): lir_cmp_fd2i uses the ordered compare (c_olt) and
	// lir_ucmp_fd2i the unordered one (c_ult) — presumably to get the
	// required NaN result for the two bytecodes; confirm against the
	// MIPS FP compare condition semantics.
	Register dstreg = dst->as_register();
	if (code == lir_cmp_fd2i) {
		if (left->is_single_fpu()) {			
			FloatRegister leftreg = left->as_float_reg();
			FloatRegister rightreg = right->as_float_reg();

			Label done;
			// equal?
			__ c_eq_s(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_olt_s(leftreg, rightreg); 
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		} else {
			assert(left->is_double_fpu(), "Must double");
			FloatRegister leftreg = left->as_double_reg();
			FloatRegister rightreg = right->as_double_reg();

			Label done;
			// equal?
			__ c_eq_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_olt_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		}
	} else if (code == lir_ucmp_fd2i) {
		if (left->is_single_fpu()) {			
			FloatRegister leftreg = left->as_float_reg();
			FloatRegister rightreg = right->as_float_reg();

			Label done;
			// equal?
			__ c_eq_s(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_ult_s(leftreg, rightreg); 
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		} else {
			assert(left->is_double_fpu(), "Must double");
			FloatRegister leftreg = left->as_double_reg();
			FloatRegister rightreg = right->as_double_reg();

			Label done;
			// equal?
			__ c_eq_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_ult_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		}
	} else {
		assert(code == lir_cmp_l2i, "check");
		// 64-bit signed compare over register pairs: decide on the high
		// words (signed), then on the low words (unsigned).  As above,
		// each move executes in the branch delay slot.
		Register l_lo, l_hi, r_lo, r_hi, d_lo, d_hi;
		l_lo = left->as_register_lo();
		l_hi = left->as_register_hi();
		r_lo = right->as_register_lo();
		r_hi = right->as_register_hi();

		Label done;
		// less?
		__ slt(AT, l_hi, r_hi);		
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, -1);
		// great?
		__ slt(AT, r_hi, l_hi);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, 1);

		// now compare low 32 bits
		// below?
		__ sltu(AT, l_lo, r_lo);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, -1);
		// above?
		__ sltu(AT, r_lo, l_lo);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, 1);
		// equal
		__ move(dstreg, ZERO);

		__ bind(done);
	}
}


void LIR_Assembler::align_call(LIR_Code code) {
	// On MP systems the displacement word of the upcoming call must end up
	// word aligned so it can be patched atomically.  Compute where that
	// displacement will land and pad with nops to the next word boundary.
	if (os::is_MP()) {
		int disp_offset = __ offset();
		switch (code) {
			case lir_static_call:
			case lir_optvirtual_call:
				disp_offset += NativeCall::displacement_offset;
				break;
			case lir_icvirtual_call:
				// an inline-cache load precedes the call proper
				disp_offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
				break;
			case lir_virtual_call:  // currently, sparc-specific for niagara
			default: ShouldNotReachHere();
		}
		for (; disp_offset % BytesPerWord != 0; disp_offset++) {
			__ nop();
		}
	}
}


void LIR_Assembler::call(address entry, relocInfo::relocType rtype, CodeEmitInfo* info) {
	// Emit a direct call (plus its delay-slot nop) and record debug info at
	// the return offset.  align_call() must already have padded the stream
	// so the patchable displacement word is word aligned on MP systems.
  	assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
         	"must be aligned");
  
  	__ call(entry, rtype);
  	__ delayed()->nop();
  	add_call_info(code_offset(), info);
}


void LIR_Assembler::ic_call(address entry, CodeEmitInfo* info) {
	// Inline-cache call: load the cached-klass placeholder
	// (Universe::non_oop_word(), patched later) into IC_Klass, then call.
	RelocationHolder rh = virtual_call_Relocation::spec(pc());
	// The placeholder is recorded in the oop table and tagged with an oop
	// relocation so patching can update it; the relocate() must
	// immediately precede the lui/addiu pair it describes.
	int oop_index = __ oop_recorder()->allocate_index((jobject)Universe::non_oop_word());
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(IC_Klass, Assembler::split_high((int)Universe::non_oop_word()));
	__ addiu(IC_Klass, IC_Klass, Assembler::split_low((int)Universe::non_oop_word()));

	__ call(entry, rh);
	__ delayed()->nop();
	add_call_info(code_offset(), info);
}


/* Currently, vtable-dispatch is only enabled for sparc platforms */
void LIR_Assembler::vtable_call(int vtable_offset, CodeEmitInfo* info) {
    // Vtable dispatch is not used on this port (see comment above:
    // sparc-only), so reaching here indicates a LIR generation error.
    ShouldNotReachHere();
}



void LIR_Assembler::emit_static_call_stub() {
	address call_pc = __ pc();
	address stub = __ start_a_stub(call_stub_size);
	if (stub == NULL) {
		bailout("static call stub overflow");
		return;
	}

	int start = __ offset();
	/*  if (os::is_MP()) {
	// make sure that the displacement word of the call ends up word aligned
	int offset = __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset;
	while (offset++ % BytesPerWord != 0) {
	__ nop();
	}
	}
	*/
	__ relocate(static_stub_Relocation::spec(call_pc));
	jobject o=NULL;    
	int oop_index = __ oop_recorder()->allocate_index((jobject)o);
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(T7, Assembler::split_high((int)o));
	__ addiu(T7, T7, Assembler::split_low((int)o));

	// must be set to -1 at code generation time
	// assert(!os::is_MP() || ((__ offset() + 1) % BytesPerWord) == 0, "must be aligned on MP");
  __ lui(AT, Assembler::split_high((int)-1));
	__ addiu(AT, AT, Assembler::split_low((int)-1));
  __ jr(AT);
	__ delayed()->nop();
	assert(__ offset() - start <= call_stub_size, "stub too big")
	__ end_a_stub();
}


void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info, bool unwind) {
	// Throw (or unwind with) the exception oop in V0.  For a real throw
	// the throwing pc is materialized into exceptionPC's register and
	// control transfers to the matching Runtime1 exception handler stub.
	assert(exceptionOop->as_register()== V0, "must match");
	assert(unwind || exceptionPC->as_register()== V1, "must match");

	// exception object is not added to oop map by LinearScan
	// (LinearScan assumes that no oops are in fixed registers)

	info->add_register_oop(exceptionOop);
	if (!unwind) {
		// get current pc information
		// pc is only needed if the method has an exception handler, the unwind code does not need it. 
		int pc_for_athrow  = (int)__ pc();
		int pc_for_athrow_offset = __ offset();
		Register epc = exceptionPC->as_register();
		//__ nop();
		// pc_for_athrow can not point to itself (relocInfo restriction), no need now
		// internal_pc relocation keeps the embedded absolute pc valid if
		// the code is relocated.
		__ relocate(relocInfo::internal_pc_type);
		__ lui(epc, Assembler::split_high(pc_for_athrow));
		__ addiu(epc, epc, Assembler::split_low(pc_for_athrow));
		add_call_info(pc_for_athrow_offset, info); // for exception handler
		__ verify_not_null_oop(V0);
		// search an exception handler (eax: exception oop, edx: throwing pc)
		if (compilation()->has_fpu_code()) {
			__ call(Runtime1::entry_for(Runtime1::handle_exception_id), 
				relocInfo::runtime_call_type);
		} else {
			// no FPU state needs saving in the handler for this method
			__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
				relocInfo::runtime_call_type);
		}
	} else {
		__ call(Runtime1::entry_for(Runtime1::unwind_exception_id), 
				relocInfo::runtime_call_type);
	}

	// enough room for two byte trap
	__ delayed()->nop();
}

void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
  // Shift by a run-time count held in a register.  32-bit shifts map
  // directly onto sllv/srav/srlv; 64-bit shifts are synthesized over the
  // register pair with three cases: count == 0, count >= 32, count < 32.
  // optimized version for linear scan:
  // * tmp must be unused
  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");

  if (left->is_single_cpu()) {
    // (stale commented-out x86 sequence removed; see repository history)
    Register value_reg = left->as_register();
    Register count_reg = count->as_register();
    Register dest_reg = dest->as_register();
    assert_different_registers(count_reg, value_reg);
    
		switch (code) {
      case lir_shl:  __ sllv(dest_reg, value_reg, count_reg); break;
      case lir_shr:  __ srav(dest_reg, value_reg, count_reg); break;
      case lir_ushr: __ srlv(dest_reg, value_reg, count_reg); break;
      default: ShouldNotReachHere();
    }
  
  } else if (left->is_double_cpu()) {
    // (stale commented-out x86 sequence removed; see repository history)
    Register creg = count->as_register();
    Register lo = left->as_register_lo();
    Register hi = left->as_register_hi();
    Register dlo = dest->as_register_lo(); 
    Register dhi = dest->as_register_hi(); 
		 
		// NOTE: clobbers the count register — masks it to 0..63 (the long
		// shift count range).
		__ andi(creg, creg, 0x3f);
	switch (code) {
		case lir_shl:
			{
				Label normal, done, notZero;

				//count=0
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dlo, lo);
				__ b(done);
				__ delayed();
				__ move(dhi, hi);

				//count>=32: dhi = lo << (count-32), dlo = 0
				// (the addiu below executes in the bne delay slot, so AT
				// holds count-32 on the taken path into sllv)
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				__ addiu(AT, creg, (-1) * BitsPerWord);
				__ sllv(dhi, lo, AT);
				__ b(done);
				__ delayed();
				__ move(dlo, ZERO);

				//count<32: dhi = (hi << c) | (lo >> (32-c)), dlo = lo << c
				__ bind(normal);
				__ sllv(dhi, hi, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ srlv(AT, lo, AT);
				__ orr(dhi, dhi, AT);
				__ sllv(dlo, lo, creg);
				__ bind(done);						
			}	
			break;
		case lir_shr:
			{
				Label normal, done, notZero;
				
				//count=0
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dhi, hi);
				__ b(done);
				__ delayed();
				__ move(dlo, lo);

				//count>=32: dlo = hi >> (count-32) arithmetic,
				//           dhi = sign extension of hi
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				__ addiu(AT, creg, (-1) * BitsPerWord);
				__ srav(dlo, hi, AT);
				__ b(done);
				__ delayed();
				__ sra(dhi, hi, BitsPerWord - 1);

				//count<32: dlo = (lo >> c) | (hi << (32-c)), dhi = hi >> c
				__ bind(normal);
				__ srlv(dlo, lo, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ sllv(AT, hi, AT);
				__ orr(dlo, dlo, AT);
				__ srav(dhi, hi, creg);
				__ bind(done);
			}		
			break;
		case lir_ushr:
			{
				Label normal, done, notZero;

				//count=zero
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dhi, hi);
				__ b(done);
				__ delayed();
				__ move(dlo, lo);

				//count>=32: dlo = hi >> (count-32) logical, dhi = 0
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				__ addi(AT, creg, (-1) * BitsPerWord);
				__ srlv(dlo, hi, AT);
				__ b(done);
				__ delayed();
				__ move(dhi, ZERO);

				//count<32: dlo = (lo >> c) | (hi << (32-c)), dhi = hi >>> c
				__ bind(normal);
				__ srlv(dlo, lo, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ sllv(AT, hi, AT);
				__ orr(dlo, dlo, AT);
				__ srlv(dhi, hi, creg);
				__ bind(done);
			}
			break;
		default: ShouldNotReachHere();	 
	}
  } else {
    ShouldNotReachHere();
  }

}

// Constant-count shifts; the 64-bit (long) cases are handled here as well.
void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint  count, LIR_Opr dest) {
	// Shift by a compile-time constant.  The count is masked to the Java
	// shift range first (0..31 for int, 0..63 for long); 64-bit shifts are
	// synthesized over the register pair with three cases:
	// count == 0, count >= 32, count < 32.

	if (left->is_single_cpu()) {
		Register value_reg = left->as_register();
		Register dest_reg = dest->as_register();
		count = count & 0x1F; // Java spec

		switch (code) {
			case lir_shl:  __ sll(dest_reg, value_reg, count); break;
			case lir_shr:  __ sra(dest_reg, value_reg, count); break;
			case lir_ushr: __ srl(dest_reg, value_reg, count); break;
			default: ShouldNotReachHere();
		}

	} else if (dest->is_double_cpu()) {
		Register valuelo = left->as_register_lo();
		Register valuehi = left->as_register_hi();
		Register destlo = dest->as_register_lo();
		Register desthi = dest->as_register_hi();
		assert_different_registers(destlo, valuehi, desthi);
		count = count & 0x3f;
		switch (code) {
			case lir_shl:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					// desthi = valuelo << (count-32), destlo = 0
					__ sll(desthi, valuelo, count-32);
					__ move(destlo, ZERO);
				} else {
					// desthi = (valuehi << c) | (valuelo >> (32-c))
					__ srl(AT, valuelo, 32 - count);
					__ sll(destlo, valuelo, count);
					__ sll(desthi, valuehi, count);
					__ orr(desthi, desthi, AT);	
				}
				break;

			case lir_shr:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					// arithmetic shift: high word collapses to sign bits
					__ sra(destlo, valuehi, count-32);
					__ sra(desthi, valuehi, 31);
				} else {
					// destlo = (valuelo >> c) | (valuehi << (32-c))
					__ sll(AT, valuehi, 32 - count);
					__ sra(desthi, valuehi, count);
					__ srl(destlo, valuelo, count);
					__ orr(destlo, destlo, AT);	
				}
				break;

			case lir_ushr:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					// fix: logical (zero-extending) shift requires srl, not
					// sra — sra would replicate the sign bit of valuehi and
					// produce a wrong result for negative longs (Java >>>).
					__ srl(destlo, valuehi, count-32);
					__ move(desthi, ZERO);
				} else {
					// destlo = (valuelo >>> c) | (valuehi << (32-c))
					__ sll(AT, valuehi, 32 - count);
					__ srl(desthi, valuehi, count);
					__ srl(destlo, valuelo, count);
					__ orr(destlo, destlo, AT);	
				}
				break;

			default: ShouldNotReachHere();
		}	
	} else {
		ShouldNotReachHere();
	}
}

//void LIR_Assembler::push_parameter(Register r, int offset_from_sp_in_words) {
//void LIR_Assembler::push_parameter(Register r, int offset_from_sp_in_words) {
void LIR_Assembler::store_parameter(Register r, int offset_from_esp_in_words) {
	// Store register 'r' into the reserved outgoing-argument area at the
	// given word offset from SP.
	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
	// compare in bytes, consistent with the jobject overload below
	assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
	__ sw (r, SP, offset_from_sp_in_bytes);
}


void LIR_Assembler::store_parameter(jint c,     int offset_from_esp_in_words) {
	// Store the constant 'c' (via AT) into the reserved outgoing-argument
	// area at the given word offset from SP.
	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
	// compare in bytes, consistent with the jobject overload below
	assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
	__ move(AT, c);
	__ sw(AT, SP, offset_from_sp_in_bytes);
}

void LIR_Assembler::store_parameter(jobject o,  int offset_from_esp_in_words) {
  // Materialize the oop constant (with an oop relocation so the GC can
  // find and update the embedded value) and store it into the reserved
  // outgoing-argument area at the given word offset from SP.
  assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
  int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
  assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");

  RelocationHolder rspec = oop_Relocation::spec(__ oop_recorder()->find_index(o));
  __ relocate(rspec);
  __ lui(AT, Assembler::split_high((int)o));
  __ addiu(AT, AT, Assembler::split_low((int)o));
  __ sw(AT, SP, offset_from_sp_in_bytes);
}


// This code replaces a call to arraycopy; no exception may 
// be thrown in this code, they must be thrown in the System.arraycopy
// activation frame; we could save some checks if this would not be the case
void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {


	ciArrayKlass* default_type = op->expected_type();
	Register src = op->src()->as_register();
	Register dst = op->dst()->as_register();
	Register src_pos = op->src_pos()->as_register();
	Register dst_pos = op->dst_pos()->as_register();
	Register length  = op->length()->as_register();
	Register tmp = T8;
#ifndef OPT_THREAD
	Register java_thread = T8;
#else
	Register java_thread = TREG;
#endif
	CodeStub* stub = op->stub();

	int flags = op->flags();
	BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
	if (basic_type == T_ARRAY) basic_type = T_OBJECT;

	// if we don't know anything or it's an object array, just go through the generic arraycopy
	if (default_type == NULL) {
		Label done;
// save outgoing arguments on stack in case call to System.arraycopy is needed
// HACK ALERT. This code used to push the parameters in a hardwired fashion
// for interpreter calling conventions. Now we have to do it in new style conventions.
// For the moment until C1 gets the new register allocator I just force all the
// args to the right place (except the register args) and then on the back side
// reload the register args properly if we go slow path. Yuck
		
// this is saved in the caller's reserved argument area
	//FIXME, maybe It will change something in the stack;	
		  // These are proper for the calling convention
		//store_parameter(length, 2);
		//store_parameter(dst_pos, 1);
		//store_parameter(dst, 0);

	 // these are just temporary placements until we need to reload
		//store_parameter(src_pos, 3);
		//store_parameter(src, 4);
		assert(src == T0 && src_pos == A0, "mismatch in calling convention");
	// pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint

		__ push(src);
		__ push(dst);
		__ push(src_pos);
		__ push(dst_pos);
		__ push(length);


		// save SP and align
#ifndef OPT_THREAD
		__ get_thread(java_thread);
#endif
		__ sw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
		__ addi(SP, SP, (-5) * wordSize);
		__ move(AT, -8);
		__ andr(SP, SP, AT);
		// push argument
		__ sw(length, SP, 4 * wordSize);
		__ move(A3, dst_pos);
		__ move(A2, dst);
		__ move(A1, src_pos);
		__ move(A0, src);
	  // make call
		address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
		__ call(entry, relocInfo::runtime_call_type);	
		__ delayed()->nop();
		// restore SP
#ifndef OPT_THREAD
		__ get_thread(java_thread);
#endif
		__ lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));

		__ beq(V0, ZERO, *stub->continuation());
		__ delayed()->nop();
    __ super_pop(length); 
    __ super_pop(dst_pos);
    __ super_pop(src_pos);
    __ super_pop(dst);
    __ super_pop(src);


    __ b(*stub->entry());
    __ delayed()->nop(); 
    __ bind(*stub->continuation());
    return;
	}
	assert(default_type != NULL 
		&& default_type->is_array_klass() 
		&& default_type->is_loaded(), 
		"must be true at this point");

	int elem_size = type2aelembytes(basic_type);
	int shift_amount;
	switch (elem_size) {
		case 1 :shift_amount = 0; break;
		case 2 :shift_amount = 1; break;
		case 4 :shift_amount = 2; break;
		case 8 :shift_amount = 3; break;
		default:ShouldNotReachHere();
	}

	Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
	Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
	Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
	Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());

	// test for NULL
	if (flags & LIR_OpArrayCopy::src_null_check) {
		__ beq(src, ZERO, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_null_check) {
		__ beq(dst, ZERO, *stub->entry());
		__ delayed()->nop();
	}

	// check if negative
	if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
		__ bltz(src_pos, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
		__ bltz(dst_pos, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::length_positive_check) {
		__ bltz(length, *stub->entry());
		__ delayed()->nop();
	}

	if (flags & LIR_OpArrayCopy::src_range_check) {
		__ add(AT, src_pos, length);
		__ lw(tmp, src_length_addr);
		__ sltu(AT, tmp, AT);
		__ bne(AT, ZERO, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_range_check) {
		__ add(AT, dst_pos, length);
		__ lw(tmp, dst_length_addr);
		__ sltu(AT, tmp, AT);
		__ bne(AT, ZERO, *stub->entry());
		__ delayed()->nop();
	}

	if (flags & LIR_OpArrayCopy::type_check) {
		__ lw(AT, src_klass_addr);
		__ lw(tmp, dst_klass_addr);
		__ bne(AT, tmp, *stub->entry());
		__ delayed()->nop();
	}

#ifdef ASSERT
	if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
		// Sanity check the known type with the incoming class.  For the
		// primitive case the types must match exactly.  For the object array
		// case, if no type check is needed then the dst type must match the
		// expected type and the src type is so subtype which we can't check.  If
		// a type check i needed then at this point the classes are known to be
		// the same but again which don't know which type so we can't check them.
		Label known_ok, halt;
		jobject2reg(default_type->encoding(), AT);
		__ lw(tmp, dst_klass_addr);
		if (basic_type != T_OBJECT) {
			__ bne(AT, tmp, halt);
			__ delayed()->nop();
			__ lw(tmp, src_klass_addr);			
		}
		__ beq(AT, tmp, known_ok);
		__ delayed()->nop();
		__ bind(halt);
		__ stop("incorrect type information in arraycopy");
		__ bind(known_ok);
	}
#endif
	__ push(src);
	__ push(dst);
	__ push(src_pos);
	__ push(dst_pos);
	__ push(length);


	assert(A0 != A1 &&
          A0 != length &&
          A1 != length, "register checks");
	__ move(AT, dst_pos);
	if (shift_amount > 0 && basic_type != T_OBJECT) {
		__ sll(A2, length, shift_amount);
	} else {
		if (length!=A2)
			__ move(A2, length);
	}
  __ move(A3, src_pos );	
	assert(A0 != dst_pos &&
          A0 != dst &&
          dst_pos != dst, "register checks");

  assert_different_registers(A0, dst_pos, dst);
	__ sll(AT, AT, shift_amount);
	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
	__ add(A1, dst, AT);

	__ sll(AT, A3, shift_amount);
	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
	__ add(A0, src, AT);



	if (basic_type == T_OBJECT) {
		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy), 3);
	} else {
		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy), 3);
	}
  __ super_pop(length); 
  __ super_pop(dst_pos);
  __ super_pop(src_pos);
  __ super_pop(dst);
  __ super_pop(src);

	__ bind(*stub->continuation());
}


void LIR_Assembler::emit_lock(LIR_OpLock* op) {
	// Monitor enter/exit.  The fast path is inlined via lock_object /
	// unlock_object; failures branch to the slow-path stub.
	Register obj = op->obj_opr()->as_register();  // may not be an oop
	Register hdr = op->hdr_opr()->as_register();
	Register lock = op->lock_opr()->as_register();
	if (!UseFastLocking) {
		// always take the slow path
		__ b(*op->stub()->entry());
	} else if (op->code() == lir_lock) {
		Register scratch = noreg;
		if (UseBiasedLocking) {
			scratch = op->scratch_opr()->as_register();
		}
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
			"lock_reg must point to the displaced header");
		// add debug info for NullPointerException only if one is possible
		int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
		if (op->info() != NULL) {
			//add_debug_info_for_null_check_here(op->info());
			add_debug_info_for_null_check(null_check_offset,op->info());
		}
		// done
	} else if (op->code() == lir_unlock) {
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
			"lock_reg must point to the displaced header");
		__ unlock_object(hdr, obj, lock, *op->stub()->entry());
	} else {
		Unimplemented();
	}
	__ bind(*op->stub()->continuation());
}



void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
	// Bump the invocation counter in the methodDataOop for this call site
	// and, for virtual/interface calls, record the receiver type in the
	// VirtualCallData row table.
	ciMethod* method = op->profiled_method();
	int bci          = op->profiled_bci();

	// Update counter for all call types
	ciMethodData* md = method->method_data();
	if (md == NULL) {
		bailout("out of memory building methodDataOop");
		return;
	}
	ciProfileData* data = md->bci_to_data(bci);
	assert(data->is_CounterData(), "need CounterData for calls");
	assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
	Register mdo  = op->mdo()->as_register();

	// Materialize the mdo constant with an oop relocation so GC can
	// update the embedded pointer.
	int oop_index = __ oop_recorder()->find_index(md->encoding());
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(mdo, Assembler::split_high((int)md->encoding()));
	__ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));

	// counter_addr += DataLayout::counter_increment (load/add/store)
	Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
	__ lw(AT,counter_addr); 
	__ addi(AT,AT, DataLayout::counter_increment); 
	__ sw(AT,counter_addr); 

	Bytecodes::Code bc = method->java_code_at_bci(bci);
	// Perform additional virtual call profiling for invokevirtual and
	// invokeinterface bytecodes
	if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
			Tier1ProfileVirtualCalls) {
		assert(op->recv()->is_single_cpu(), "recv must be allocated");
		Register recv = op->recv()->as_register();
		assert_different_registers(mdo, recv);
		assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
		ciKlass* known_klass = op->known_holder();
		if (Tier1OptimizeVirtualCallProfiling && known_klass != NULL) {
			// We know the type that will be seen at this call site; we can
			// statically update the methodDataOop rather than needing to do
			// dynamic tests on the receiver type

			// NOTE: we should probably put a lock around this search to
			// avoid collisions by concurrent compilations
			ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
			uint i;
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				ciKlass* receiver = vc_data->receiver(i);
				if (known_klass->equals(receiver)) {
					// row already records this klass: just bump its counter
					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
					__ lw(AT,data_addr); 
					__ addi(AT,AT,DataLayout::counter_increment);
					__ sw(AT,data_addr); 
					return;
				}
			}

			// Receiver type not found in profile data; select an empty slot

			// Note that this is less efficient than it should be because it
			// always does a write to the receiver part of the
			// VirtualCallData rather than just the first time
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				ciKlass* receiver = vc_data->receiver(i);
				if (receiver == NULL) {
					Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
					// store the known klass (with oop relocation) into the row
					int oop_index = __ oop_recorder()->find_index(known_klass->encoding());
					RelocationHolder rspec = oop_Relocation::spec(oop_index);
					__ relocate(rspec);
					__ lui(AT, Assembler::split_high((int)known_klass->encoding()));
					__ addiu(AT, AT, Assembler::split_low((int)known_klass->encoding()));
					__ sw(AT,recv_addr); 
					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
					__ lw(AT, data_addr); 
					__ addi(AT,AT,DataLayout::counter_increment);
					__ sw(AT,data_addr); 
					return;
				}
			}
		} else {
			// Dynamic receiver profiling: load the receiver's klass and
			// search/fill the row table at run time.
			__ lw(recv, Address(recv, oopDesc::klass_offset_in_bytes()));
			Label update_done;
			uint i;
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				Label next_test;
				// See if the receiver is receiver[n].
				__ lw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)))); 
				__ bne(recv,AT,next_test);	
				__ delayed()->nop();	
				Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
				__ lw(AT,data_addr); 
				__ addi(AT,AT,DataLayout::counter_increment);
				__ sw(AT,data_addr); 
				__ b(update_done);
				__ delayed()->nop(); 
				__ bind(next_test);
			}

			// Didn't find receiver; find next empty slot and fill it in
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				Label next_test;
				Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
				__ lw(AT,recv_addr);   
				__ bne(AT,ZERO,next_test); 
				__ delayed()->nop();
				__ sw(recv,recv_addr); 
				__ move(AT,DataLayout::counter_increment); 
				__ sw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))));	
				if (i < (VirtualCallData::row_limit() - 1)) {
					// last row falls through to update_done naturally
					__ b(update_done);
					__ delayed()->nop(); 
				}
				__ bind(next_test);
			}

			__ bind(update_done);
		}
	}
}

void LIR_Assembler::emit_delay(LIR_OpDelay*) {
	// LIR-level delay-slot ops are not implemented on this port.
	Unimplemented();
}


void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
  // Compute the stack address of the given monitor lock slot into 'dst'.
  __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
}

void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
	// dest = -left for int, long, float or double operands.
	if (left->is_single_cpu()) {
		__ subu(dest->as_register(), ZERO, left->as_register());
	} else if (left->is_double_cpu()) {
		// 64-bit two's-complement negation over a register pair:
		//   dlo = ~lo + 1
		//   AT  = carry out of the low word (1 iff dlo == 0)
		//   dhi = ~hi + carry
		Register lo = left->as_register_lo();
		Register hi = left->as_register_hi();
		Register dlo = dest->as_register_lo();
		Register dhi = dest->as_register_hi();
    assert(dlo != hi, "register checks");
		__ nor(dlo, ZERO, lo);
		__ addiu(dlo, dlo, 1);
		__ sltiu(AT, dlo, 1);
		__ nor(dhi, ZERO, hi);
		__ addu(dhi, dhi, AT);
	} else if (left->is_single_fpu()) {
		//for mips , does it required ?      
		__ neg_s(dest->as_float_reg(), left->as_float_reg());
	} else if (left->is_double_fpu()) {
		//for mips , does it required ?      
		__ neg_d(dest->as_double_reg(), left->as_double_reg());
	}else {
		ShouldNotReachHere();
	}
}


void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
	// Load effective address: dest <- address described by 'addr'.
	assert(addr->is_address() && dest->is_register(), "check");
	// (removed an unused local 'reg' that shadowed dest->as_register())
	__ lea(dest->as_register(), as_Address(addr->as_address_ptr()));
}


void LIR_Assembler::jobject2reg(jobject o, Register reg) {
	// Materialize the 32-bit oop constant 'o' into 'reg' via lui/addiu.
	// A non-NULL oop is preceded by an oop relocation so the GC can find
	// and update the embedded value.
	// NOTE (from the original): the NULL case emits no relocInfo, which
	// looks wrong for classes that are not loaded yet — they would never
	// be GC'd.
	if (o != NULL) {
		int oop_index = __ oop_recorder()->find_index(o);
		__ relocate(oop_Relocation::spec(oop_index));
	}
	__ lui(reg, Assembler::split_high((int)o));
	__ addiu(reg, reg, Assembler::split_low((int)o));
}

void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
	// Direct call into the runtime.  Arguments were already placed by the
	// LIR, so only the call (plus its delay-slot nop) is emitted here.
	assert(!tmp->is_valid(), "don't need temporary");
	__ call(dest, relocInfo::runtime_call_type);
	__ delayed()->nop();
	if (info != NULL) {
		add_call_info_here(info);
	}
}

/*  by yyq 7/22/2009
 *  It is unclear whether the register allocator places longs/doubles in
 *  two consecutive registers.  If it does, the paired lw/sw accesses
 *  below could be replaced with single ld/sd instructions.
 */

void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
	// Move a volatile 64-bit value between a register pair (GPR or FPU)
	// and memory as two 32-bit accesses: low word at the address, high
	// word at address + 4.
	assert(type == T_LONG, "only for volatile long fields");
	if (info != NULL) {
		add_debug_info_for_null_check_here(info);
	}

	if (src->is_register() && dest->is_address()) {
		Address lo_addr = as_Address(dest->as_address_ptr());
		if (src->is_double_cpu()) {
			__ sw(src->as_register_lo(), lo_addr);
			__ sw(src->as_register_hi(), lo_addr.base(), lo_addr.disp() + 4);
		} else if (src->is_double_fpu()) {
			__ swc1(src->as_fpu_lo(), lo_addr);
			__ swc1(src->as_fpu_hi(), lo_addr.base(), lo_addr.disp() + 4);
		} else {
			ShouldNotReachHere();
		}
	} else if (src->is_address() && dest->is_register()) {
		Address lo_addr = as_Address(src->as_address_ptr());
		if (dest->is_double_cpu()) {
			__ lw(dest->as_register_lo(), lo_addr);
			__ lw(dest->as_register_hi(), lo_addr.base(), lo_addr.disp() + 4);
		} else if (dest->is_double_fpu()) {
			__ lwc1(dest->as_fpu_lo(), lo_addr);
			__ lwc1(dest->as_fpu_hi(), lo_addr.base(), lo_addr.disp() + 4);
		} else {
			ShouldNotReachHere();
		}
	} else {
		ShouldNotReachHere();
	}
}


void LIR_Assembler::membar() {
	// Full memory barrier: implemented with sync on this port.
	__ sync();
}

void LIR_Assembler::membar_acquire() {
	// Acquire barrier: conservatively a full sync on this port.
	__ sync();
}

void LIR_Assembler::membar_release() {
	// Release barrier: conservatively a full sync on this port.
	__ sync();
}

void LIR_Assembler::get_thread(LIR_Opr result_reg) {
	// Load the current JavaThread* into the result register.  With
	// OPT_THREAD the thread is cached in TREG; otherwise use the helper.
	assert(result_reg->is_register(), "check");
#ifndef OPT_THREAD
	__ get_thread(result_reg->as_register());
#else
	__ move(result_reg->as_register(), TREG);
#endif
}

void LIR_Assembler::peephole(LIR_List*) {
	// No peephole optimizations are performed on this port.
	// do nothing for now
}

#undef __ 

void LIR_Assembler::align_backward_branch_target() {
	// No alignment of backward branch targets is performed on this port.
}