view hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp @ 24:da31f361800f

Fix a bug in the register allocator. In MIPS, float registers containing double-precision operands must be both adjacent physically and aligned. A single-precision operand takes up two float registers temporarily. Thus, one of them is wasted.
author YANG Yongqiang <yangyongqiang@loongson.cn>
date Fri, 05 Nov 2010 17:36:30 +0800
parents 388ae1bd0bdd
children 8ef762f87d0e
line wrap: on
line source

#/*
 * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * Copyright 2010 Lemote, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_c1_LIRAssembler_mips.cpp.incl"

#define __ _masm->

// Ensure that neither temp register aliases `preserve`.
// A temp that collides with `preserve` is replaced by `extra`,
// which the caller guarantees to be distinct from all three.
static void select_different_registers(Register preserve,
		Register extra,
		Register &tmp1,
		Register &tmp2) {
	if (preserve == tmp1) {
		assert_different_registers(tmp1, tmp2, extra);
		tmp1 = extra;
	} else if (preserve == tmp2) {
		assert_different_registers(tmp1, tmp2, extra);
		tmp2 = extra;
	}
	// postcondition: all three registers are pairwise distinct
	assert_different_registers(preserve, tmp1, tmp2);
}



// Ensure that none of the three temp registers aliases `preserve`.
// A temp that collides with `preserve` is replaced by `extra`,
// which the caller guarantees to be distinct from all of them.
static void select_different_registers(Register preserve,
		Register extra,
		Register &tmp1,
		Register &tmp2,
		Register &tmp3) {
	if (tmp1 == preserve) {
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp1 = extra;
	} else if (tmp2 == preserve) {
		// BUGFIX: this branch was missing the distinctness assertion that
		// both sibling branches perform; without it a debug build would not
		// catch `extra` aliasing one of the other temps.
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp2 = extra;
	} else if (tmp3 == preserve) {
		assert_different_registers(tmp1, tmp2, tmp3, extra);
		tmp3 = extra;
	}
	// postcondition: all four registers are pairwise distinct
	assert_different_registers(preserve, tmp1, tmp2, tmp3);
}

// need add method Assembler::is_simm16 in assembler_gs2.hpp
// A constant operand is "small" if it is a T_INT that fits in a signed
// 16-bit immediate (MIPS I-type instructions carry simm16 immediates).
// All other operand kinds and constant types are not small.
bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
	if (!opr->is_constant()) {
		return false;
	}
	LIR_Const* constant = opr->as_constant_ptr();
	if (constant->type() != T_INT) {
		return false;
	}
	return Assembler::is_simm16(constant->as_jint());
}
//FIXME, which register should be used?
// Location of the incoming receiver (`this`) for compiled calls: T0, as an oop.
LIR_Opr LIR_Assembler::receiverOpr() {
	return FrameMap::_t0_oop_opr;
}

// The receiver arrives in the same location it is passed in (T0).
LIR_Opr LIR_Assembler::incomingReceiverOpr() {
	return receiverOpr();
}

// Register holding the OSR buffer pointer on entry to an OSR method: T0.
LIR_Opr LIR_Assembler::osrBufferPointer() {
	//return FrameMap::ecx_opr;
//	return FrameMap::_v1_opr;
	return FrameMap::_t0_opr;
}

//--------------fpu register translations-----------------------
// FIXME:I do not know what's to do for mips fpu

// Emit `f` into the constant table and return its address.
// On const-section overflow the compilation is bailed out and the start
// of the const section is returned as a harmless dummy address.
address LIR_Assembler::float_constant(float f) {
	address const_addr = __ float_constant(f);
	if (const_addr != NULL) {
		return const_addr;
	}
	bailout("const section overflow");
	return __ code()->consts()->start();
}


// Emit `d` into the constant table and return its address.
// On const-section overflow the compilation is bailed out and the start
// of the const section is returned as a harmless dummy address.
address LIR_Assembler::double_constant(double d) {
	address const_addr = __ double_constant(d);
	if (const_addr != NULL) {
		return const_addr;
	}
	bailout("const section overflow");
	return __ code()->consts()->start();
}





// x87 FPU control-word reset; not applicable to the MIPS FPU.
void LIR_Assembler::reset_FPU() {
	Unimplemented();
}


// x87 24-bit precision mode; not applicable to the MIPS FPU.
void LIR_Assembler::set_24bit_FPU() {
	Unimplemented();
}

//FIXME.
// x87 FPU-stack pop; MIPS has flat FPU registers, so this is a no-op.
void LIR_Assembler::fpop() {
	// do nothing
}
// x87 FPU-stack exchange; MIPS has flat FPU registers, so this is a no-op.
void LIR_Assembler::fxch(int i) {
	// do nothing
}
// x87 FPU-stack load; MIPS has flat FPU registers, so this is a no-op.
void LIR_Assembler::fld(int i) {
	// do nothing
}
// x87 FPU-stack free; MIPS has flat FPU registers, so this is a no-op.
void LIR_Assembler::ffree(int i) {
	// do nothing
}

// Emit a MIPS break instruction (code 17) to trap into the debugger.
void LIR_Assembler::breakpoint() {
  __ brk(17);
}
//FIXME, opr can not be float?
// Push an operand onto the machine stack.
// Supports single/double cpu registers, stack slots, and T_OBJECT/T_INT
// constants; floating-point operands are not handled (FIXME in original).
void LIR_Assembler::push(LIR_Opr opr) {
	if (opr->is_single_cpu()) {
		__ push_reg(opr->as_register());
	} else if (opr->is_double_cpu()) {
		// push high word first so the low word ends up at the lower address
		__ push_reg(opr->as_register_hi());
		__ push_reg(opr->as_register_lo());
	} else if (opr->is_stack()) {
		__ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
	} else if (opr->is_constant()) {
		LIR_Const* c = opr->as_constant_ptr();
		switch (c->type()) {
		case T_OBJECT:
			__ push_oop(c->as_jobject());
			break;
		case T_INT:
			__ push_jint(c->as_jint());
			break;
		default:
			ShouldNotReachHere();
		}
	} else {
		ShouldNotReachHere();
	}
}

// Pop the top of the machine stack into a single-word cpu register.
// Any other operand kind is a usage error (debug-build assert).
void LIR_Assembler::pop(LIR_Opr opr) {
	if (!opr->is_single_cpu()) {
		assert(false, "Must be single word register or floating-point register");
		return;
	}
	__ pop(opr->as_register());
}


// Convert a LIR address to a machine Address: base register + displacement.
// (The LIR index component is not folded in here.)
Address LIR_Assembler::as_Address(LIR_Address* addr) {
	// now we need this for parameter pass
	return Address(addr->base()->as_register(), addr->disp());
}


// Address of the low word of a two-word (long/double) operand.
Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
	return as_Address(addr);
}


// Address of the high word of a two-word (long/double) operand:
// one half-long (longSize/2 bytes) above the low word.
Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
	Register base = addr->base()->as_register();
	return Address(base, addr->disp() + longSize/2);
}


//void LIR_Assembler::osr_entry(IRScope* scope, int number_of_locks, Label* continuation, int osr_bci) { 
// Emit the OSR (on-stack replacement) entry point: build the compiled
// frame and copy the monitors out of the OSR buffer into this frame's
// monitor area.  Register state on entry is documented inline below.
void LIR_Assembler::osr_entry() { 
	//  assert(scope->is_top_scope(), "inlined OSR not yet implemented");
	offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
	BlockBegin* osr_entry = compilation()->hir()->osr_entry();
	ValueStack* entry_state = osr_entry->state();
	int number_of_locks = entry_state->locks_size();

	// we jump here if osr happens with the interpreter
	// state set up to continue at the beginning of the
	// loop that triggered osr - in particular, we have
	// the following registers setup:
	//
	// S7: interpreter locals pointer
	// V1: interpreter locks pointer
	// RA: return address
	//T0: OSR buffer
	// build frame
	// ciMethod* m = scope->method();
	// NOTE(review): `m` is not used after this point — candidate for removal.
	ciMethod* m = compilation()->method();
	__ build_frame(initial_frame_size_in_bytes());

  // OSR buffer is
  //
  // locals[nlocals-1..0]
  // monitors[0..number_of_locks]
  //
  // locals is a direct copy of the interpreter frame so in the osr buffer
  // so first slot in the local array is the last local from the interpreter
  // and last slot is local[0] (receiver) from the interpreter
  //
  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
  // in the interpreter frame (the method lock if a sync method)

  // Initialize monitors in the compiled activation.
  //   T0: pointer to osr buffer
  //
  // All other registers are dead at this point and the locals will be
  // copied into place by code emitted in the IR.

  Register OSR_buf = osrBufferPointer()->as_register();

  
  // note: we do osr only if the expression stack at the loop beginning is empty,
  //       in which case the spill area is empty too and we don't have to setup
  //       spilled locals
  //
  // copy monitors
  // V1: pointer to locks
  { 
	  assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
	  // byte offset of monitor 0 within the OSR buffer (monitors follow locals)
	  int monitor_offset = BytesPerWord * method()->max_locals()+
		  (BasicObjectLock::size() * BytesPerWord) * (number_of_locks - 1);
	  for (int i = 0; i < number_of_locks; i++) {
		  // monitors are laid out in reverse order relative to the interpreter frame
		  int slot_offset =monitor_offset - (i * BasicObjectLock::size())*BytesPerWord;
#ifdef ASSERT
		  // verify the buffered monitor actually holds a locked object
		  { 
			  Label L;
			  //__ lw(AT, V1, slot_offset * BytesPerWord + BasicObjectLock::obj_offset_in_bytes());
			  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
			  __ bne(AT, ZERO, L);
			  __ delayed()->nop();
			  __ stop("locked object is NULL");
			  __ bind(L);
		  }
#endif
		  // copy the BasicLock and the object reference into this frame's monitor slot i
		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::lock_offset_in_bytes());
		  __ sw(AT, frame_map()->address_for_monitor_lock(i));
		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
		  __ sw(AT, frame_map()->address_for_monitor_object(i));
	  }
  }
}


// Emit the inline-cache check at the method's unverified entry point and
// return the code offset where the check begins.  On an IC miss,
// inline_cache_check jumps to the runtime's IC-miss handler.
int LIR_Assembler::check_icache() {
	Register receiver = FrameMap::receiver_opr->as_register();
	Register ic_klass = IC_Klass;

	int offset = __ offset();
	// Compare the receiver's klass against the cached klass register.
	// (Fix: pass the local `ic_klass` instead of the raw IC_Klass macro,
	// which previously left `ic_klass` as a dead variable.)
	__ inline_cache_check(receiver, ic_klass);
	// pad so the verified entry point that follows is CodeEntryAlignment-aligned
	__ align(CodeEntryAlignment);
	return offset;
}

// Load an (initially NULL) oop constant into `reg` under a load_klass
// patching stub.  The lui/addiu pair below is rewritten with the real
// 32-bit oop once class loading resolves the constant.
void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
	jobject o = NULL;
	PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
	int oop_index = __ oop_recorder()->allocate_index(o);
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	// materialize the 32-bit immediate in the standard two-instruction form
	__ lui(reg, Assembler::split_high((int)o));
	__ addiu(reg, reg, Assembler::split_low((int)o));
	// patching_epilog(patch, LIR_Op1::patch_normal, noreg, info);
	patching_epilog(patch, lir_patch_normal, reg, info);
}


// Emit code to unlock monitor `monitor_no` on object `obj_opr`.
// `lock_opr` receives the address of the BasicLock in this frame; if
// `exception` is a valid register its value is preserved across the
// (possibly runtime) unlock call via a stack slot.
void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register unused, int monitor_no, Register exception) {

	if (exception->is_valid()) {
		// preserve exception
		// note: the monitor_exit runtime call is a leaf routine
		//       and cannot block => no GC can happen
		// The slow case (MonitorAccessStub) uses the first two stack slots
		// ([SP+0] and [SP+4]), therefore we store the exception at [esp+8]
		__ sw(exception, SP, 2 * wordSize);
	}

	Register obj_reg  = obj_opr->as_register();
	Register lock_reg = lock_opr->as_register();

	// compute pointer to BasicLock
	//Address lock_addr = frame_map()->address_for_monitor_lock_index(monitor_no);
	Address lock_addr = frame_map()->address_for_monitor_lock(monitor_no);
	__ lea(lock_reg, lock_addr);
	// unlock object
	MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, true, monitor_no);
	// temporary fix: must be created after exceptionhandler, therefore as call stub
	_slow_case_stubs->append(slow_case);
	if (UseFastLocking) {
		// try inlined fast unlocking first, revert to slow locking if it fails
		// note: lock_reg points to the displaced header since the displaced header offset is 0!
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
		__ unlock_object(NOREG, obj_reg, lock_reg, *slow_case->entry());
	} else {
		// always do slow unlocking
		// note: the slow unlocking code could be inlined here, however if we use
		//       slow unlocking, speed doesn't matter anyway and this solution is
		//       simpler and requires less duplicated code - additionally, the
		//       slow unlocking code is the same in either case which simplifies
		//       debugging
		__ b(*slow_case->entry());
		__ delayed()->nop();
	}
	// done: both the fast path and the slow-case stub continue here
	__ bind(*slow_case->continuation());

	if (exception->is_valid()) {
		// restore exception
		__ lw(exception, SP, 2 * wordSize);
	}
}

// This specifies the esp decrement needed to build the frame
// Number of bytes SP is decremented by when building the compiled frame.
// Two slots are excluded: the return address and the link, which are
// accounted for separately by the frame-building code.
int LIR_Assembler::initial_frame_size_in_bytes() {
	// if rounding, must let FrameMap know!
	int slots = frame_map()->framesize() - 2;
	return slots * BytesPerWord;
}

// Emit the method's exception handler stub: search for a handler (if the
// method can have one), unlock a synchronized receiver, and unwind to the
// caller.  The exception oop is expected in V0, the throwing pc in V1.
void LIR_Assembler::emit_exception_handler() { 
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  // Lazy deopt bug 4932387. If last instruction is a call then we
  // need an area to patch where we won't overwrite the exception
  // handler. This means we need 5 bytes. Could use a fat_nop 
  // but since this never gets executed it doesn't really make
  // much difference.
  // 
	// pad with one nop per native-call instruction word, plus one
	for (int i = 0; i < (NativeCall::instruction_size/4 + 1) ; i++ ) {
		__ nop();
	}

  // generate code for exception handler
	address handler_base = __ start_a_stub(exception_handler_size);
	if (handler_base == NULL) {
		//no enough space 
		bailout("exception handler overflow");
		return;
	}



	compilation()->offsets()->set_value(CodeOffsets::Exceptions, code_offset()); 
	// if the method does not have an exception handler, then there is
	// no reason to search for one
	if (compilation()->has_exception_handlers() || JvmtiExport::can_post_exceptions()) {
		// the exception oop and pc are in V0 and V1
		// no other registers need to be preserved, so invalidate them
		// check that there is really an exception
		__ verify_not_null_oop(V0);

		// search an exception handler (V0: exception oop, V1: throwing pc)
		__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
				relocInfo::runtime_call_type);
		__ delayed()->nop();
    // if the call returns here, then the exception handler for particular
    // exception doesn't exist -> unwind activation and forward exception to caller
  }

	// the exception oop is in V0
	// no other registers need to be preserved, so invalidate them
	// check that there is really an exception
	__ verify_not_null_oop(V0);

	// unlock the receiver/klass if necessary
	// V0: exception
	ciMethod* method = compilation()->method();
	if (method->is_synchronized() && GenerateSynchronizationCode) {
		monitorexit(FrameMap::_t0_oop_opr, FrameMap::_t6_opr, NOREG, 0, V0);
	}

	// unwind activation and forward exception to caller
	// V0: exception
	__ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), 
				relocInfo::runtime_call_type);
	__ delayed()->nop();
	__ end_a_stub();
}

// Emit the deoptimization handler stub.  It pushes its own address
// (loaded pc-relatively) so the deopt blob can identify the deopt site.
void LIR_Assembler::emit_deopt_handler() {
	// if the last instruction is a call (typically to do a throw which
	// is coming at the end after block reordering) the return address
 	// must still point into the code area in order to avoid assertion
 	// failures when searching for the corresponding bci => add a nop
 	// (was bug 5/14/1999 - gri)
 	
 	__ nop();
	
 	// generate code for exception handler
	address handler_base = __ start_a_stub(deopt_handler_size);
	if (handler_base == NULL) {
		// not enough space left for the handler
		bailout("deopt handler overflow");
		return;
	}
	#ifdef ASSERT
	int offset = code_offset();
	#endif // ASSERT
 
	compilation()->offsets()->set_value(CodeOffsets::Deopt, code_offset());
 
	// capture the current pc and push it for the deopt blob
	InternalAddress here(__ pc());
	//FIXE:: may be wrong, Address_Literal
	__ lw(AT, __ as_Address(here) );
	__ push(AT);
	// ensure the handler fits in the reserved stub space
	assert(code_offset() - offset <= deopt_handler_size, "overflow");
 	__ end_a_stub();

}


// Optimized Library calls
// This is the fast version of java.lang.String.compare; it has not
// OSR-entry and therefore, we generate a slow version for OSR's
//void LIR_Assembler::emit_string_compare(IRScope* scope) {
// Intrinsic for java.lang.String.compareTo: compares the receiver (T0)
// with the argument string and returns the result in V0.
// NOTE(review): bare `__ delayed();` calls below rely on the NEXT emitted
// instruction landing in the branch delay slot — confirm this is the
// intended assembler idiom before reordering anything here.
void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
	// get two string object in T0&T1
	//receiver already in T0
	// NOTE(review): lw with two Register operands — presumably loads the
	// second String oop into T1; verify this overload's semantics.
	__ lw(T1, arg1->as_register());
	__ lw (T2, T0, java_lang_String::value_offset_in_bytes());	//value, T_CHAR array
	__ lw (AT, T0, java_lang_String::offset_offset_in_bytes());	//offset
	__ shl(AT, 1);   // offset is in chars; scale to bytes
	__ add(T2, T2, AT);
	__ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_CHAR));
	// Now T2 is the address of the first char in first string(T0)

	add_debug_info_for_null_check_here(info);
	__ lw (T3, T1, java_lang_String::value_offset_in_bytes());
	__ lw (AT, T1, java_lang_String::offset_offset_in_bytes());
	__ shl(AT, 1);
	__ add(T3, T3, AT);
	__ addi(T3, T3, arrayOopDesc::base_offset_in_bytes(T_CHAR));
	// Now T3 is the address of the first char in second string(T1)

	// compute minimum length (in T4) and difference of lengths (V0)
	Label L;
	__ lw (T4, Address(T0, java_lang_String::count_offset_in_bytes())); 
	// the length of the first string(T0)
	__ lw (T5, Address(T1, java_lang_String::count_offset_in_bytes()));	
	// the length of the second string(T1)

	__ subu(V0, T4, T5);
	__ blez(V0, L);
	__ delayed()->nop();
	__ move (T4, T5);    // T4 = min(len1, len2)
	__ bind (L);

	Label Loop, haveResult, LoopEnd;
	__ bind(Loop);
	__ beq(T4, ZERO, LoopEnd);
	__ delayed();
	// the addi below executes in the delay slot of the beq above
	__ addi(T2, T2, 2);

	// compare current character
	__ lhu(T5, T2, -2);   // -2 compensates for the pre-incremented T2
	__ lhu(T6, T3, 0);
	__ bne(T5, T6, haveResult);
	__ delayed();
	// the addi below executes in the delay slot of the bne above
	__ addi(T3, T3, 2);

	__ b(Loop);
	__ delayed()->addi(T4, T4, -1);   // decrement remaining count in delay slot

	__ bind(haveResult);
	// characters differ: result is the difference of the two chars
	__ subu(V0, T5, T6);

	__ bind(LoopEnd);
	// all min(len1,len2) chars equal: result is len1 - len2 (already in V0)
	return_op(FrameMap::_v0_opr);
}


// Emit the method epilogue: tear down the frame, touch the safepoint
// polling page (with a poll_return relocation), and return via RA.
void LIR_Assembler::return_op(LIR_Opr result) {
	assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, "word returns are in V0");
	// Pop the stack before the safepoint code
	__ leave();
	// materialize the polling-page address and read it; a safepoint makes
	// the page unreadable so this load traps into the VM
	__ lui(AT, Assembler::split_high((intptr_t)os::get_polling_page() 
			+ (SafepointPollOffset % os::vm_page_size())));
	__ relocate(relocInfo::poll_return_type);
	__ lw(AT, AT, Assembler::split_low((intptr_t)os::get_polling_page() 
			+ (SafepointPollOffset % os::vm_page_size())));

	__ jr(RA);
	__ delayed()->nop();
}

//read protect mem to ZERO won't cause the exception only in godson-2e, So I modify ZERO to AT .@jerome,11/25,2006
// Emit a safepoint poll: load from the polling page (readable normally,
// protected during a safepoint so the load traps).  Returns the code
// offset of the poll sequence for debug-info bookkeeping.
int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
  assert(info != NULL, "info must not be null for safepoint poll");
	int offset = __ offset();
  Register r = tmp->as_register();
	__ lui(r, Assembler::split_high((intptr_t)os::get_polling_page() 
				+ (SafepointPollOffset % os::vm_page_size())));
  add_debug_info_for_branch(info);
	__ relocate(relocInfo::poll_type);
	// load into AT rather than ZERO: see file comment about godson-2e
	__ lw(AT, r, Assembler::split_low((intptr_t)os::get_polling_page() 
				+ (SafepointPollOffset % os::vm_page_size())));
	return offset; 
}

// Emit a register-to-register move, suppressed when source and
// destination are the same physical register.
void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
	if (from_reg == to_reg) {
		return;  // nothing to do
	}
	__ move(to_reg, from_reg);
}


// Swap the contents of two registers with the XOR trick (no scratch
// register required).
// BUGFIX: guard against a == b.  XOR-swapping a register with itself
// zeroes it (a ^ a == 0), so the aliased case must be a no-op.
void LIR_Assembler::swap_reg(Register a, Register b) {
	if (a == b) return;
	__ xorr(a, a, b);
	__ xorr(b, a, b);
	__ xorr(a, a, b);
}

// Materialize a constant into a register.  Handles T_INT, T_LONG,
// T_OBJECT (with optional patching), T_FLOAT and T_DOUBLE; float/double
// constants are loaded from the constant table via AT.
void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
 	assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();
	switch (c->type()) {
    case T_INT:
    	{
    		jint con = c->as_jint();
    		if (dest->is_single_cpu()) {
    			assert(patch_code == lir_patch_none, "no patching handled here");
    			__ move(dest->as_register(), con);
    		} else {
    			assert(dest->is_single_fpu(), "wrong register kind");
    			// route the bits through AT into the fpu register
    			__ move(AT, con);
    			__ mtc1(AT, dest->as_float_reg());
    		}
    	}
    	break;
    
    case T_LONG:
    	{
    		jlong con = c->as_jlong();
    		// split the 64-bit constant into its two 32-bit halves
    		// NOTE(review): this split assumes little-endian host layout — confirm
    		jint* conhi = (jint*)&con + 1;
    		jint* conlow = (jint*)&con;
    
    		if (dest->is_double_cpu()) {
    			__ move(dest->as_register_lo(), *conlow);
    			__ move(dest->as_register_hi(), *conhi);
    		} else {
    		//	assert(dest->is_double(), "wrong register kind");
    			// low bits in the even fpu register, high bits in the odd one
    			__ move(AT, *conlow);
    			__ mtc1(AT, dest->as_double_reg());
    			__ move(AT, *conhi);
    			__ mtc1(AT, dest->as_double_reg()+1);
    		}
    	}
    	break;
    
    case T_OBJECT:
    	{
    		if (patch_code == lir_patch_none) {
          jobject2reg(c->as_jobject(), dest->as_register());
    		} else {
    			// unresolved constant: emit a patchable load
    			jobject2reg_with_patching(dest->as_register(), info);
    		}
    	}
    	break;
    
    case T_FLOAT:
    	{
    		address const_addr = float_constant(c->as_jfloat());
    		assert (const_addr != NULL, "must create float constant in the constant table");
    
    		if (dest->is_single_fpu()) {
    			// load the float from the constant table through AT
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lwc1(dest->as_float_reg(), AT, 0);
    
    		} else {
    			// float bits requested in a cpu register
    			assert(dest->is_single_cpu(), "Must be a cpu register.");
    			assert(dest->as_register() != AT, "AT can not be allocated.");
    
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lw(dest->as_register(), AT, 0); 
    		}
    	}
    	break;
    
    case T_DOUBLE:
    	{
    		address const_addr = double_constant(c->as_jdouble());
    		assert (const_addr != NULL, "must create double constant in the constant table");
    		
    		if (dest->is_double_fpu()) {
    			// load the two words of the double into the fpu register pair
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lwc1(dest->as_double_reg(), AT, 0);
    			__ lwc1(dest->as_double_reg()+1, AT, 4);					
    		} else {
    			// double bits requested in a cpu register pair
    			assert(dest->as_register_lo() != AT, "AT can not be allocated.");
    			assert(dest->as_register_hi() != AT, "AT can not be allocated.");
    
    			__ relocate(relocInfo::internal_pc_type);
    			__ lui(AT, Assembler::split_high((int)const_addr));
    			__ addiu(AT, AT, Assembler::split_low((int)const_addr));
    			__ lw(dest->as_register_lo(), AT, 0);
    			__ lw(dest->as_register_hi(), AT, 4);
    		}
    	}
    	break;
    
    default:
    	ShouldNotReachHere();
	}
}


// Store a constant into a stack slot.  Values are staged through AT;
// object constants get an oop relocation so the GC can find them.
void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
	assert(src->is_constant(), "should not call otherwise");
 	assert(dest->is_stack(), "should not call otherwise");
   	LIR_Const* c = src->as_constant_ptr();
	switch (c->type()) {
  	case T_INT:  // fall through
  	case T_FLOAT:
		  // both are single-word bit patterns
		  __ move(AT, c->as_jint_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
		  break;

  	case T_OBJECT:
    	if (c->as_jobject() == NULL) {
			  __ sw(ZERO, frame_map()->address_for_slot(dest->single_stack_ix()));
		  } else {
			  // record the oop so it is visited during GC / relocation
			  int oop_index = __ oop_recorder()->find_index(c->as_jobject());
			  RelocationHolder rspec = oop_Relocation::spec(oop_index);
			  __ relocate(rspec);
			  __ lui(AT, Assembler::split_high((int)c->as_jobject()));
			  __ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
			  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
			}
		  break;
  	case T_LONG:  // fall through
  	case T_DOUBLE:
      // store the two 32-bit halves into their respective slots
      __ move(AT, c->as_jint_lo_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
						lo_word_offset_in_bytes));
 		  __ move(AT, c->as_jint_hi_bits());
		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
						hi_word_offset_in_bytes));
		  break;
  	default:
  		ShouldNotReachHere();
  }
}

// Store a constant to memory at `dest`.  Values are staged through AT;
// `type` selects the store width (sw/sh/sb).  If `info` is given, the
// first store is registered as an implicit null check site.
void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info ) {
	assert(src->is_constant(), "should not call otherwise");
	assert(dest->is_address(), "should not call otherwise");
	LIR_Const* c = src->as_constant_ptr();
	LIR_Address* addr = dest->as_address_ptr();

	if (info != NULL) add_debug_info_for_null_check_here(info);
	switch (type) {
		case T_LONG: // fall through
		case T_DOUBLE:
			// two-word store: high half then low half
			__ move(AT, c->as_jint_hi_bits());
			__ sw(AT, as_Address_hi(addr));
			__ move(AT, c->as_jint_lo_bits());
			__ sw(AT, as_Address_lo(addr));
			break; 
		case T_OBJECT:  // fall through
		case T_ARRAY:   
			if (c->as_jobject() == NULL){
				__ sw(ZERO, as_Address(addr));
			} else {
				// record the oop so it is visited during GC / relocation
				int oop_index = __ oop_recorder()->find_index(c->as_jobject());
				RelocationHolder rspec = oop_Relocation::spec(oop_index);
				__ relocate(rspec);
				__ lui(AT, Assembler::split_high((int)c->as_jobject()));
				__ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
				__ sw(AT, as_Address(addr));
			}
			break;
		case T_INT:     // fall through
		case T_FLOAT:  
			__ move(AT, c->as_jint_bits());
			__ sw(AT, as_Address(addr));
			break;
		case T_BOOLEAN: // fall through
		case T_BYTE:    
			__ move(AT, c->as_jint());
			__ sb(AT, as_Address(addr));
			break;
		case T_CHAR:    // fall through
		case T_SHORT:   
			__ move(AT, c->as_jint());
			__ sh(AT, as_Address(addr));
			break;
		default: ShouldNotReachHere();
	};
}

// Register-to-register move covering all combinations of cpu/fpu and
// single/double operands.  The double-cpu case is careful about source
// and destination register pairs that partially or fully overlap.
void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  if (dest->is_float_kind() && src->is_float_kind()) {
		// fpu -> fpu
		if (dest->is_single_fpu()) {
			assert(src->is_single_fpu(), "must both be float");
			 __ mov_s(dest->as_float_reg(), src->as_float_reg());
		} else {
			assert(src->is_double_fpu(), "must bothe be double");
			__ mov_d( dest->as_double_reg(),src->as_double_reg());
		}
  } else if (!dest->is_float_kind() && !src->is_float_kind()) {
	  // cpu -> cpu
	  if (dest->is_single_cpu()) {
		  assert(src->is_single_cpu(), "must match");
		  move_regs(src->as_register(), dest->as_register());
	  } else if (dest->is_double_cpu()) {
		  //      assert(src->is_double_cpu() && !src->overlaps(dest), "must match and not overlap");
		  assert(src->is_double_cpu(),"must match and not overlap");
      // choose a move order that does not clobber a source half before
      // it has been copied
      if (src->as_register_hi() != dest->as_register_lo()) {
		    move_regs(src->as_register_lo(), dest->as_register_lo());
		    move_regs(src->as_register_hi(), dest->as_register_hi());
      } else if (src->as_register_lo() != dest->as_register_hi()) {
	  	  move_regs(src->as_register_hi(), dest->as_register_hi());     
  		  move_regs(src->as_register_lo(), dest->as_register_lo());
      } else {
        // src and dest pairs are fully crossed: exchange the halves in place
        swap_reg(src->as_register_lo(), src->as_register_hi());
      }

	  }
  } else {
	  // float to int or int to float moves
	  if (dest->is_double_cpu()) {
		  assert(src->is_double_fpu(), "must match");
		  __ mfc1(dest->as_register_lo(), src->as_double_reg());
		  __ mfc1(dest->as_register_hi(), src->as_double_reg() + 1);
	  } else if (dest->is_single_cpu()) {
		  assert(src->is_single_fpu(), "must match");
		  __ mfc1(dest->as_register(), src->as_float_reg());
	  } else if (dest->is_double_fpu()) {
		  assert(src->is_double_cpu(), "must match");
		  __ mtc1(src->as_register_lo(), dest->as_double_reg());
		  __ mtc1(src->as_register_hi(), dest->as_double_reg() + 1);
	  } else if (dest->is_single_fpu()) {
		  assert(src->is_single_cpu(), "must match");
		  __ mtc1(src->as_register(), dest->as_float_reg());
	  }
  }
}


// Spill a register operand to a stack slot.  `pop_fpu_stack` is an x86
// fpu-stack artifact and is ignored on MIPS.
void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type,bool pop_fpu_stack) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");

  if (src->is_single_cpu()) {
    Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
    if (type == T_OBJECT || type == T_ARRAY) {
      __ verify_oop(src->as_register());
    }
    __ sw(src->as_register(),dst);  
  } else if (src->is_double_cpu()) {
    // store both halves of the long into their slots
    Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
    Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
     __ sw(src->as_register_lo(),dstLO);
     __ sw(src->as_register_hi(),dstHI);
  }else if (src->is_single_fpu()) {
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    __ swc1(src->as_float_reg(), dst_addr);

  } else if (src->is_double_fpu()) {
    // a double lives in an adjacent fpu register pair; store word by word
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    __ swc1(src->as_double_reg(), dst_addr);
    __ swc1(src->as_double_reg() + 1, dst_addr.base(), dst_addr.disp() + 4);

  } else {
    ShouldNotReachHere();
  }
}

//FIXME
// Store a register operand to memory.  Handles field patching (the
// displacement is rewritten once the field is resolved), displacements
// that do not fit in simm16 (staged through AT), and all basic types.
// T_LONG gets special ordering so a patch can be split across the two
// word stores without clobbering a source half.
void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info,bool pop_fpu_stack, bool/*unaliged*/) {
  LIR_Address* to_addr = dest->as_address_ptr();
  Register dest_reg = to_addr->base()->as_register();	
	PatchingStub* patch = NULL;
	bool needs_patching = (patch_code != lir_patch_none);
  // NOTE(review): disp_reg is initialized to NOREG but compared against
  // noreg below — presumably the same sentinel; confirm.
  Register disp_reg = NOREG;
	int disp_value = to_addr->disp();

	if (type == T_ARRAY || type == T_OBJECT) {
    __ verify_oop(src->as_register());
  }

	if (needs_patching) {
		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
		assert(!src->is_double_cpu() || 
				patch_code == lir_patch_none || 
				patch_code == lir_patch_normal, 
				"patching doesn't match register");
	}
        
	if (info != NULL) {
     	add_debug_info_for_null_check_here(info);
  }
	if (needs_patching) {
		// materialize the (to-be-patched) displacement in AT
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
		__ addiu(AT, AT, Assembler::split_low(disp_value));
	} else if (!Assembler::is_simm16(disp_value)) { 
		// displacement too large for an immediate: high part goes in AT,
		// low part stays in the store instruction below
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
	}
	int offset = code_offset();

	switch(type) {
	case T_DOUBLE:
		assert(src->is_double_fpu(), "just check");
		if (disp_reg == noreg) {
			__ swc1(src->as_double_reg(), dest_reg, disp_value);
			__ swc1(src->as_double_reg()+1, dest_reg, disp_value+4);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_double_reg(), AT, 0);
			__ swc1(src->as_double_reg()+1, AT, 4);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_double_reg(), AT, Assembler::split_low(disp_value));
			__ swc1(src->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
		}
		break;

	case T_FLOAT:
	//	assert(src->is_single_cpu(), "just check");

		if (disp_reg == noreg) {
			__ swc1(src->as_float_reg(), dest_reg, disp_value);
		} else if(needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_float_reg(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ swc1(src->as_float_reg(), AT, Assembler::split_low(disp_value));
		}
		break;
		
	case T_LONG: {
    Register from_lo = src->as_register_lo();
  	Register from_hi = src->as_register_hi();
  	Register base = to_addr->base()->as_register();
   	Register index = noreg;
    if (to_addr->index()->is_register()) {
   	  index = to_addr->index()->as_register();
  	}
  	// order the two stores so the base/index registers are not clobbered
  	// before both halves are written; split the patch across the stores
  	if (base == from_lo || index == from_lo) {
      assert(base != from_hi, "can't be");
      assert(index == noreg || (index != base && index != from_hi), "can't handle this");
      __ sw(from_hi,as_Address_hi(to_addr));  
		  if (patch != NULL) {
        patching_epilog(patch, lir_patch_high, base, info);
        patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        patch_code = lir_patch_low;
      }
      __ sw(from_lo,as_Address_lo(to_addr)); 
		} else {
      assert(index == noreg || (index != base && index != from_lo), "can't handle this");
    	__ sw(from_lo,as_Address_lo(to_addr)); 
		  if (patch != NULL) {
        patching_epilog(patch, lir_patch_low, base, info);
        patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        patch_code = lir_patch_high;
      }
      __ sw(from_hi,as_Address_hi(to_addr));  
    }
		break;
  }
	case T_ADDRESS:
	case T_ARRAY:
	case T_OBJECT:
	case T_INT:
		//assert(from_reg.is_word(), "just check");
		if (disp_reg == noreg) {
			__ sw(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sw(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sw(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	case T_CHAR:
	case T_SHORT:
//		assert(from_reg.is_word(), "just check");

		if (disp_reg == noreg) {
			__ sh(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sh(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sh(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	case T_BYTE:
	case T_BOOLEAN:
		assert(src->is_single_cpu(), "just check");

		if (disp_reg == noreg) {
			__ sb(src->as_register(), dest_reg, disp_value);
		} else if (needs_patching) {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sb(src->as_register(), AT, 0);
		} else {
			__ add(AT, dest_reg, disp_reg);
			offset = code_offset();
			__ sb(src->as_register(), AT, Assembler::split_low(disp_value));
		}
		break;

	default:
		ShouldNotReachHere();
	}

  if (needs_patching) {
    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
  }
}



// Load a value from a stack slot into a register (CPU or FPU).
// For two-word values (double_cpu / double_fpu) both 32-bit halves are
// loaded separately; the double FPU case uses the adjacent odd register
// (as_double_reg() + 1) for the high word, matching the rest of this port.
void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
	assert(src->is_stack(), "should not call otherwise");
	assert(dest->is_register(), "should not call otherwise");
	// Was missing its ';' and buried in an unreachable else branch; check up front.
	assert(type != T_ILLEGAL, "Bad type in stack2reg");
	if (dest->is_single_cpu()) {
		__ lw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
		if (type == T_ARRAY || type == T_OBJECT) {
			// Sanity-check the loaded oop in debug builds.
			__ verify_oop(dest->as_register());
		}
	} else if (dest->is_double_cpu()) {
		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
		__ lw(dest->as_register_lo(), src_addr_LO);
		__ lw(dest->as_register_hi(), src_addr_HI);
	} else if (dest->is_single_fpu()) {
		Address addr = frame_map()->address_for_slot(src->single_stack_ix());
		__ lwc1(dest->as_float_reg(), addr);
	} else if (dest->is_double_fpu()) {
		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
		__ lwc1(dest->as_double_reg(), src_addr_LO);
		__ lwc1(dest->as_double_reg() + 1, src_addr_HI);
	} else {
		// Every register kind is handled above; the old trailing branch that
		// asserted is_single_cpu() here was unreachable and contradictory.
		ShouldNotReachHere();
	}
}

// Copy a stack slot to another stack slot, bouncing each 32-bit word
// through the scratch register AT.
void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
  if (src->is_single_stack()) {
    // One word: load then store.
    Address from = frame_map()->address_for_slot(src->single_stack_ix());
    Address to   = frame_map()->address_for_slot(dest->single_stack_ix());
    __ lw(AT, from);
    __ sw(AT, to);
  } else if (src->is_double_stack()) {
    // Two words: copy the word at offset 0, then the word at offset 4.
    int from_ix = src->double_stack_ix();
    int to_ix   = dest->double_stack_ix();
    __ lw(AT, frame_map()->address_for_slot(from_ix));
    __ sw(AT, frame_map()->address_for_slot(to_ix));
    __ lw(AT, frame_map()->address_for_slot(from_ix, 4));
    __ sw(AT, frame_map()->address_for_slot(to_ix, 4));
  } else {
    ShouldNotReachHere();
  }
}



// Load from memory into a register.
// If patching is needed, be sure the instruction at the recorded offset is a
// MoveMemReg: the patching machinery rewrites the displacement materialized
// by the lui/addiu pair below once the field offset is resolved.
void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool) {
	assert(src->is_address(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);
	
	Register src_reg = addr->base()->as_register();
	Register disp_reg = noreg;
	int disp_value = addr->disp();
	bool needs_patching = (patch_code != lir_patch_none);

	PatchingStub* patch = NULL;
	if (needs_patching) {
		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
//		assert(!to_reg.is_long() || patch_code == LIR_Op1::patch_low || patch_code == LIR_Op1::patch_high, "patching doesn't match register");
	}

	// we must use lui&addiu, 
	// When patching, the displacement is materialized with a full lui/addiu
	// pair (even if it fits in 16 bits) so the patcher has a fixed-shape
	// instruction sequence to rewrite. Otherwise only spill to AT when the
	// displacement does not fit a signed 16-bit immediate.
	if (needs_patching) {
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
		__ addiu(AT, AT, Assembler::split_low(disp_value));
	} else if (!Assembler::is_simm16(disp_value)) {
		disp_reg = AT;
		__ lui(AT, Assembler::split_high(disp_value));
	}

	// remember the offset of the load.  The patching_epilog must be done
	// before the call to add_debug_info, otherwise the PcDescs don't get
	// entered in increasing order.
	int offset = code_offset();

	switch(type) {
    case T_BOOLEAN:
    case T_BYTE: {
    	//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lb(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lb(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lb(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_CHAR: {
    		//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lhu(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lhu(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lhu(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_SHORT: {
    	//	assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lh(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lh(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lh(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_INT:
    case T_OBJECT:
    case T_ARRAY: {
    		//assert(to_reg.is_word(), "just check");
    	if (disp_reg == noreg) {
    		__ lw(dest->as_register(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lw(dest->as_register(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lw(dest->as_register(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_LONG: {	
    	// A 64-bit load is two 32-bit loads; the order is chosen so that a
    	// destination register overlapping the address registers is only
    	// clobbered after it is no longer needed to form the address.
    	// When patching, each half gets its own PatchingStub.
    	Register to_lo = dest->as_register_lo();
      Register to_hi = dest->as_register_hi();
      Register base = addr->base()->as_register();
      Register index = noreg;
      if (addr->index()->is_register()) {
      	index = addr->index()->as_register();
      }
      if ((base == to_lo && index == to_hi) ||(base == to_hi && index == to_lo)) {
        // addresses with 2 registers are only formed as a result of
        // array access so this code will never have to deal with
        // patches or null checks.
        assert(info == NULL && patch == NULL, "must be");
        __ lea(to_hi, as_Address(addr));
        __ lw(to_lo, Address(to_hi));
        __ lw(to_hi, Address(to_hi, BytesPerWord));
      } else if (base == to_lo || index == to_lo) {
        assert(base != to_hi, "can't be");
        assert(index == noreg || (index != base && index != to_hi), "can't handle this");
        __ lw(to_hi, as_Address_hi(addr));
        if (patch != NULL) {
        	patching_epilog(patch, lir_patch_high, base, info);
        	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        	patch_code = lir_patch_low;
        }
        __ lw(to_lo, as_Address_lo(addr));
      } else {
        assert(index == noreg || (index != base && index != to_lo), "can't handle this");
        __ lw(to_lo, as_Address_lo(addr));
        if (patch != NULL) {
        	patching_epilog(patch, lir_patch_low, base, info);
        	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
        	patch_code = lir_patch_high;
        }
        __ lw(to_hi, as_Address_hi(addr));
      }
    }
    break;

    case T_FLOAT: {
    	//assert(to_reg.is_float(), "just check");
    	if (disp_reg == noreg) {
    		__ lwc1(dest->as_float_reg(), src_reg, disp_value);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_float_reg(), AT, 0);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_float_reg(), AT, Assembler::split_low(disp_value));
    	}
    }
    break;
    
    case T_DOUBLE: {
    	//assert(to_reg.is_double(), "just check");
    	// Two single-word FP loads into an even/odd register pair; the high
    	// word goes to as_double_reg() + 1, as elsewhere in this port.
    	if (disp_reg == noreg) {
    		__ lwc1(dest->as_double_reg(), src_reg, disp_value);
    		__ lwc1(dest->as_double_reg()+1, src_reg, disp_value+4);
    	} else if (needs_patching) {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_double_reg(), AT, 0);
    		__ lwc1(dest->as_double_reg()+1, AT, 4);
    	} else {
    		__ add(AT, src_reg, disp_reg);
    		offset = code_offset();
    		__ lwc1(dest->as_double_reg(), AT, Assembler::split_low(disp_value));
    		__ lwc1(dest->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
    	}
    }
    break;
    	
    default:
    	ShouldNotReachHere();
	}

	if (needs_patching) {
		patching_epilog(patch, patch_code, src_reg, info);
	}

	// The recorded offset points at the load instruction itself, so a SIGSEGV
	// there can be mapped back to this bytecode for an implicit null check.
	if (info != NULL) add_debug_info_for_null_check(offset, info);
}


// Read-prefetch hint. Not implemented on this port: the operand is decoded
// into an Address but no instruction is emitted — the hint is dropped.
void LIR_Assembler::prefetchr(LIR_Opr src) {
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);
}


// Write-prefetch hint. No-op on this port.
void LIR_Assembler::prefetchw(LIR_Opr src) {
}

NEEDS_CLEANUP; // This could be static? 
// Map an array element type to the address scale factor matching its
// element size in bytes (1/2/4/8 -> times_1/2/4/8).
Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
  const int elem_size = type2aelembytes(type);
  switch (elem_size) {
  case 1:
    return Address::times_1;
  case 2:
    return Address::times_2;
  case 4:
    return Address::times_4;
  case 8:
    return Address::times_8;
  default:
    ShouldNotReachHere();
    return Address::no_scale;
  }
}


// Dispatch a three-operand LIR op: float remainder goes to
// arithmetic_frem, integer divide/remainder to arithmetic_idiv.
void LIR_Assembler::emit_op3(LIR_Op3* op) {
  LIR_Code code = op->code();
  if (code == lir_frem) {
    arithmetic_frem(code,
                    op->in_opr1(),
                    op->in_opr2(),
                    op->in_opr3(),
                    op->result_opr(),
                    op->info());
  } else if (code == lir_idiv || code == lir_irem) {
    arithmetic_idiv(code,
                    op->in_opr1(),
                    op->in_opr2(),
                    op->in_opr3(),
                    op->result_opr(),
                    op->info());
  } else {
    ShouldNotReachHere();
  }
}

// Emit a conditional (or unconditional) branch. MIPS has no condition-code
// register, so comparisons are synthesized: integer compares use slt/sltu
// into AT and branch on AT vs ZERO; FP compares use c.cond.fmt plus
// bc1t/bc1f. Dispatch is on the kind of the left operand (single cpu reg,
// address/stack, double cpu reg, single/double FPU reg), then on the right
// operand, then on the condition. Branch delay slots are filled with nop
// via delayed()->nop(); the bare delayed() calls before a following
// instruction deliberately let that instruction occupy the delay slot.
void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
	LIR_Opr opr1 = op->left();
	LIR_Opr opr2 = op->right();
	LIR_Condition condition = op->cond();
#ifdef ASSERT
	assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
	if (op->block() != NULL)  _branch_target_blocks.append(op->block());
	if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
#endif	
	// Unconditional branch: no comparison needed.
	if (op->cond() == lir_cond_always) {
		__ b(*op->label());
		__ delayed()->nop();
		return;
	}
  if (opr1->is_single_cpu()) {
		Register reg_op1 = opr1->as_register();
		if (opr2->is_single_cpu()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
			Register reg_op2 = opr2->as_register();
			switch (condition) {
		    case lir_cond_equal:
		    	__ beq(reg_op1, reg_op2, *op->label());
		    	break;
		    case lir_cond_notEqual:
		    	__ bne(reg_op1, reg_op2, *op->label());
		    	break;
		    case lir_cond_less:
		    	// AT = 1 TRUE
		    	__ slt(AT, reg_op1, reg_op2);
		    	__ bne(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_lessEqual:
		    	// AT = 0 TRUE
		    	__ slt(AT, reg_op2, reg_op1);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_belowEqual:
		    	// AT = 0 TRUE
		    	__ sltu(AT, reg_op2, reg_op1);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_greaterEqual:
		    	// AT = 0 TRUE
		    	__ slt(AT, reg_op1, reg_op2);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_aboveEqual:
		    	// AT = 0 TRUE
		    	__ sltu(AT, reg_op1, reg_op2);
		    	__ beq(AT, ZERO, *op->label());
		    	break;
		    case lir_cond_greater:
		    	// AT = 1 TRUE
		    	__ slt(AT, reg_op2, reg_op1);
		    	__ bne(AT, ZERO, *op->label());
		    	break;				
		    default: ShouldNotReachHere();
			}
		} else if (opr2->is_constant()) {
			// reg vs constant: oop constants get a relocation record before the
			// value is materialized into AT; comparisons against zero use ZERO.
			jint temp_value;
			bool is_object = false;
			if (opr2->pointer()->as_constant()->type() == T_INT) {
				temp_value = (jint)(opr2->as_jint());
			} else if (opr2->pointer()->as_constant()->type() == T_OBJECT) {
				is_object = true;
				temp_value = (jint)(opr2->as_jobject());
			} else {
				ShouldNotReachHere();
			}

			switch (condition) {
			  case lir_cond_equal:
#ifdef OPT_RANGECHECK
			  assert(!op->check(), "just check");
#endif
			  if (temp_value) {
				  if (is_object) {
				    int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
				    RelocationHolder rspec = oop_Relocation::spec(oop_index);
				    __ relocate(rspec);
    			}
    			__ move(AT, temp_value);
    			__ beq(reg_op1, AT, *op->label());
    		} else {
    			__ beq(reg_op1, ZERO, *op->label());
    		}
    		break;
    			
    		case lir_cond_notEqual:
#ifdef OPT_RANGECHECK
	    		assert(!op->check(), "just check");
#endif
			    if (temp_value) {
      			if (is_object) {
      				int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
      				RelocationHolder rspec = oop_Relocation::spec(oop_index);
      				__ relocate(rspec);
      			}
      			__ move(AT, temp_value);
      			__ bne(reg_op1, AT, *op->label());
      		} else {
      			__ bne(reg_op1, ZERO, *op->label());
      		}
      		break;
      			
      	case lir_cond_less:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
	    		// AT = 1 TRUE
	    		if (Assembler::is_simm16(temp_value)) {
	    			__ slti(AT, reg_op1, temp_value);
	    		} else {
	    			__ move(AT, temp_value);
	    			__ slt(AT, reg_op1, AT);
	    		}
	    		__ bne(AT, ZERO, *op->label());
	    		break;
				
		  	case lir_cond_lessEqual:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			__ move(AT, temp_value);
    			__ slt(AT, AT, reg_op1);
    			__ beq(AT, ZERO, *op->label());
    			break;
    			
    		case lir_cond_belowEqual:
    			// AT = 0 TRUE
#ifdef OPT_RANGECHECK
    			if (op->check()) {
    				__ move(AT, temp_value);
    				add_debug_info_for_range_check_here(op->info(), temp_value);
    				__ tgeu(AT, reg_op1, 29);
    			} else {
#endif
	    			__ move(AT, temp_value);
	    			__ sltu(AT, AT, reg_op1);
	    			__ beq(AT, ZERO, *op->label());
#ifdef OPT_RANGECHECK
		  		}
#endif
				  break;
				
			  case lir_cond_greaterEqual:
#ifdef OPT_RANGECHECK
  			  assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			if (Assembler::is_simm16(temp_value)) {
    				__ slti(AT, reg_op1, temp_value);
    			} else {
    				__ move(AT, temp_value);
    				__ slt(AT, reg_op1, AT);
    			}
    			__ beq(AT, ZERO, *op->label());
    			break;
				
	  		case lir_cond_aboveEqual:
#ifdef OPT_RANGECHECK
		    	assert(!op->check(), "just check");
#endif
    			// AT = 0 TRUE
    			if (Assembler::is_simm16(temp_value)) {
    				__ sltiu(AT, reg_op1, temp_value);
    			} else {
    				__ move(AT, temp_value);
    				__ sltu(AT, reg_op1, AT);
    			}
    			__ beq(AT, ZERO, *op->label());
    			break;
    			
    		case lir_cond_greater:
#ifdef OPT_RANGECHECK
			    assert(!op->check(), "just check");
#endif
    			// AT = 1 TRUE
    			__ move(AT, temp_value);
    			__ slt(AT, AT, reg_op1);
    			__ bne(AT, ZERO, *op->label());
    			break;				
    
    		default: ShouldNotReachHere();
    		}
    
    	} else {
    		// reg vs memory/stack operand: load the right operand into AT first.
    		if (opr2->is_address()) {
    			__ lw(AT, as_Address(opr2->pointer()->as_address()));
    		} else if (opr2->is_stack()) {
    			__ lw(AT, frame_map()->address_for_slot(opr2->single_stack_ix()));
    		} else {
    			ShouldNotReachHere();
    		}
    		switch (condition) {
    			case lir_cond_equal:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  __ beq(reg_op1, AT, *op->label());
					  break;
				  case lir_cond_notEqual:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  __ bne(reg_op1, AT, *op->label());
					  break;
				  case lir_cond_less:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
    				// AT = 1 TRUE
    				__ slt(AT, reg_op1, AT);
    				__ bne(AT, ZERO, *op->label());
    				break;
    			case lir_cond_lessEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ slt(AT, AT, reg_op1);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_belowEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ sltu(AT, AT, reg_op1);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_greaterEqual:
#ifdef OPT_RANGECHECK
	      		assert(!op->check(), "just check");
#endif
    				// AT = 0 TRUE
    				__ slt(AT, reg_op1, AT);
    				__ beq(AT, ZERO, *op->label());
    				break;
    			case lir_cond_aboveEqual:
    				// AT = 0 TRUE
#ifdef OPT_RANGECHECK
    				if (op->check()) {
    					add_debug_info_for_range_check_here(op->info(), opr1->rinfo());
    					__ tgeu(reg_op1, AT, 29);
    				} else {
#endif
	  				  __ sltu(AT, reg_op1, AT);
	  				  __ beq(AT, ZERO, *op->label());
#ifdef OPT_RANGECHECK
					  }
#endif
					  break;
				  case lir_cond_greater:
#ifdef OPT_RANGECHECK
			      assert(!op->check(), "just check");
#endif
					  // AT = 1 TRUE
					  __ slt(AT, AT, reg_op1);
					  __ bne(AT, ZERO, *op->label());
					  break;				
				  default: ShouldNotReachHere();
			  }
		  }
		  // Shared delay-slot nop for all single-cpu branches above.
#ifdef OPT_RANGECHECK
		  if (!op->check())
#endif
		  __ delayed()->nop();

    } else if(opr1->is_address() || opr1->is_stack()) {
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
      if (opr2->is_constant()) {
			  jint temp_value;
        if (opr2->as_constant_ptr()->type() == T_INT) {
          temp_value = (jint)opr2->as_constant_ptr()->as_jint();
        } else if (opr2->as_constant_ptr()->type() == T_OBJECT) {
          temp_value = (jint)opr2->as_constant_ptr()->as_jobject();
        } else {
    			ShouldNotReachHere();
    		}
   
    		if (Assembler::is_simm16(temp_value)) {
    			if (opr1->is_address()) {
    				__ lw(AT, as_Address(opr1->pointer()->as_address()));
    			} else {
    				__ lw(AT, frame_map()->address_for_slot(opr1->single_stack_ix()));
    			}
   
    			switch(condition) {
    			
    		    case lir_cond_equal:
    		    	__ addi(AT, AT, -(int)temp_value);	
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_notEqual:
    		    	__ addi(AT, AT, -(int)temp_value);	
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_less:
    		    	// AT = 1 TRUE
    		    	__ slti(AT, AT, temp_value);
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_lessEqual:
    		    	// AT = 0 TRUE
    		    	__ addi(AT, AT, -temp_value);	
    		    	__ slt(AT, ZERO, AT);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_belowEqual:
    		    	// AT = 0 TRUE
    		    	__ addiu(AT, AT, -temp_value);	
    		    	__ sltu(AT, ZERO, AT);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_greaterEqual:
    		    	// AT = 0 TRUE
    		    	__ slti(AT, AT, temp_value);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_aboveEqual:
    		    	// AT = 0 TRUE
    		    	__ sltiu(AT, AT, temp_value);
    		    	__ beq(AT, ZERO, *op->label());
    		    	break;
    		    case lir_cond_greater:
    		    	// AT = 1 TRUE
    		    	__ addi(AT, AT, -temp_value);		
    		    	__ slt(AT, ZERO, AT);
    		    	__ bne(AT, ZERO, *op->label());
    		    	break;				
    		    
    		    default:
    		    	Unimplemented();
    			}
    		} else {
          Unimplemented();
        }
      } else {
        Unimplemented();
      }
    	__ delayed()->nop();
   
    } else if(opr1->is_double_cpu()) {
      // 64-bit integer compare, done as: compare high words (signed), and only
      // when they are equal fall through to an unsigned compare of low words.
      // NOTE(review): the bare "__ delayed();" calls below appear to let the
      // following instruction fill the branch delay slot — confirm against the
      // MacroAssembler's delayed() contract before restructuring.
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
	    Register opr1_lo = opr1->as_register_lo();
	    Register opr1_hi = opr1->as_register_hi();

	    if (opr2->is_double_cpu()) {
	    	Register opr2_lo = opr2->as_register_lo();
	    	Register opr2_hi = opr2->as_register_hi();
	    	switch (condition) {
	        case lir_cond_equal: {
	        	Label L;
	        	__ bne(opr1_lo, opr2_lo, L);
	        	__ delayed()->nop();
	        	__ beq(opr1_hi, opr2_hi, *op->label());
	        	__ delayed()->nop();
	        	__ bind(L);
	        }
	        break;

	        case lir_cond_notEqual:
	        	__ bne(opr1_lo, opr2_lo, *op->label());
	        	__ delayed()->nop();
	        	__ bne(opr1_hi, opr2_hi, *op->label());
	        	__ delayed()->nop();
	        	break;

	        case lir_cond_less: { 
	        	Label L;
	        	
	        	// if hi less then jump
	        	__ slt(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();
	        	
	        	// if hi great then fail
	        	__ bne(opr1_hi, opr2_hi, L);
	        	__ delayed();
	        
	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_lessEqual: {
	        	Label L;
	        	
	        	// if hi great then fail
	        	__ slt(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi less then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_belowEqual: {
	          Label L;
	        		
	        	// if hi great then fail
	        	__ sltu(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi less then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_greaterEqual: {
	        	Label L;
	        	
	        	// if hi less then fail
	        	__ slt(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi great then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_aboveEqual: {
	        	Label L;
	        	
	        	// if hi less then fail
	        	__ sltu(AT, opr1_hi, opr2_hi);
	        	__ bne(AT, ZERO, L);
	        	__ delayed()->nop();
	        	
	        	// if hi great then jump
	        	__ bne(opr2_hi, opr1_hi, *op->label());
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr1_lo, opr2_lo);
	        	__ beq(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;

	        case lir_cond_greater: {
	        	Label L;
	        	
	        	// if hi great then jump
	        	__ slt(AT, opr2_hi, opr1_hi);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();
	        	
	        	// if hi less then fail
	        	__ bne(opr2_hi, opr1_hi, L);
	        	__ delayed();

	        	// now just comp lo as unsigned
	        	__ sltu(AT, opr2_lo, opr1_lo);
	        	__ bne(AT, ZERO, *op->label());
	        	__ delayed()->nop();

	        	__ bind(L);
	        }
	        break;				
	        	
	        default: ShouldNotReachHere();
	      }
	    	
	    } else if(opr2->is_constant()) {
	    	// 64-bit reg vs constant: the constant's halves go through T8;
	    	// comparisons against zero get shorter special-case sequences.
	    	jlong lv = opr2->as_jlong();
	    	jint iv_lo = (jint)lv;
	    	jint iv_hi = (jint)(lv>>32);
	    	bool is_zero = (lv==0);

	    	switch (condition) {
	        case lir_cond_equal: 
	        	if (is_zero) {
	        		__ orr(AT, opr1_lo, opr1_hi);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		__ move(T8, iv_lo);
	        		__ bne(opr1_lo, T8, L);
	        		__ delayed();
	        		__ move(T8, iv_hi);
	        		__ beq(opr1_hi, T8, *op->label());
	        		__ delayed()->nop();
	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_notEqual:
	        	if (is_zero) {
	        		__ orr(AT, opr1_lo, opr1_hi);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		__ move(T8, iv_lo);
	        		__ bne(opr1_lo, T8, *op->label());
	        		__ delayed();
	        		__ move(T8, iv_hi);
	        		__ bne(opr1_hi, T8, *op->label());
	        		__ delayed()->nop();
	        	}
	        	break;

	        case lir_cond_less:
	        	if (is_zero) {
	        		__ bltz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        	} else { 
	        		Label L;
	        		
	        		// if hi less then jump
	        		__ move(T8, iv_hi);
	        		__ slt(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        		
	        		// if hi great then fail
	        		__ bne(opr1_hi, T8, L);
	        		__ delayed();
	        	
	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_lessEqual:
	        	if (is_zero) {
	        		__ bltz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        		__ orr(AT, opr1_hi, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed();
	        	} else {
	        		Label L;
	        		
	        		// if hi great then fail
	        		__ move(T8, iv_hi);
	        		__ slt(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi less then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_belowEqual:
	        	if (is_zero) {
	        		__ orr(AT, opr1_hi, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi great then fail
	        		__ move(T8, iv_hi);
	        		__ sltu(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi less then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_greaterEqual:
	        	if (is_zero) {
	        		__ bgez(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi less then fail
	        		__ move(T8, iv_hi);
	        		__ slt(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi great then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_aboveEqual:
	        	if (is_zero) {
	        		// unsigned >= 0 is always true
	        		__ b(*op->label());
	        		__ delayed()->nop();
	        	} else {
	        		Label L;
	        		
	        		// if hi less then fail
	        		__ move(T8, iv_hi);
	        		__ sltu(AT, opr1_hi, T8);
	        		__ bne(AT, ZERO, L);
	        		__ delayed()->nop();
	        		
	        		// if hi great then jump
	        		__ bne(T8, opr1_hi, *op->label());
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		if (Assembler::is_simm16(iv_lo)) {
	        			__ sltiu(AT, opr1_lo, iv_lo);
	        		} else {
	        			__ move(T8, iv_lo);
	        			__ sltu(AT, opr1_lo, T8);
	        		}
	        		__ beq(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;

	        case lir_cond_greater:
	        	if (is_zero) {
	        		Label L;
	        		__ bgtz(opr1_hi, *op->label());
	        		__ delayed()->nop();
	        		__ bne(opr1_hi, ZERO, L);
	        		__ delayed()->nop();
	        		__ bne(opr1_lo, ZERO, *op->label());
	        		__ delayed()->nop();
	        		__ bind(L);
	        	} else {
	        		Label L;
	        		
	        		// if hi great then jump
	        		__ move(T8, iv_hi);
	        		__ slt(AT, T8, opr1_hi);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();
	        		
	        		// if hi less then fail
	        		__ bne(T8, opr1_hi, L);
	        		__ delayed();

	        		// now just comp lo as unsigned
	        		__ move(T8, iv_lo);
	        		__ sltu(AT, T8, opr1_lo);
	        		__ bne(AT, ZERO, *op->label());
	        		__ delayed()->nop();

	        		__ bind(L);
	        	}
	        	break;				
	        	
	        default: 
	        	ShouldNotReachHere();
	      }
	    } else {
	    	Unimplemented();
	    }
    } else if (opr1->is_single_fpu()) {
      // Single-precision FP compare. un_jump is true when the unordered
      // block's label coincides with the taken label, i.e. an unordered
      // (NaN) result should also take the branch; pick the unordered vs
      // ordered c.cond.s variant (and bc1t vs bc1f) accordingly.
#ifdef OPT_RANGECHECK
			assert(!op->check(), "just check");
#endif
	    assert(opr2->is_single_fpu(), "change the code");
	    
	    FloatRegister reg_op1 = opr1->as_float_reg();
	    FloatRegister reg_op2 = opr2->as_float_reg();
	    //	bool un_ls
	    bool un_jump = (op->ublock()->label()==op->label());
               	
	    Label& L = *op->label();
	    
	    switch (condition) {
	      case lir_cond_equal:
	      	if (un_jump)
	      		__ c_ueq_s(reg_op1, reg_op2);
	      	else 
	      		__ c_eq_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;

	      case lir_cond_notEqual:
	      	if (un_jump)
	      		__ c_eq_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ueq_s(reg_op1, reg_op2);
	      	__ bc1f(L);

	      	break;

	      case lir_cond_less:
	      	if (un_jump)
	      		__ c_ult_s(reg_op1, reg_op2);
	      	else
	      		__ c_olt_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;

	      case lir_cond_lessEqual:
	      case lir_cond_belowEqual:
	      	if (un_jump)
	      		__ c_ule_s(reg_op1, reg_op2);
	      	else
	      		__ c_ole_s(reg_op1, reg_op2);
	      	__ bc1t(L);

	      	break;
	      	
	      case lir_cond_greaterEqual:
	      case lir_cond_aboveEqual:
	      	if (un_jump)
	      		__ c_olt_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ult_s(reg_op1, reg_op2);
	      	__ bc1f(L);
	      	
	      	break;
	      	
	      case lir_cond_greater:
	      	if (un_jump)
	      		__ c_ole_s(reg_op1, reg_op2);
	      	else 
	      		__ c_ule_s(reg_op1, reg_op2);
	      	__ bc1f(L);
	      	
	      	break;				
	      	
	      default: 
	      	ShouldNotReachHere();
	      }
	      __ delayed()->nop();
      } else if (opr1->is_double_fpu()) {
      	// Double-precision FP compare: same unordered/ordered selection as
      	// the single-precision case, using the c.cond.d variants.
#ifdef OPT_RANGECHECK
			  assert(!op->check(), "just check");
#endif
      	assert(opr2->is_double_fpu(), "change the code");
      
      	FloatRegister reg_op1 = opr1->as_double_reg();
      	FloatRegister reg_op2 = opr2->as_double_reg();
      	bool un_jump = (op->ublock()->label()==op->label());
      	Label& L = *op->label();
      	
      	switch (condition) {
      	case lir_cond_equal:
      		if (un_jump)
      			__ c_ueq_d(reg_op1, reg_op2);
      		else 
      			__ c_eq_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      
      	case lir_cond_notEqual:
      		if (un_jump)
      			__ c_eq_d(reg_op1, reg_op2);
      		else 
      			__ c_ueq_d(reg_op1, reg_op2);
      		__ bc1f(L);
      
      		break;
      
      	case lir_cond_less:
      		if (un_jump)
      			__ c_ult_d(reg_op1, reg_op2);
      		else
      			__ c_olt_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      
      	case lir_cond_lessEqual:
      	case lir_cond_belowEqual:
      		if (un_jump)
      			__ c_ule_d(reg_op1, reg_op2);
      		else
      			__ c_ole_d(reg_op1, reg_op2);
      		__ bc1t(L);
      
      		break;
      		
      	case lir_cond_greaterEqual:
      	case lir_cond_aboveEqual:
      		if (un_jump)
      			__ c_olt_d(reg_op1, reg_op2);
      		else 
      			__ c_ult_d(reg_op1, reg_op2);
      		__ bc1f(L);
      		
      		break;
      		
      	case lir_cond_greater:
      		if (un_jump)
      			__ c_ole_d(reg_op1, reg_op2);
      		else 
      			__ c_ule_d(reg_op1, reg_op2);
      		__ bc1f(L);
      		
      		break;				
      		
      	default: 
      		ShouldNotReachHere();
      	}
      	__ delayed()->nop();
      } else {
    Unimplemented();
	}
}


// Emit code for a LIR_OpConvert: a Java primitive-type conversion
// (_i2l, _l2i, _i2b/_i2c/_i2s, _f2d/_d2f, _i2f/_i2d, _f2i/_d2i, _l2f/_l2d,
// _f2l/_d2l).
//
// Fix: the original declared two aliases of op->in_opr() ("value" and "src")
// and mixed them (e.g. _i2d and _l2f read value->…_stack_ix() while the
// neighboring cases read src->…), inviting copy/paste bugs.  Only "src" is
// used now; since both named the same operand, behavior is unchanged.
//
// Conventions:
//  - AT is the assembler scratch register; F30/F31 serve as a scratch FPU pair.
//  - Java requires float/double -> int/long conversions to yield 0 for NaN, so
//    those cases compare the operand with itself (c.un sets the FP condition
//    iff the operand is unordered, i.e. NaN) and branch over the truncation.
//    The zeroing move sits in the branch delay slot, so it executes on both
//    paths and is simply overwritten when the input is an ordinary number.
void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
  LIR_Opr src          = op->in_opr();
  LIR_Opr dest         = op->result_opr();
  Bytecodes::Code code = op->bytecode();

  switch (code) {
    case Bytecodes::_i2l:
      // lo <- src; hi <- sign of lo (arithmetic shift by 31)
      move_regs(src->as_register(), dest->as_register_lo());
      __ sra (dest->as_register_hi(), dest->as_register_lo(), 31);
      break;

    case Bytecodes::_l2i:
      // drop the high word
      move_regs (src->as_register_lo(), dest->as_register());
      break;

    case Bytecodes::_i2b:
      move_regs (src->as_register(), dest->as_register());
      __ sign_extend_byte(dest->as_register());
      break;

    case Bytecodes::_i2c:
      // char is unsigned 16-bit: mask rather than sign-extend
      __ andi(dest->as_register(), src->as_register(), 0xFFFF);
      break;

    case Bytecodes::_i2s:
      move_regs (src->as_register(), dest->as_register());
      __ sign_extend_short(dest->as_register());
      break;

    case Bytecodes::_f2d:
      __ cvt_d_s(dest->as_double_reg(), src->as_float_reg());
      break;

    case Bytecodes::_d2f:
      __ cvt_s_d(dest->as_float_reg(), src->as_double_reg());
      break;

    case Bytecodes::_i2f: {
      FloatRegister df = dest->as_float_reg();
      if (src->is_single_cpu()) {
        __ mtc1(src->as_register(), df);
        __ cvt_s_w(df, df);
      } else if (src->is_stack()) {
        Address src_addr = src->is_single_stack()
          ? frame_map()->address_for_slot(src->single_stack_ix())
          : frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, df);
        __ cvt_s_w(df, df);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_i2d: {
      FloatRegister dd = dest->as_double_reg();
      if (src->is_single_cpu()) {
        __ mtc1(src->as_register(), dd);
        __ cvt_d_w(dd, dd);
      } else if (src->is_stack()) {
        Address src_addr = src->is_single_stack()
          ? frame_map()->address_for_slot(src->single_stack_ix())
          : frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, dd);
        __ cvt_d_w(dd, dd);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_f2i: {
      FloatRegister fval = src->as_float_reg();
      Register dreg = dest->as_register();

      Label L;
      __ c_un_s(fval, fval);    // NaN? (unordered with itself)
      __ bc1t(L);
      __ delayed();
      __ move(dreg, ZERO);      // delay slot: result is 0 for NaN

      __ trunc_w_s(F30, fval);  // round toward zero, per JLS
      __ mfc1(dreg, F30);
      __ bind(L);
      break;
    }

    case Bytecodes::_d2i: {
      FloatRegister dval = src->as_double_reg();
      Register dreg = dest->as_register();

      Label L;
      __ c_un_d(dval, dval);    // NaN?
      __ bc1t(L);
      __ delayed();
      __ move(dreg, ZERO);      // delay slot: result is 0 for NaN

      __ trunc_w_d(F30, dval);
      __ mfc1(dreg, F30);
      __ bind(L);
      break;
    }

    case Bytecodes::_l2f: {
      FloatRegister ldf = dest->as_float_reg();
      if (src->is_double_cpu()) {
        // move the 64-bit value into the (even, odd) FPU register pair
        __ mtc1(src->as_register_lo(), ldf);
        __ mtc1(src->as_register_hi(), ldf + 1);
        __ cvt_s_l(ldf, ldf);
      } else if (src->is_double_stack()) {
        Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, ldf);
        __ lw(AT, src_addr.base(), src_addr.disp() + 4);
        __ mtc1(AT, ldf + 1);
        __ cvt_s_l(ldf, ldf);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_l2d: {
      FloatRegister ldd = dest->as_double_reg();
      if (src->is_double_cpu()) {
        __ mtc1(src->as_register_lo(), ldd);
        __ mtc1(src->as_register_hi(), ldd + 1);
        __ cvt_d_l(ldd, ldd);
      } else if (src->is_double_stack()) {
        Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
        __ lw(AT, src_addr);
        __ mtc1(AT, ldd);
        __ lw(AT, src_addr.base(), src_addr.disp() + 4);
        __ mtc1(AT, ldd + 1);
        __ cvt_d_l(ldd, ldd);
      } else {
        Unimplemented();
      }
      break;
    }

    case Bytecodes::_f2l: {
      FloatRegister fval = src->as_float_reg();
      Register dlo = dest->as_register_lo();
      Register dhi = dest->as_register_hi();

      Label L;
      __ move(dhi, ZERO);       // high word is 0 on the NaN path
      __ c_un_s(fval, fval);    // NaN?
      __ bc1t(L);
      __ delayed();
      __ move(dlo, ZERO);       // delay slot: low word 0 for NaN

      // 64-bit truncation lands in the F30/F31 pair
      __ trunc_l_s(F30, fval);
      __ mfc1(dlo, F30);
      __ mfc1(dhi, F31);
      __ bind(L);
      break;
    }

    case Bytecodes::_d2l: {
      FloatRegister dval = src->as_double_reg();
      Register dlo = dest->as_register_lo();
      Register dhi = dest->as_register_hi();

      Label L;
      __ move(dhi, ZERO);
      __ c_un_d(dval, dval);    // NaN?
      __ bc1t(L);
      __ delayed();
      __ move(dlo, ZERO);

      __ trunc_l_d(F30, dval);
      __ mfc1(dlo, F30);
      __ mfc1(dhi, F31);
      __ bind(L);
      break;
    }

    default: ShouldNotReachHere();
  }
}

// Emit code for a LIR_OpAllocObj: allocate an instance of a known class.
// When an initialization check is requested, branch to the slow-path stub
// unless the klass is fully initialized; the fast path falls into
// MacroAssembler::allocate_object, which itself bails to the stub on failure.
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
	Register klass = op->klass()->as_register();

	if (op->init_check()) {
		// klass == NULL faults here; record debug info for that implicit check.
		add_debug_info_for_null_check_here(op->stub()->info());
		__ lw(AT, Address(klass,
          instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)));
		// AT becomes zero exactly when the state is fully_initialized.
		__ addi(AT, AT, -instanceKlass::fully_initialized);
		__ bne(AT, ZERO, *op->stub()->entry());
		__ delayed()->nop();
	}

	__ allocate_object(op->obj()->as_register(),
			op->tmp1()->as_register(),
			op->tmp2()->as_register(),
			op->header_size(),
			op->object_size(),
			klass,
			*op->stub()->entry());

	// The slow path rejoins here.
	__ bind(*op->stub()->continuation());
}

// Emit code for a LIR_OpAllocArray: allocate an array of op->type() with
// op->len() elements.  Falls back to the runtime stub either unconditionally
// (UseSlowPath, or the relevant fast-new flag is off) or when the inline
// allocation in MacroAssembler::allocate_array fails.
void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
	BasicType type = op->type();
	bool is_obj_array = (type == T_OBJECT || type == T_ARRAY);
	bool take_slow_path = UseSlowPath ||
			(!UseFastNewObjectArray && is_obj_array) ||
			(!UseFastNewTypeArray   && !is_obj_array);

	if (take_slow_path) {
		__ b(*op->stub()->entry());
		__ delayed()->nop();
	} else {
		__ allocate_array(op->obj()->as_register(),
				op->len()->as_register(),
				op->tmp1()->as_register(),
				op->tmp2()->as_register(),
				op->tmp3()->as_register(),
				arrayOopDesc::header_size(type),
				array_element_size(type),
				op->klass()->as_register(),
				*op->stub()->entry());
	}

	// Slow path rejoins here.
	__ bind(*op->stub()->continuation());
}



// Emit code for the three subtype-check operations:
//   lir_store_check - aastore: verify `value` may be stored into `array`;
//   lir_checkcast   - throw (via stub) unless obj is null or a subtype of k;
//   lir_instanceof  - set dst to 1/0 depending on the subtype test.
// All three share the same test sequence: try the super_check_offset cache
// slot for an immediate hit, fall back to a linear scan of the
// secondary_supers array, and on a hit update the secondary_super_cache.
// Scratch registers: AT, T8 (array base), T9 (loop count / check offset).
// NOTE: branch delay slots are used deliberately throughout (`delayed()`
// followed by the next emitted instruction fills the slot) - the exact
// statement order is load-bearing.
void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
	LIR_Code code = op->code();
	if (code == lir_store_check) {
    Register value = op->object()->as_register();
    Register array = op->array()->as_register();
    Register k_RInfo = op->tmp1()->as_register();
    Register klass_RInfo = op->tmp2()->as_register();

    CodeStub* stub = op->stub();
    Label done;

    // storing null is always legal
    __ beq(value, ZERO, done);
    __ delayed()->nop();
    add_debug_info_for_null_check_here(op->info_for_exception());
    
    __ lw(k_RInfo, array, oopDesc::klass_offset_in_bytes());
    __ lw(klass_RInfo, value, oopDesc::klass_offset_in_bytes());

    // k_RInfo <- element klass of the array (the required supertype)
    __ lw(k_RInfo, k_RInfo,  objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)); 
    // get super_check_offset
  //for SIGBUS, FIXME, Jerome 
    __ nop(); 
    __ nop(); 
    __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());

    // See if we get an immediate positive hit
    __ add(AT, klass_RInfo, T9);
    __ lw(AT, AT, 0);
    __ beq(k_RInfo, AT, done);
    __ delayed()->nop();

    // check for immediate negative hit
    // (offset != secondary_super_cache means the cache slot was authoritative)
    __ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
    __ bne(T9, AT, *stub->entry());     // fail
    __ delayed()->nop();

    // check for self
    __ beq(klass_RInfo, k_RInfo, done);
    __ delayed()->nop();

    // super type array
    __ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
    // length
    __ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());

    // base
    __ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

    // Linear scan of the secondary supers array.
    Label miss, hit, loop;
    // T9:count, T8:base, k_RInfo: super klass
    __ bind(loop);
    __ beq(T9, ZERO, miss);
    __ delayed()->lw(AT, T8, 0);        // delay slot: load next candidate
    __ beq(AT, k_RInfo, hit);
    __ delayed();
    __ addiu(T9, T9, -1);               // delay slot: decrement count
    __ b(loop);
    __ delayed();
    __ addi(T8, T8, 1 * wordSize);      // delay slot: advance base

    __ bind(miss);
    __ b(*stub->entry());
    __ delayed()->nop();

    __ bind(hit);
    // cache the successful result for the next check
    __ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) 
        + Klass::secondary_super_cache_offset_in_bytes());

    __ bind(done);
	} else if (op->code() == lir_checkcast) {
		// we always need a stub for the failure case.
		CodeStub* stub = op->stub();
		Register obj = op->object()->as_register();
		Register k_RInfo = op->tmp1()->as_register();
		Register klass_RInfo = op->tmp2()->as_register();
		Register dst = op->result_opr()->as_register();
		ciKlass* k = op->klass();
		Register Rtmp1 = noreg; 
		Label done;
		// make sure the temporaries do not alias obj
		if (obj == k_RInfo) {
			k_RInfo = dst;
		} else if (obj == klass_RInfo) {
			klass_RInfo = dst;
		}
		if (k->is_loaded()) {
			select_different_registers(obj, dst, k_RInfo, klass_RInfo);
		} else {
			Rtmp1 = op->tmp3()->as_register();
			select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
		}
		assert_different_registers(obj, k_RInfo, klass_RInfo);

		// patching may screw with our temporaries on sparc,
		// so let's do it before loading the class
		if (!k->is_loaded()) {
			jobject2reg_with_patching(k_RInfo, op->info_for_patch());
		} else {
			//ciObject2reg(k, k_RInfo);
			jobject2reg(k->encoding(),k_RInfo); 
		}
		assert(obj != k_RInfo, "must be different");
    int the_pc;
		if (op->profiled_method() != NULL) {
			// Profiling enabled: a null obj must be recorded in the methodDataOop
			// before taking the (always-succeeding) null fast path.
			ciMethod* method = op->profiled_method();
			int bci          = op->profiled_bci();

			Label profile_done;
			//  __ jcc(Assembler::notEqual, profile_done);
			__ bne(obj, ZERO, profile_done);
			__ delayed()->nop();

			// Object is null; update methodDataOop
			ciMethodData* md = method->method_data();
			if (md == NULL) {
				bailout("out of memory building methodDataOop");
				return;
			}
			ciProfileData* data = md->bci_to_data(bci);
			assert(data != NULL,       "need data for checkcast");
			assert(data->is_BitData(), "need BitData for checkcast");
			Register mdo  = klass_RInfo;
			int oop_index = __ oop_recorder()->find_index(md->encoding());
			RelocationHolder rspec = oop_Relocation::spec(oop_index);
			__ relocate(rspec);
			// materialize the 32-bit mdo address (relocatable lui/addiu pair)
			__ lui(mdo, Assembler::split_high((int)md->encoding()));
			__ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));

			Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
			//FIXME, it very ineffictive to replace orl with 3 mips instruction @jerome, 12/27,06 
			//__ orl(data_addr, BitData::null_flag_constant());
			int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
			__ lw(AT, data_addr); 
			__ ori(AT,AT, header_bits); 
			__ sw(AT,data_addr); 
			__ b(done);
			__ delayed()->nop(); 
			__ bind(profile_done);
		} else {
			// casting null always succeeds
			__ beq(obj, ZERO, done);
			__ delayed()->nop();
		}
		__ verify_oop(obj);

		if (op->fast_check()) {
			// fast check: k has no subclasses, so an exact klass compare suffices
			// get object class
			// not a safepoint as obj null check happens earlier
			__ lw(AT, obj,  oopDesc::klass_offset_in_bytes());
			__ bne(AT, k_RInfo, *stub->entry());
			__ delayed()->nop();
			__ bind(done);
		} else {
			// get object class
			// not a safepoint as obj null check happens earlier
			__ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
			if (k->is_loaded()) {
				// k is resolved: its super_check_offset is a compile-time constant
				__ lw(AT, klass_RInfo, k->super_check_offset());
				// See if we get an immediate positive hit
				if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
					// offset points at a fixed super slot: mismatch is a definitive failure
					__ bne(AT, k_RInfo, *stub->entry());
					__ delayed()->nop();
				} else {
					// offset points at the secondary cache: need the full scan on miss
					// See if we get an immediate positive hit
					__ beq(AT, k_RInfo, done);
					__ delayed()->nop();
					// check for self
					__ beq(klass_RInfo, k_RInfo, done);
					__ delayed()->nop();

					// array
					__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
					// length
					__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());

					// base
					__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

					Label miss, hit, loop;
					// T9:count, T8:base, k_RInfo: super klass
					__ bind(loop);
					__ beq(T9, ZERO, miss);
					__ delayed()->lw(AT, T8, 0);
					__ beq(AT, k_RInfo, hit);
					__ delayed();
					__ addiu(T9, T9, -1);
					__ b(loop);
					__ delayed();
					__ addi(T8, T8, 1 * wordSize);

					__ bind(miss);
					__ b(*stub->entry());
					__ delayed()->nop();

					__ bind(hit);
					// record the hit in the secondary super cache
					__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
				}
				__ bind(done);
			} else {
				//   assert(dst != obj, "need different registers so we have a temporary");
      //  assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
       
				// k not resolved at compile time: read super_check_offset at runtime
				// super_check_offset
        __ lw(Rtmp1, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
        // See if we get an immediate positive hit
				__ add(AT, klass_RInfo, Rtmp1);
				__ lw(AT, AT, 0);
				__ beq(k_RInfo, AT, done);
				__ delayed()->nop();
        // check for immediate negative hit
				__ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());		
				__ bne(Rtmp1, AT,  *stub->entry());
				__ delayed()->nop();
        // check for self
				__ beq(klass_RInfo, k_RInfo, done);
				__ delayed()->nop();
        
				// array
				__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
				// length
				__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());

				// base
				__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

				Label miss, hit, loop;
				// T9:count, T8:base, k_RInfo: super klass
				__ bind(loop);
				__ beq(T9, ZERO, miss);
				__ delayed()->lw(AT, T8, 0);
				__ beq(AT, k_RInfo, hit);
				__ delayed();
				__ addiu(T9, T9, -1);
				__ b(loop);
				__ delayed();
				__ addi(T8, T8, 1 * wordSize);

				__ bind(miss);
				__ b(*stub->entry());
				__ delayed()->nop();

				__ bind(hit);
				__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
        __ bind(done);
      }
      
    }
    // checkcast result is the (unchanged) object itself
    if(dst!=obj)__ move(dst, obj);
	
  } else if (code == lir_instanceof) {
    Register obj = op->object()->as_register();
    Register k_RInfo = op->tmp1()->as_register();
    Register klass_RInfo = op->tmp2()->as_register();
    Register dst = op->result_opr()->as_register();
    ciKlass* k = op->klass();
    
    Label done;
    Label zero;   // result 0 (not an instance, or obj == null)
    Label one;    // result 1 (is an instance)
    
    if (obj == k_RInfo) {
	    k_RInfo = klass_RInfo;
	    klass_RInfo = obj;
    }

    // patching may screw with our temporaries on sparc,
    // so let's do it before loading the class
    if (!k->is_loaded()) {
      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
    } else {
			jobject2reg(k->encoding(), k_RInfo);
		}

    assert(obj != k_RInfo, "must be different");

    __ verify_oop(obj);
    // null is never an instance of anything
    __ beq(obj, ZERO, zero);
    __ delayed()->nop();

    if (op->fast_check()) {
      // get object class
      // not a safepoint as obj null check happens earlier
			__ lw(AT, obj, oopDesc::klass_offset_in_bytes());
			__ beq(AT, k_RInfo, one);
			__ delayed()->nop();
    } else {
      // get object class
      // not a safepoint as obj null check happens earlier
      __ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
      if (k->is_loaded()) {
        //  assert(dst != obj, "need different registers so we have a temporary");
        
        // See if we get an immediate positive hit
				__ lw(AT, klass_RInfo, k->super_check_offset());
				__ beq(AT, k_RInfo, one);
				__ delayed()->nop();
        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() == k->super_check_offset()) {
          // cache slot missed: fall back to the secondary supers scan
          // check for self
					//ciObject2reg(k, AT);
		      jobject2reg(k->encoding(), AT);
		      __ beq(klass_RInfo, k_RInfo, one);
		      __ delayed()->nop();
					
					// array
					__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
					// length
					__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());

					// base
					__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

					Label loop, hit;
					// T9:count, T8:base, k_RInfo: super klass
					__ bind(loop);
					__ beq(T9, ZERO, zero);
					__ delayed()->lw(AT, T8, 0);
					__ beq(AT, k_RInfo, hit);
					__ delayed();
					__ addiu(T9, T9, -1);
					__ b(loop);
					__ delayed();
					__ addi(T8, T8, 1 * wordSize);

					__ bind(hit);
					__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
					__ b(one);
					__ delayed()->nop();
        }
      } else {
        assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
        
        // runtime super_check_offset lookup, then the same cache/scan dance
        __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
				__ add(AT, klass_RInfo, T9);
				__ lw(AT, AT, 0);
				__ beq(k_RInfo, AT, one);
				__ delayed()->nop();
        // check for immediate negative hit
				__ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
				__ bne(AT, T9, zero);
				__ delayed()->nop();
        // check for self
				__ beq(klass_RInfo, k_RInfo, one);
				__ delayed()->nop();
				
				// array
				__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
				// length
				__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());

				// base
				__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

				Label loop, hit;
				// T9:count, T8:base, k_RInfo: super klass
				__ bind(loop);
				__ beq(T9, ZERO, zero);
				__ delayed()->lw(AT, T8, 0);
				__ beq(AT, k_RInfo, hit);
				__ delayed();
				__ addi(T9, T9, -1);
				__ b(loop);
				__ delayed();
				__ addi(T8, T8, 1 * wordSize);

				__ bind(hit);
				__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
				__ b(one);
				__ delayed()->nop();
			}
    }
    __ bind(zero);
    __ move(dst, ZERO);
    __ b(done);
		__ delayed()->nop();

    __ bind(one);
    __ move(dst, 1);

    __ bind(done);
  } else {
    ShouldNotReachHere();
  }
}


// Emit an atomic compare-and-swap.  64-bit CAS (lir_cas_long) goes through
// cmpxchg8 with the value split across lo/hi register pairs; 32-bit and oop
// CAS share cmpxchg.  The empty is_MP() blocks mirror the x86 code shape,
// where a lock prefix would be emitted on multiprocessors.
void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
  LIR_Code kind = op->code();

  if (kind == lir_cas_long) {
    Register addr = op->addr()->as_register();
    if (os::is_MP()) {}
    __ cmpxchg8(op->new_value()->as_register_lo(),
                op->new_value()->as_register_hi(),
                addr,
                op->cmp_value()->as_register_lo(),
                op->cmp_value()->as_register_hi());
  } else if (kind == lir_cas_int || kind == lir_cas_obj) {
    Register addr   = op->addr()->as_register();
    Register newval = op->new_value()->as_register();
    Register cmpval = op->cmp_value()->as_register();
    // cmpxchg clobbers/compares through these, so all four must be distinct.
    assert(newval != NULL, "new val must be register");
    assert(cmpval != newval, "cmp and new values must be in different registers");
    assert(cmpval != addr, "cmp and addr must be in different registers");
    assert(newval != addr, "new value and addr must be in different registers");
    if (os::is_MP()) {
    }
    __ cmpxchg(newval, addr, cmpval);
  } else {
    Unimplemented();
  }
}

// Conditional move: intentionally a no-op in this port.
// NOTE(review): presumably the MIPS LIR generator never emits lir_cmove
// (the ops would have been lowered to explicit branches earlier) -- confirm,
// since reaching this with a live cmove would silently emit no code.
void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result) {
}

// Emit a two-operand arithmetic LIR op (add/sub/mul/div...) for every operand
// combination this port supports: long (register pairs), int (register or
// constant rhs), single/double float, and int ops applied in-place to a
// stack slot / address.  Delay slots and the value left in AT across branches
// are load-bearing in the long-multiply sequence below -- do not reorder.
void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info,bool pop_fpu_stack) {
  assert(info == NULL || ((code == lir_rem || code == lir_div || code == lir_sub) && right->is_double_cpu()), "info is only for ldiv/lrem");
  if (left->is_double_cpu()) {
    // 64-bit integer arithmetic on lo/hi register pairs.
    assert(right->is_double_cpu(),"right must be long");
    assert(dest->is_double_cpu(), "dest must be long");
    
    Register op1_lo = left->as_register_lo();
    Register op1_hi = left->as_register_hi();
    Register op2_lo = right->as_register_lo();
    Register op2_hi = right->as_register_hi();
    Register dst_lo = dest->as_register_lo();
    Register dst_hi = dest->as_register_hi();

    switch (code) {
		case lir_add:
	//		assert_different_registers(dst_lo, op1_lo, op2_lo, op1_hi, op2_hi);
//			assert_different_registers( op1_lo, op2_lo, op1_hi, op2_hi);
			// 64-bit add: AT carries the low-word overflow into the high word.
			__ addu(dst_lo, op1_lo, op2_lo);
			__ sltu(AT, dst_lo, op2_lo);
			__ addu(dst_hi, op1_hi, op2_hi);
			__ addu(dst_hi, dst_hi, AT);
			break;
			
		case lir_sub:
//			assert_different_registers(dst_lo, op1_lo, op2_lo, op1_hi, op2_hi);
//			assert_different_registers( op1_lo, op2_lo, op1_hi, op2_hi);
			// 64-bit subtract: AT is the borrow out of the low word.
			__ subu(dst_lo, op1_lo, op2_lo);
			__ sltu(AT, op1_lo, dst_lo);
			__ subu(dst_hi, op1_hi, op2_hi);
			__ subu(dst_hi, dst_hi, AT);
			break;
			
		case lir_mul:
//			assert_different_registers(dst_lo, dst_hi, op1_lo, op2_hi);
			{
				// 64x64->64 multiply from three 32x32 multiplies:
				//   dst_hi = op1_hi*op2_lo + op1_lo*op2_hi + hi(op1_lo*op2_lo)
				//   dst_lo = lo(op1_lo*op2_lo)
				// When the "quick" branch is taken, AT is known to be ZERO
				// (that is the branch condition), so the addu at `quick`
				// adds nothing -- the shared tail relies on this.
				Label zero, quick, done;

				//zero?
				__ orr(AT, op2_lo, op1_lo);
				__ beq(AT, ZERO, zero);
				__ delayed(); 
				__ move(dst_hi, ZERO);

				//quick?
				__ orr(AT, op2_hi, op1_hi);
				__ beq(AT, ZERO, quick);
				__ delayed()->nop();

				// full path: cross terms into dst_hi and AT
				__ multu(op2_lo, op1_hi);
				__ nop();
				__ nop();
				__ mflo(dst_hi);	
				__ multu(op2_hi, op1_lo);
				__ nop();
				__ nop();
				__ mflo(AT);

				__ bind(quick);
				__ multu(op2_lo, op1_lo);
				__ addu(dst_hi, dst_hi, AT);
				__ nop();
				__ mflo(dst_lo);
				__ mfhi(AT);
				__ b(done);
				__ delayed()->addu(dst_hi, dst_hi, AT);

				__ bind(zero);
				__ move(dst_lo, ZERO);
				__ bind(done);
			}
			break;

		default:
      ShouldNotReachHere();
 		}

  } else if (left->is_single_cpu()) {
    // 32-bit integer arithmetic.
    Register lreg = left->as_register();
    Register res = dest->as_register();
     
		if (right->is_single_cpu()) {
			Register rreg = right->as_register();
			switch (code) {
			case lir_add: 
				__ addu(res, lreg, rreg);  
				break;						

			case lir_mul: 
				// two nops cover the multiplier latency before mflo
				__ mult(lreg, rreg);
				__ nop();
				__ nop();
				__ mflo(res);
				break;

			case lir_sub: 
				__ subu(res, lreg, rreg);  
				break;

			default:      
				ShouldNotReachHere();
			}
    } else if (right->is_constant()) {
      jint c = right->as_constant_ptr()->as_jint();

			switch (code) {
		        case lir_mul_strictfp:	
			case lir_mul:
				__ move(AT, c);
				__ mult(lreg, AT);
				__ nop();
				__ nop();
				__ mflo(res);
				break;
				
			case lir_add:
				// use the immediate form only when c fits in 16 signed bits
				if (Assembler::is_simm16(c)) {
					__ addiu(res, lreg, c);
				} else {
					__ move(AT, c);
					__ addu(res, lreg, AT);
				}
				break;

			case lir_sub:
				if (Assembler::is_simm16(-c)) {
					__ addiu(res, lreg, -c);
				} else {
					__ move(AT, c);
					__ subu(res, lreg, AT);
				}
				break;

			default: 
				ShouldNotReachHere();
			}

		} else {
			ShouldNotReachHere();
		}
	} else if (left->is_single_fpu()) {
    assert(right->is_single_fpu(),"right must be float");
    assert(dest->is_single_fpu(), "dest must be float");

		FloatRegister lreg = left->as_float_reg();
		FloatRegister rreg = right->as_float_reg();
		FloatRegister res = dest->as_float_reg();
		
		switch (code) {
		case lir_add: 
			__ add_s(res, lreg, rreg);            
			break;
		case lir_sub: 
			__ sub_s(res, lreg, rreg);          
			break;
		case lir_mul: 
		case lir_mul_strictfp:
			// i dont think we need special handling of this. FIXME
			__ mul_s(res, lreg, rreg);
			break;
		case lir_div: 
		case lir_div_strictfp:
			__ div_s(res, lreg, rreg);
			break;
//    case lir_rem: 
//      __ rem_s(res, lreg, rreg); 
//      break;
		default     : ShouldNotReachHere();
		}
	} else if (left->is_double_fpu()) {
    assert(right->is_double_fpu(),"right must be double");
    assert(dest->is_double_fpu(), "dest must be double");

		FloatRegister lreg = left->as_double_reg();
		FloatRegister rreg = right->as_double_reg();
		FloatRegister res = dest->as_double_reg();
		
		switch (code) {
		case lir_add: 
			__ add_d(res, lreg, rreg);            
			break;
		case lir_sub: 
			__ sub_d(res, lreg, rreg);          
			break;
		case lir_mul: 
		case lir_mul_strictfp:
			// i dont think we need special handling of this. FIXME
			// by yjl 9/13/2005
			__ mul_d(res, lreg, rreg);
			break;
		case lir_div: 
		case lir_div_strictfp:
			__ div_d(res, lreg, rreg);
			break;
//    case lir_rem: 
//      __ rem_d(res, lreg, rreg); 
//      break;
		default     : ShouldNotReachHere();
		}
	}
    else if (left->is_single_stack()||left->is_address()){
     // read-modify-write directly on a stack slot or memory address
     assert(left == dest, "left and dest must be equal");
    Address laddr = (left->is_single_stack())? (frame_map()->address_for_slot(left->single_stack_ix())):(as_Address(left->as_address_ptr()));

    if (right->is_single_cpu()) {
      Register rreg = right->as_register();
      switch (code) {
        case lir_add: 
	        __ lw(AT, laddr);
      		__ add(AT,AT,rreg);
	        __ sw(AT, laddr);	
		      break;
        case lir_sub: 
      		__ lw(AT, laddr);
		      __ sub(AT,AT,rreg);
	        __ sw(AT, laddr);	
		      break;
        default:      ShouldNotReachHere();
      }
    } else if (right->is_constant()) {
	    jint c = right->as_constant_ptr()->as_jint();
	    // NOTE(review): addi takes a signed 16-bit immediate; unlike the
	    // register paths above there is no Assembler::is_simm16 guard here,
	    // so a large |c| would not encode -- presumably callers only reach
	    // this path with small constants (e.g. increments); confirm.
	    switch (code) {
		    case lir_add: {
					  __ lw(AT, laddr); 
					  __ addi(AT, AT, c); 
					  __ sw(AT, laddr); 
					  break;
				  }
		    case lir_sub: {
					  __ lw(AT, laddr); 
					  __ addi(AT, AT, -c);
					  __ sw(AT, laddr);
					  break;
				  }
		    default: ShouldNotReachHere();
	    }
    } else {
	    ShouldNotReachHere();
    }
  } else {
		ShouldNotReachHere();
	}
}

// Emit math intrinsics (abs, sqrt, sin/cos/tan; log/log10 unimplemented)
// for a double operand.  abs/sqrt use the FPU directly; the trig functions
// go through the shared MacroAssembler::trigfunc stub.  Note lir_log falls
// through to lir_log10 deliberately -- both hit Unimplemented().
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op *op) {
//FIXME,lir_log, lir_log10,lir_abs,lir_sqrt,so many new lir instruction  @jerome
if (value->is_double_fpu()) {
   // assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
    switch(code) {
      case lir_log   : //__ flog() ; break;
      case lir_log10 : //__ flog10() ; 
               Unimplemented();
	      break;
      case lir_abs   : __ abs_d(dest->as_double_reg(), value->as_double_reg()) ; break;
      case lir_sqrt  : __ sqrt_d(dest->as_double_reg(), value->as_double_reg()); break;
      case lir_sin   :
        // Should consider not saving ebx if not necessary
        __ trigfunc('s', 0);
        break;
      case lir_cos :
        // Should consider not saving ebx if not necessary
       // assert(op->as_Op2()->fpu_stack_size() <= 6, "sin and cos need two free stack slots");
        __ trigfunc('c', 0);
        break;
      case lir_tan :
        // Should consider not saving ebx if not necessary
        __ trigfunc('t', 0);
        break;
      default      : ShouldNotReachHere();
    }
  } else {
    Unimplemented();
  }

 
 }
//FIXME, if right is on the stack!
// Emit bitwise and/or/xor for 32-bit (single_cpu) and 64-bit (register-pair)
// operands.  Constants are materialized into AT first; 64-bit ops are done
// word-by-word with no cross-word interaction.
//FIXME, if right is on the stack!
void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
	if (left->is_single_cpu()) {
		Register dstreg = dst->as_register();
		Register reg = left->as_register();
		if (right->is_constant()) {
			int val = right->as_constant_ptr()->as_jint();
			__ move(AT, val);
			switch (code) {
			case lir_logic_and: 
				__ andr (dstreg, reg, AT); 
				break;
			case lir_logic_or:  
				__ orr(dstreg, reg, AT);
				break;
			case lir_logic_xor: 
				__ xorr(dstreg, reg, AT);
				break;
			default: ShouldNotReachHere();
			}
		} else if (right->is_stack()) {
			// added support for stack operands
			// NOTE(review): this path writes the result into `reg` (the left
			// operand) rather than `dstreg`, unlike the other two paths --
			// presumably the register allocator guarantees dst == left when
			// the rhs lives on the stack; confirm before relying on it.
			Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
			switch (code) {
				case lir_logic_and: 
					__ lw(AT,raddr); 
					__ andr (reg, reg,AT); 
					break;
				case lir_logic_or:  
					__ lw(AT,raddr);	
					__ orr (reg, reg,AT); 
					break;
				case lir_logic_xor:
					__ lw(AT,raddr);
					__ xorr(reg,reg,AT);
					break;
				default: ShouldNotReachHere();
			}
		} else {
			Register rright = right->as_register();
			switch (code) {
				case lir_logic_and: __ andr (dstreg, reg, rright); break;
				case lir_logic_or : __ orr  (dstreg, reg, rright); break;
				case lir_logic_xor: __ xorr (dstreg, reg, rright); break;
				default: ShouldNotReachHere();
			}
		}
	} else {
		// 64-bit: operate independently on the lo and hi words.
		Register l_lo = left->as_register_lo();
		Register l_hi = left->as_register_hi();
		Register dst_lo = dst->as_register_lo();
		Register dst_hi = dst->as_register_hi();

		if (right->is_constant()) {
//			assert_different_registers(l_lo, l_hi, dst_lo, dst_hi);
			int r_lo = right->as_constant_ptr()->as_jint_lo();
			int r_hi = right->as_constant_ptr()->as_jint_hi();

			switch (code) {
				case lir_logic_and:
					__ move(AT, r_lo);					
					__ andr(dst_lo, l_lo, AT);
					__ move(AT, r_hi);
					__ andr(dst_hi, l_hi, AT);
					break;

				case lir_logic_or:
					__ move(AT, r_lo);					
					__ orr(dst_lo, l_lo, AT);
					__ move(AT, r_hi);
					__ orr(dst_hi, l_hi, AT);
					break;

				case lir_logic_xor:
					__ move(AT, r_lo);					
					__ xorr(dst_lo, l_lo, AT);
					__ move(AT, r_hi);
					__ xorr(dst_hi, l_hi, AT);
					break;

				default: ShouldNotReachHere();
			}

		} else {
			Register r_lo = right->as_register_lo();
			Register r_hi = right->as_register_hi();

			switch (code) {
				case lir_logic_and: 
					__ andr(dst_lo, l_lo, r_lo);
					__ andr(dst_hi, l_hi, r_hi);
					break;
				case lir_logic_or:
					__ orr(dst_lo, l_lo, r_lo);
					__ orr(dst_hi, l_hi, r_hi);
					break;
				case lir_logic_xor:
					__ xorr(dst_lo, l_lo, r_lo);
					__ xorr(dst_hi, l_hi, r_hi);
					break;
				default: ShouldNotReachHere();
			}
		}
	}
}

// we assume that eax and edx can be overwritten
// Emit 32-bit integer division or remainder (lir_idiv / lir_irem).
// Both go through the MIPS `div` instruction: the quotient lands in LO and
// the remainder in HI, so the op only differs in which register we read
// back.  The two nops after `div` cover the divider latency.  For a register
// divisor, debug info is recorded at the div so a hardware divide-by-zero
// trap maps back to the bytecode; a constant divisor is asserted nonzero.
void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {

  assert(left->is_single_cpu(),   "left must be register");
  assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
  assert(result->is_single_cpu(), "result must be register");

  Register dividend = left->as_register();
  Register res_reg  = result->as_register();

  if (right->is_constant()) {
    // Materialize the (nonzero) constant divisor in AT.
    jint divisor = right->as_constant_ptr()->as_jint();
    assert(divisor!=0, "must be nonzero");
    __ move(AT, divisor);
    __ div(dividend, AT);
    __ nop();
    __ nop();
  } else {
    Register divisor_reg = right->as_register();
    int idivl_offset = code_offset();
    __ div(dividend, divisor_reg);
    __ nop();
    __ nop();
    add_debug_info_for_div0(idivl_offset, info);
  }

  // Pick up the requested half of the HI/LO result pair.
  switch (code) {
    case lir_irem: __ mfhi(res_reg); break;
    case lir_idiv: __ mflo(res_reg); break;
    default:       ShouldNotReachHere();
  }
}

void LIR_Assembler::arithmetic_frem(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
  // Floating-point remainder (Java '%'): delegate to the macro assembler's
  // rem_s/rem_d helpers, which require one scratch FPU register (temp).
  if (left->is_single_fpu()) {
    assert(right->is_single_fpu(),  "right must be float");
    assert(result->is_single_fpu(), "dest must be float");
    assert(temp->is_single_fpu(),   "temp must be float");

    FloatRegister lreg = left->as_float_reg();
    FloatRegister rreg = right->as_float_reg();
    FloatRegister res = result->as_float_reg();
    FloatRegister tmp = temp->as_float_reg();

    switch (code) {
      case lir_frem:
        __ rem_s(res, lreg, rreg, tmp);
        break;
      default     : ShouldNotReachHere();
    }
  } else if (left->is_double_fpu()) {
    assert(right->is_double_fpu(),  "right must be double");
    assert(result->is_double_fpu(), "dest must be double");
    assert(temp->is_double_fpu(),   "temp must be double");

    FloatRegister lreg = left->as_double_reg();
    FloatRegister rreg = right->as_double_reg();
    FloatRegister res = result->as_double_reg();
    FloatRegister tmp = temp->as_double_reg();

    switch (code) {
      case lir_frem:
        __ rem_d(res, lreg, rreg, tmp);
        break;
      default     : ShouldNotReachHere();
    }
  } else {
    // frem operands must be FPU registers; previously this case fell
    // through silently, emitting no code at all.
    ShouldNotReachHere();
  }
}

// Compare two float/double or long operands and set dst to -1/0/1
// (less/equal/greater).  The FPU variants rely on the MIPS delay-slot idiom:
// the move after each __ delayed() executes whether or not the bc1t/bne is
// taken, so dst always holds the candidate result before the branch resolves.
// lir_cmp_fd2i and lir_ucmp_fd2i differ only in the "less than" condition
// (ordered c_olt vs unordered c_ult), which decides the result produced for
// NaN operands — presumably matching Java's fcmpg/fcmpl pair; confirm
// against the LIR op definitions.
void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst,LIR_Op2 * op) {	
	Register dstreg = dst->as_register();
	if (code == lir_cmp_fd2i) {
		if (left->is_single_fpu()) {			
			FloatRegister leftreg = left->as_float_reg();
			FloatRegister rightreg = right->as_float_reg();

			Label done;
			// equal?  (delay slot preloads dst with 0)
			__ c_eq_s(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?  (delay slot preloads dst with -1)
			__ c_olt_s(leftreg, rightreg); 
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great (also reached for NaN, since c_eq/c_olt are both false)
			__ move(dstreg, 1);

			__ bind(done);			
		} else {
			assert(left->is_double_fpu(), "Must double");
			FloatRegister leftreg = left->as_double_reg();
			FloatRegister rightreg = right->as_double_reg();

			Label done;
			// equal?
			__ c_eq_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_olt_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		}
	} else if (code == lir_ucmp_fd2i) {
		if (left->is_single_fpu()) {			
			FloatRegister leftreg = left->as_float_reg();
			FloatRegister rightreg = right->as_float_reg();

			Label done;
			// equal?
			__ c_eq_s(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?  (c_ult is true for NaN, so unordered yields -1 here)
			__ c_ult_s(leftreg, rightreg); 
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		} else {
			assert(left->is_double_fpu(), "Must double");
			FloatRegister leftreg = left->as_double_reg();
			FloatRegister rightreg = right->as_double_reg();

			Label done;
			// equal?
			__ c_eq_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, ZERO);
			// less?
			__ c_ult_d(leftreg, rightreg);
			__ bc1t(done);
			__ delayed();
			__ move(dstreg, -1);
			// great
			__ move(dstreg, 1);

			__ bind(done);			
		}
	} else {
		// 64-bit integer compare: signed compare on the high words, then an
		// unsigned compare on the low words to break ties.
		assert(code == lir_cmp_l2i, "check");
		Register l_lo, l_hi, r_lo, r_hi, d_lo, d_hi;   // d_lo/d_hi are unused here
		l_lo = left->as_register_lo();
		l_hi = left->as_register_hi();
		r_lo = right->as_register_lo();
		r_hi = right->as_register_hi();

		Label done;
		// less?  (delay-slot move preloads dst before each branch resolves)
		__ slt(AT, l_hi, r_hi);		
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, -1);
		// great?
		__ slt(AT, r_hi, l_hi);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, 1);

		// now compare low 32 bits (high words are equal)
		// below?
		__ sltu(AT, l_lo, r_lo);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, -1);
		// above?
		__ sltu(AT, r_lo, l_lo);
		__ bne(AT, ZERO, done);
		__ delayed();
		__ move(dstreg, 1);
		// equal
		__ move(dstreg, ZERO);

		__ bind(done);
	}
}


void LIR_Assembler::align_call(LIR_Code code) {
	// On MP systems the displacement word of the call must not straddle a
	// word boundary (it is patched atomically), so pad with nops until the
	// displacement ends up word aligned.  Uniprocessor builds need nothing.
	if (!os::is_MP()) {
		return;
	}

	int offset = __ offset();
	switch (code) {
	case lir_static_call:
	case lir_optvirtual_call:
		offset += NativeCall::displacement_offset;
		break;
	case lir_icvirtual_call:
		// an inline-cache load precedes the call proper
		offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
		break;
	case lir_virtual_call:  // currently, sparc-specific for niagara
	default:
		ShouldNotReachHere();
	}
	// one nop per byte of misalignment
	for (; offset % BytesPerWord != 0; offset++) {
		__ nop();
	}
}


// Emit a direct call with relocation info and attach debug info at the
// return address.  align_call() must already have padded the instruction
// stream so the displacement word is aligned on MP systems.
void LIR_Assembler::call(address entry, relocInfo::relocType rtype, CodeEmitInfo* info) {
  	assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
         	"must be aligned");
  
  	__ call(entry, rtype);
  	__ delayed()->nop();
  	add_call_info(code_offset(), info);
}


// Emit an inline-cache call: load the IC cell value (initially
// non_oop_word, later patched with the receiver klass) into IC_Klass via an
// oop-relocated lui/addiu pair, then call the entry point.
void LIR_Assembler::ic_call(address entry, CodeEmitInfo* info) {
	RelocationHolder rh = virtual_call_Relocation::spec(pc());
	// allocate an oop-table slot so the patched-in klass is visible to GC
	int oop_index = __ oop_recorder()->allocate_index((jobject)Universe::non_oop_word());
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(IC_Klass, Assembler::split_high((int)Universe::non_oop_word()));
	__ addiu(IC_Klass, IC_Klass, Assembler::split_low((int)Universe::non_oop_word()));

	__ call(entry, rh);
	__ delayed()->nop();
	add_call_info(code_offset(), info);
}


/* Currently, vtable-dispatch is only enabled for sparc platforms */
void LIR_Assembler::vtable_call(int vtable_offset, CodeEmitInfo* info) {
    ShouldNotReachHere();
}



void LIR_Assembler::emit_static_call_stub() {
	address call_pc = __ pc();
	address stub = __ start_a_stub(call_stub_size);
	if (stub == NULL) {
		bailout("static call stub overflow");
		return;
	}

	int start = __ offset();
	/*  if (os::is_MP()) {
	// make sure that the displacement word of the call ends up word aligned
	int offset = __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset;
	while (offset++ % BytesPerWord != 0) {
	__ nop();
	}
	}
	*/
	__ relocate(static_stub_Relocation::spec(call_pc));
	jobject o=NULL;    
	int oop_index = __ oop_recorder()->allocate_index((jobject)o);
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(T7, Assembler::split_high((int)o));
	__ addiu(T7, T7, Assembler::split_low((int)o));

	// must be set to -1 at code generation time
	// assert(!os::is_MP() || ((__ offset() + 1) % BytesPerWord) == 0, "must be aligned on MP");
  __ lui(AT, Assembler::split_high((int)-1));
	__ addiu(AT, AT, Assembler::split_low((int)-1));
  __ jr(AT);
	__ delayed()->nop();
	assert(__ offset() - start <= call_stub_size, "stub too big")
	__ end_a_stub();
}


// Emit code for lir_throw/lir_unwind.  The exception oop arrives in V0 and,
// for the non-unwind case, the throwing pc is materialized into the register
// chosen for exceptionPC (V1) before calling into the runtime's exception
// handler lookup.
void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info, bool unwind) {
	assert(exceptionOop->as_register()== V0, "must match");
	assert(unwind || exceptionPC->as_register()== V1, "must match");

	// exception object is not added to oop map by LinearScan
	// (LinearScan assumes that no oops are in fixed registers)

	info->add_register_oop(exceptionOop);
	if (!unwind) {
		// get current pc information
		// pc is only needed if the method has an exception handler, the unwind code does not need it. 
		int pc_for_athrow  = (int)__ pc();
		int pc_for_athrow_offset = __ offset();
		Register epc = exceptionPC->as_register();
		//__ nop();
		// pc_for_athrow can not point to itself (relocInfo restriction), no need now
		// internal_pc relocation covers the lui/addiu pair loading the pc
		__ relocate(relocInfo::internal_pc_type);
		__ lui(epc, Assembler::split_high(pc_for_athrow));
		__ addiu(epc, epc, Assembler::split_low(pc_for_athrow));
		add_call_info(pc_for_athrow_offset, info); // for exception handler
		__ verify_not_null_oop(V0);
		// search an exception handler (V0: exception oop, V1: throwing pc)
		// the nofpu variant skips FPU state saving when the method has no FPU code
		if (compilation()->has_fpu_code()) {
			__ call(Runtime1::entry_for(Runtime1::handle_exception_id), 
				relocInfo::runtime_call_type);
		} else {
			__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
				relocInfo::runtime_call_type);
		}
	} else {
		__ call(Runtime1::entry_for(Runtime1::unwind_exception_id), 
				relocInfo::runtime_call_type);
	}

	// enough room for two byte trap
	__ delayed()->nop();
}

// Shift by a run-time count held in a register.  32-bit values map directly
// onto sllv/srav/srlv (which use only the low 5 bits of the count); 64-bit
// values are synthesized from 32-bit shifts with three cases: count == 0,
// count >= 32 and 0 < count < 32.  Branch delay slots are used throughout,
// so the statement order below is load-bearing.
void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
  // optimized version for linear scan:
  // * tmp must be unused
  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");

  if (left->is_single_cpu()) {
    /*Register value = left->as_register();
    assert(value != SHIFT_count, "left cannot be ECX");

    switch (code) {
      case lir_shl:  __ shll(value); break;
      case lir_shr:  __ sarl(value); break;
      case lir_ushr: __ shrl(value); break;
      default: ShouldNotReachHere();
    }
 */ 
    Register value_reg = left->as_register();
    Register count_reg = count->as_register();
    Register dest_reg = dest->as_register();
    assert_different_registers(count_reg, value_reg);
    
		switch (code) {
      case lir_shl:  __ sllv(dest_reg, value_reg, count_reg); break;
      case lir_shr:  __ srav(dest_reg, value_reg, count_reg); break;
      case lir_ushr: __ srlv(dest_reg, value_reg, count_reg); break;
      default: ShouldNotReachHere();
    }
  
  } else if (left->is_double_cpu()) {
  /*  Register lo = left->as_register_lo();
    Register hi = left->as_register_hi();
    assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX");

    switch (code) {
      case lir_shl:  __ lshl(hi, lo);        break;
      case lir_shr:  __ lshr(hi, lo, true);  break;
      case lir_ushr: __ lshr(hi, lo, false); break;
      default: ShouldNotReachHere();
   
     */
    Register creg = count->as_register();
    Register lo = left->as_register_lo();
    Register hi = left->as_register_hi();
    Register dlo = dest->as_register_lo(); 
    Register dhi = dest->as_register_hi(); 
		 
		// Java spec: only the low 6 bits of a long shift count matter.
		// NOTE(review): this masks creg in place, clobbering the count
		// register — presumably the count operand is dead afterwards; verify.
		__ andi(creg, creg, 0x3f);
	switch (code) {
		case lir_shl:
			{
				Label normal, done, notZero;

				//count=0: plain 64-bit copy
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dlo, lo);
				__ b(done);
				__ delayed();
				__ move(dhi, hi);

				//count>=32: the low word shifts entirely into the high word
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				// delay slot: AT = count - 32 (harmless if branch taken; the
				// normal path overwrites AT before using it)
				__ addiu(AT, creg, (-1) * BitsPerWord);
				__ sllv(dhi, lo, AT);
				__ b(done);
				__ delayed();
				__ move(dlo, ZERO);

				//count<32: bits shifted out of the low word feed the high word
				__ bind(normal);
				__ sllv(dhi, hi, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ srlv(AT, lo, AT);
				__ orr(dhi, dhi, AT);
				__ sllv(dlo, lo, creg);
				__ bind(done);						
			}	
			break;
		case lir_shr:
			{
				Label normal, done, notZero;
				
				//count=0: plain 64-bit copy
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dhi, hi);
				__ b(done);
				__ delayed();
				__ move(dlo, lo);

				//count>=32: low word comes from the high word (arithmetic),
				//high word becomes pure sign extension
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				__ addiu(AT, creg, (-1) * BitsPerWord);
				__ srav(dlo, hi, AT);
				__ b(done);
				__ delayed();
				__ sra(dhi, hi, BitsPerWord - 1);

				//count<32
				__ bind(normal);
				__ srlv(dlo, lo, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ sllv(AT, hi, AT);
				__ orr(dlo, dlo, AT);
				__ srav(dhi, hi, creg);
				__ bind(done);
			}		
			break;
		case lir_ushr:
			{
				Label normal, done, notZero;

				//count=zero: plain 64-bit copy
				__ bne(creg, ZERO, notZero);
				__ delayed()->nop();
				__ move(dhi, hi);
				__ b(done);
				__ delayed();
				__ move(dlo, lo);

				//count>=32: logical shift, so the high word becomes zero
				__ bind(notZero);
				__ sltiu(AT, creg, BitsPerWord);
				__ bne(AT, ZERO, normal);
				__ delayed();
				__ addi(AT, creg, (-1) * BitsPerWord);
				__ srlv(dlo, hi, AT);
				__ b(done);
				__ delayed();
				__ move(dhi, ZERO);

				//count<32
				__ bind(normal);
				__ srlv(dlo, lo, creg);
				__ move(AT, BitsPerWord);
				__ sub(AT, AT, creg);
				__ sllv(AT, hi, AT);
				__ orr(dlo, dlo, AT);
				__ srlv(dhi, hi, creg);
				__ bind(done);
			}
			break;
		default: ShouldNotReachHere();	 
	}
  } else {
    ShouldNotReachHere();
  }

}

// i add the 64 bit shift op here
// Shift by a compile-time constant count.  32-bit values use one native
// shift instruction; 64-bit (register-pair) values are synthesized from
// 32-bit shifts, split into the cases count == 0, count >= 32 and
// 0 < count < 32.
void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint  count, LIR_Opr dest) {

	if (left->is_single_cpu()) {
		Register value_reg = left->as_register();
		Register dest_reg = dest->as_register();
		count = count & 0x1F; // Java spec: only the low 5 bits matter

		switch (code) {
			case lir_shl:  __ sll(dest_reg, value_reg, count); break;
			case lir_shr:  __ sra(dest_reg, value_reg, count); break;
			case lir_ushr: __ srl(dest_reg, value_reg, count); break;
			default: ShouldNotReachHere();
		}

	} else if (dest->is_double_cpu()) {
		Register valuelo = left->as_register_lo();
		Register valuehi = left->as_register_hi();
		Register destlo = dest->as_register_lo();
		Register desthi = dest->as_register_hi();
		// destlo/desthi must not alias inputs that are still read after
		// they are written in the count < 32 sequences below
		assert_different_registers(destlo, valuehi, desthi);
		count = count & 0x3f; // Java spec: only the low 6 bits matter
		switch (code) {
			case lir_shl:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					// the low word shifts entirely into the high word
					__ sll(desthi, valuelo, count-32);
					__ move(destlo, ZERO);
				} else {
					// bits shifted out of the low word feed the high word
					__ srl(AT, valuelo, 32 - count);
					__ sll(destlo, valuelo, count);
					__ sll(desthi, valuehi, count);
					__ orr(desthi, desthi, AT);	
				}
				break;

			case lir_shr:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					__ sra(destlo, valuehi, count-32);
					// high word becomes pure sign extension
					__ sra(desthi, valuehi, 31);
				} else {
					__ sll(AT, valuehi, 32 - count);
					__ sra(desthi, valuehi, count);
					__ srl(destlo, valuelo, count);
					__ orr(destlo, destlo, AT);	
				}
				break;

			case lir_ushr:
				if (count==0) {
					__ move(destlo, valuelo);
					__ move(desthi, valuehi);
				} else if (count>=32) {
					// BUGFIX: logical (unsigned) shift must use srl, not sra,
					// so no sign bits are dragged into the result; this matches
					// the register-count version, which uses srlv here.
					__ srl(destlo, valuehi, count-32);
					__ move(desthi, ZERO);
				} else {
					__ sll(AT, valuehi, 32 - count);
					__ srl(desthi, valuehi, count);
					__ srl(destlo, valuelo, count);
					__ orr(destlo, destlo, AT);	
				}
				break;

			default: ShouldNotReachHere();
		}	
	} else {
		ShouldNotReachHere();
	}
}

//void LIR_Assembler::push_parameter(Register r, int offset_from_sp_in_words) {
//void LIR_Assembler::push_parameter(Register r, int offset_from_sp_in_words) {
// Store a register into the caller-reserved outgoing-argument area at the
// given word offset from SP.
void LIR_Assembler::store_parameter(Register r, int offset_from_esp_in_words) {
	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
	// reserved_argument_area_size() is in bytes, so compare the byte offset
	// (the original compared the word index, making the check too weak);
	// this matches the jobject overload below.
	assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
	__ sw (r, SP, offset_from_sp_in_bytes);
}


// Store an integer constant into the caller-reserved outgoing-argument area
// at the given word offset from SP (materialized via AT).
void LIR_Assembler::store_parameter(jint c,     int offset_from_esp_in_words) {
	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
	// reserved_argument_area_size() is in bytes: compare the byte offset
	// (the original compared the word index), matching the jobject overload.
	assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
	__ move(AT, c);
	__ sw(AT, SP, offset_from_sp_in_bytes);
}

// Store an oop constant into the caller-reserved outgoing-argument area at
// the given word offset from SP.  The oop is materialized with a lui/addiu
// pair covered by oop relocation info so the GC can find and update the
// embedded pointer.
void LIR_Assembler::store_parameter(jobject o,  int offset_from_esp_in_words) {
  assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
  int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
  assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");

  int oop_index = __ oop_recorder()->find_index(o);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  __ relocate(rspec);
  __ lui(AT, Assembler::split_high((int)o));
  __ addiu(AT, AT, Assembler::split_low((int)o));
  __ sw(AT, SP, offset_from_sp_in_bytes);
}


// This code replaces a call to arraycopy; no exception may 
// be thrown in this code, they must be thrown in the System.arraycopy
// activation frame; we could save some checks if this would not be the case
void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {

	// Inline expansion of System.arraycopy.  If nothing is known about the
	// element type, fall back to the generic Runtime1::arraycopy; otherwise
	// emit inline null/range/type checks (as requested by op->flags()) and
	// call the typed Runtime1 copy stub.  op->stub() is the slow path.

	ciArrayKlass* default_type = op->expected_type();
	Register src = op->src()->as_register();
	Register dst = op->dst()->as_register();
	Register src_pos = op->src_pos()->as_register();
	Register dst_pos = op->dst_pos()->as_register();
	Register length  = op->length()->as_register();
	Register tmp = T8;
#ifndef OPT_THREAD
	Register java_thread = T8;
#else
	Register java_thread = TREG;
#endif
	CodeStub* stub = op->stub();

	int flags = op->flags();
	BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
	if (basic_type == T_ARRAY) basic_type = T_OBJECT;

	// if we don't know anything or it's an object array, just go through the generic arraycopy
	if (default_type == NULL) {
		Label done;
// save outgoing arguments on stack in case call to System.arraycopy is needed
// HACK ALERT. This code used to push the parameters in a hardwired fashion
// for interpreter calling conventions. Now we have to do it in new style conventions.
// For the moment until C1 gets the new register allocator I just force all the
// args to the right place (except the register args) and then on the back side
// reload the register args properly if we go slow path. Yuck
		
// this is saved in the caller's reserved argument area
	//FIXME, maybe It will change something in the stack;	
		  // These are proper for the calling convention
		//store_parameter(length, 2);
		//store_parameter(dst_pos, 1);
		//store_parameter(dst, 0);

	 // these are just temporary placements until we need to reload
		//store_parameter(src_pos, 3);
		//store_parameter(src, 4);
		assert(src == T0 && src_pos == A0, "mismatch in calling convention");
	// pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint

		// save all five operands so they can be reloaded if the generic
		// copy fails and we must take the slow path
		__ push(src);
		__ push(dst);
		__ push(src_pos);
		__ push(dst_pos);
		__ push(length);


		// save SP and align
#ifndef OPT_THREAD
		__ get_thread(java_thread);
#endif
		__ sw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
		// carve out 5 outgoing-argument slots and 8-byte-align SP
		__ addi(SP, SP, (-5) * wordSize);
		__ move(AT, -8);
		__ andr(SP, SP, AT);
		// push argument (5th argument goes on the stack; first four in A0-A3)
		__ sw(length, SP, 4 * wordSize);
		__ move(A3, dst_pos);
		__ move(A2, dst);
		__ move(A1, src_pos);
		__ move(A0, src);
	  // make call
		address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
		__ call(entry, relocInfo::runtime_call_type);	
		__ delayed()->nop();
		// restore SP
#ifndef OPT_THREAD
		__ get_thread(java_thread);
#endif
		__ lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));

		// V0 == 0 means the generic copy succeeded
		__ beq(V0, ZERO, *stub->continuation());
		__ delayed()->nop();
		// failure: reload the saved operands and take the slow path
    __ super_pop(length); 
    __ super_pop(dst_pos);
    __ super_pop(src_pos);
    __ super_pop(dst);
    __ super_pop(src);


    __ b(*stub->entry());
    __ delayed()->nop(); 
    __ bind(*stub->continuation());
    return;
	}
	assert(default_type != NULL 
		&& default_type->is_array_klass() 
		&& default_type->is_loaded(), 
		"must be true at this point");

	// element size determines how positions/length scale to byte offsets
	int elem_size = type2aelembytes(basic_type);
	int shift_amount;
	switch (elem_size) {
		case 1 :shift_amount = 0; break;
		case 2 :shift_amount = 1; break;
		case 4 :shift_amount = 2; break;
		case 8 :shift_amount = 3; break;
		default:ShouldNotReachHere();
	}

	Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
	Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
	Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
	Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());

	// test for NULL
	if (flags & LIR_OpArrayCopy::src_null_check) {
		__ beq(src, ZERO, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_null_check) {
		__ beq(dst, ZERO, *stub->entry());
		__ delayed()->nop();
	}

	// check if negative
	if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
		__ bltz(src_pos, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
		__ bltz(dst_pos, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::length_positive_check) {
		__ bltz(length, *stub->entry());
		__ delayed()->nop();
	}

	// bounds checks: pos + length must not exceed the array length
	if (flags & LIR_OpArrayCopy::src_range_check) {
		__ add(AT, src_pos, length);
		__ lw(tmp, src_length_addr);
		__ sltu(AT, tmp, AT);
		__ bne(AT, ZERO, *stub->entry());
		__ delayed()->nop();
	}
	if (flags & LIR_OpArrayCopy::dst_range_check) {
		__ add(AT, dst_pos, length);
		__ lw(tmp, dst_length_addr);
		__ sltu(AT, tmp, AT);
		__ bne(AT, ZERO, *stub->entry());
		__ delayed()->nop();
	}

	// type check: src and dst must have the identical klass
	if (flags & LIR_OpArrayCopy::type_check) {
		__ lw(AT, src_klass_addr);
		__ lw(tmp, dst_klass_addr);
		__ bne(AT, tmp, *stub->entry());
		__ delayed()->nop();
	}

#ifdef ASSERT
	if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
		// Sanity check the known type with the incoming class.  For the
		// primitive case the types must match exactly.  For the object array
		// case, if no type check is needed then the dst type must match the
		// expected type and the src type is so subtype which we can't check.  If
		// a type check i needed then at this point the classes are known to be
		// the same but again which don't know which type so we can't check them.
		Label known_ok, halt;
		jobject2reg(default_type->encoding(), AT);
		__ lw(tmp, dst_klass_addr);
		if (basic_type != T_OBJECT) {
			__ bne(AT, tmp, halt);
			__ delayed()->nop();
			__ lw(tmp, src_klass_addr);			
		}
		__ beq(AT, tmp, known_ok);
		__ delayed()->nop();
		__ bind(halt);
		__ stop("incorrect type information in arraycopy");
		__ bind(known_ok);
	}
#endif
	// preserve the operand registers across the leaf call
	__ push(src);
	__ push(dst);
	__ push(src_pos);
	__ push(dst_pos);
	__ push(length);


	// compute the raw from/to/count arguments for the Runtime1 copy stub:
	// A0 = src element address, A1 = dst element address, A2 = count
	// (count is pre-scaled to bytes for primitives; element count for oops)
	assert(A0 != A1 &&
          A0 != length &&
          A1 != length, "register checks");
	__ move(AT, dst_pos);
	if (shift_amount > 0 && basic_type != T_OBJECT) {
		__ sll(A2, length, shift_amount);
	} else {
		if (length!=A2)
			__ move(A2, length);
	}
  __ move(A3, src_pos );	
	assert(A0 != dst_pos &&
          A0 != dst &&
          dst_pos != dst, "register checks");

  assert_different_registers(A0, dst_pos, dst);
	__ sll(AT, AT, shift_amount);
	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
	__ add(A1, dst, AT);

	__ sll(AT, A3, shift_amount);
	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
	__ add(A0, src, AT);



	if (basic_type == T_OBJECT) {
		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy), 3);
	} else {
		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy), 3);
	}
  __ super_pop(length); 
  __ super_pop(dst_pos);
  __ super_pop(src_pos);
  __ super_pop(dst);
  __ super_pop(src);

	__ bind(*stub->continuation());
}


// Emit monitorenter/monitorexit.  With fast locking enabled the inline
// lock/unlock path is emitted by the macro assembler; the code stub is the
// slow path taken on contention (or always, when UseFastLocking is off).
void LIR_Assembler::emit_lock(LIR_OpLock* op) {
	Register obj = op->obj_opr()->as_register();  // may not be an oop
	Register hdr = op->hdr_opr()->as_register();
	Register lock = op->lock_opr()->as_register();
	if (!UseFastLocking) {
		// always take the runtime slow path
		__ b(*op->stub()->entry());
	} else if (op->code() == lir_lock) {
		Register scratch = noreg;
		if (UseBiasedLocking) {
			scratch = op->scratch_opr()->as_register();
		}
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
			"lock_reg must point to the displaced header");
		// add debug info for NullPointerException only if one is possible
		int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
		if (op->info() != NULL) {
			//add_debug_info_for_null_check_here(op->info());
			add_debug_info_for_null_check(null_check_offset,op->info());
		}
		// done
	} else if (op->code() == lir_unlock) {
		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
			"lock_reg must point to the displaced header");
		__ unlock_object(hdr, obj, lock, *op->stub()->entry());
	} else {
		Unimplemented();
	}
	// the slow path rejoins here
	__ bind(*op->stub()->continuation());
}



// Emit profiling code for a call site: bump the invocation counter in the
// methodDataOop and, for virtual/interface calls, record the receiver type
// in the per-row VirtualCallData table (statically when the receiver class
// is known at compile time, otherwise with a run-time row search).
void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
	ciMethod* method = op->profiled_method();
	int bci          = op->profiled_bci();

	// Update counter for all call types
	ciMethodData* md = method->method_data();
	if (md == NULL) {
		bailout("out of memory building methodDataOop");
		return;
	}
	ciProfileData* data = md->bci_to_data(bci);
	assert(data->is_CounterData(), "need CounterData for calls");
	assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
	Register mdo  = op->mdo()->as_register();

	// materialize the mdo address with oop relocation so GC can update it
	int oop_index = __ oop_recorder()->find_index(md->encoding());
	RelocationHolder rspec = oop_Relocation::spec(oop_index);
	__ relocate(rspec);
	__ lui(mdo, Assembler::split_high((int)md->encoding()));
	__ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));

	// counter_addr += counter_increment (load/add/store via AT)
	Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
	__ lw(AT,counter_addr); 
	__ addi(AT,AT, DataLayout::counter_increment); 
	__ sw(AT,counter_addr); 

	Bytecodes::Code bc = method->java_code_at_bci(bci);
	// Perform additional virtual call profiling for invokevirtual and
	// invokeinterface bytecodes
	if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
			Tier1ProfileVirtualCalls) {
		assert(op->recv()->is_single_cpu(), "recv must be allocated");
		Register recv = op->recv()->as_register();
		assert_different_registers(mdo, recv);
		assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
		ciKlass* known_klass = op->known_holder();
		if (Tier1OptimizeVirtualCallProfiling && known_klass != NULL) {
			// We know the type that will be seen at this call site; we can
			// statically update the methodDataOop rather than needing to do
			// dynamic tests on the receiver type

			// NOTE: we should probably put a lock around this search to
			// avoid collisions by concurrent compilations
			ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
			uint i;
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				ciKlass* receiver = vc_data->receiver(i);
				if (known_klass->equals(receiver)) {
					// row already records this klass: just bump its count
					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
					__ lw(AT,data_addr); 
					__ addi(AT,AT,DataLayout::counter_increment);
					__ sw(AT,data_addr); 
					return;
				}
			}

			// Receiver type not found in profile data; select an empty slot

			// Note that this is less efficient than it should be because it
			// always does a write to the receiver part of the
			// VirtualCallData rather than just the first time
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				ciKlass* receiver = vc_data->receiver(i);
				if (receiver == NULL) {
					// store the klass oop into the empty row (oop-relocated)
					// and start its counter at one increment
					Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
					int oop_index = __ oop_recorder()->find_index(known_klass->encoding());
					RelocationHolder rspec = oop_Relocation::spec(oop_index);
					__ relocate(rspec);
					__ lui(AT, Assembler::split_high((int)known_klass->encoding()));
					__ addiu(AT, AT, Assembler::split_low((int)known_klass->encoding()));
					__ sw(AT,recv_addr); 
					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
					__ lw(AT, data_addr); 
					__ addi(AT,AT,DataLayout::counter_increment);
					__ sw(AT,data_addr); 
					return;
				}
			}
		} else {
			// Receiver type unknown at compile time: load the receiver's
			// klass and search the rows at run time.
			__ lw(recv, Address(recv, oopDesc::klass_offset_in_bytes()));
			Label update_done;
			uint i;
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				Label next_test;
				// See if the receiver is receiver[n].
				__ lw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)))); 
				__ bne(recv,AT,next_test);	
				__ delayed()->nop();	
				Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
				__ lw(AT,data_addr); 
				__ addi(AT,AT,DataLayout::counter_increment);
				__ sw(AT,data_addr); 
				__ b(update_done);
				__ delayed()->nop(); 
				__ bind(next_test);
			}

			// Didn't find receiver; find next empty slot and fill it in
			for (i = 0; i < VirtualCallData::row_limit(); i++) {
				Label next_test;
				Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
				__ lw(AT,recv_addr);   
				__ bne(AT,ZERO,next_test); 
				__ delayed()->nop();
				__ sw(recv,recv_addr); 
				__ move(AT,DataLayout::counter_increment); 
				__ sw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))));	
				if (i < (VirtualCallData::row_limit() - 1)) {
					// the last row falls through to update_done anyway
					__ b(update_done);
					__ delayed()->nop(); 
				}
				__ bind(next_test);
			}

			__ bind(update_done);
		}
	}
}

// Delay-slot filling for LIR (sparc-style) is not implemented on this port.
void LIR_Assembler::emit_delay(LIR_OpDelay*) {
	Unimplemented();
}


// Load the stack address of the given monitor slot into dst.
void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
  __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
}

// Arithmetic negation for int, long, float and double operands.
void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
	if (left->is_single_cpu()) {
		// 32-bit: dest = 0 - left
		__ subu(dest->as_register(), ZERO, left->as_register());
	} else if (left->is_double_cpu()) {
		// 64-bit two's complement negate on a register pair:
		//   dlo = ~lo + 1
		//   carry = (dlo == 0)   (the +1 wrapped iff lo was 0)
		//   dhi = ~hi + carry
		Register lo = left->as_register_lo();
		Register hi = left->as_register_hi();
		Register dlo = dest->as_register_lo();
		Register dhi = dest->as_register_hi();
    assert(dlo != hi, "register checks");   // hi is still read after dlo is written
		__ nor(dlo, ZERO, lo);
		__ addiu(dlo, dlo, 1);
		__ sltiu(AT, dlo, 1);                 // AT = (dlo == 0) ? 1 : 0
		__ nor(dhi, ZERO, hi);
		__ addu(dhi, dhi, AT);
	} else if (left->is_single_fpu()) {
		//for mips , does it required ?      
		__ neg_s(dest->as_float_reg(), left->as_float_reg());
	} else if (left->is_double_fpu()) {
		//for mips , does it required ?      
		__ neg_d(dest->as_double_reg(), left->as_double_reg());
	}else {
		ShouldNotReachHere();
	}
}


// Load the effective address described by addr into the destination register.
void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
	assert(addr->is_address() && dest->is_register(), "check");
	// (removed an unused local that duplicated dest->as_register())
	__ lea(dest->as_register(), as_Address(addr->as_address_ptr()));
}


// Materialize the 32-bit value of an oop constant into reg via a lui/addiu
// pair.  Non-NULL oops are preceded by oop relocation info so the GC can
// find and update the embedded pointer; NULL is emitted bare.
void LIR_Assembler::jobject2reg(jobject o, Register reg) {
	if (o != NULL) {
		int oop_index = __ oop_recorder()->find_index(o);
		RelocationHolder rspec = oop_Relocation::spec(oop_index);
		__ relocate(rspec);
	} else {
		// This seems wrong as we do not emit relocInfo
		// for classes that are not loaded yet, i.e., they will be
		// never GC'd
	}
	__ lui(reg, Assembler::split_high((int)o));
	__ addiu(reg, reg, Assembler::split_low((int)o));
}

// Emit a direct call into the runtime.  Debug info is recorded at the
// return address only when the caller supplied a CodeEmitInfo.
void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
  assert(!tmp->is_valid(), "don't need temporary");
  __ call(dest, relocInfo::runtime_call_type);
  __ delayed()->nop();
  if (info == NULL) {
    return;
  }
  add_call_info_here(info);
}

/*  by yyq 7/22/2009
 *  i don't know the register allocator will allocate long or double in two consecutive registers
 *  if the allocator do like this, the lws below should be removed and lds be used.
 */

// Move a volatile long between a register pair and memory.  The access is
// performed as two separate 32-bit loads/stores (lw/sw or lwc1/swc1), so it
// is NOT a single atomic 64-bit access — NOTE(review): volatile long
// accesses are supposed to be atomic; confirm how this port guarantees that
// (see the comment above about using ldc1/sdc1 once the allocator pairs
// registers).
void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
	assert(type == T_LONG, "only for volatile long fields");
	if (info != NULL) {
		// the first access below may fault on a NULL base
		add_debug_info_for_null_check_here(info);
	}
 
	if(src->is_register() && dest->is_address()) {
		// register pair -> memory: low word at disp, high word at disp + 4
		if(src->is_double_cpu()) {
    	__ sw(src->as_register_lo(), as_Address(dest->as_address_ptr()));
    	__ sw(src->as_register_hi(), as_Address(dest->as_address_ptr()).base(), 
				as_Address(dest->as_address_ptr()).disp() +4);
		} else if (src->is_double_fpu()) {
    	__ swc1(src->as_fpu_lo(), as_Address(dest->as_address_ptr()));
    	__ swc1(src->as_fpu_hi(), as_Address(dest->as_address_ptr()).base(), 
				as_Address(dest->as_address_ptr()).disp() +4);
		} else {
      ShouldNotReachHere();
		}
	} else if (src->is_address() && dest->is_register()){
		// memory -> register pair
		if(dest->is_double_cpu()) {
    	__ lw(dest->as_register_lo(), as_Address(src->as_address_ptr()));
    	__ lw(dest->as_register_hi(), as_Address(src->as_address_ptr()).base(), 
				as_Address(src->as_address_ptr()).disp() +4);
		} else if (dest->is_double_fpu()) {
    	__ lwc1(dest->as_fpu_lo(), as_Address(src->as_address_ptr()));
    	__ lwc1(dest->as_fpu_hi(), as_Address(src->as_address_ptr()).base(), 
				as_Address(src->as_address_ptr()).disp() +4);
		} else {
      ShouldNotReachHere();
		}
	} else {
    ShouldNotReachHere();
	}
}


// Full memory barrier: MIPS sync orders all prior memory accesses before
// all subsequent ones.
void LIR_Assembler::membar() {
	__ sync();
}

// Acquire barrier: implemented conservatively as a full sync.
void LIR_Assembler::membar_acquire() {
	__ sync();
}

// Release barrier: implemented conservatively as a full sync.
void LIR_Assembler::membar_release() {
	__ sync();
}

// Load the current JavaThread* into the result register.  With OPT_THREAD
// the thread lives permanently in TREG; otherwise fetch it via the macro
// assembler helper.
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
	assert(result_reg->is_register(), "check");
#ifndef OPT_THREAD
	__ get_thread(result_reg->as_register());
#else
	__ move(result_reg->as_register(), TREG);
#endif
}

// Platform hook for LIR peephole optimization; no rewrites on this port.
void LIR_Assembler::peephole(LIR_List*) {
	// do nothing for now
}

#undef __ 

// Backward-branch targets need no extra alignment on this port.
void LIR_Assembler::align_backward_branch_target() {
}