changeset 10:7eeee95a5a53

Fix five bugs related to safepoint_poll, double-precision operands, the verify_oop operation, and safepoint_return. 1. The pc_offset recorded for the oopMap at a safepoint_poll (used by add_debug_info_for_branch) must be the offset of the instruction that can cause the exception. 2. To avoid type-checking failures when the value of a LIRConst that is single- or double-precision is read through a common path, use as_jint_lo_bits and as_jint_hi_bits, which are more general. 3. In the stack2reg function, when the operand is double-precision, both float registers were filled from the same stack address; they must be loaded from the low and high word slots. Fix it. 4. In the verify_oop_addr function, the address of the object to be verified may use SP, so the object must be loaded before SP is changed. 5. Make safepoint_return use AT. 6. Miscellaneous code cleanup.
author YANG Yongqiang <yangyongqiang@loongson.cn>
date Sat, 23 Oct 2010 21:08:56 +0000
parents 3713353e23db
children d3aee0aef6b6
files hotspot/src/cpu/mips/vm/assembler_mips.cpp hotspot/src/cpu/mips/vm/c1_CodeStubs_mips.cpp hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp hotspot/src/cpu/mips/vm/c1_Runtime1_mips.cpp hotspot/src/cpu/mips/vm/sharedRuntime_mips.cpp hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp hotspot/src/cpu/mips/vm/templateTable_mips.cpp hotspot/src/share/vm/runtime/frame.cpp hotspot/src/share/vm/runtime/sharedRuntime.cpp
diffstat 9 files changed, 686 insertions(+), 2343 deletions(-) [+]
line wrap: on
line diff
--- a/hotspot/src/cpu/mips/vm/assembler_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -2051,44 +2051,11 @@
 
 
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-/*
-	if (!VerifyOops) return;
-
-  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
-  // Pass register number to verify_oop_subroutine
-  char* b = new char[strlen(s) + 50];
-  sprintf(b, "verify_oop_addr: %s", s);
-
-  push(rax);                          // save rax,
-  // addr may contain rsp so we will have to adjust it based on the push
-  // we just did
-  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
-  // stores rax into addr which is backwards of what was intended.
-  if (addr.uses(rsp)) {
-    lea(rax, addr);
-    pushptr(Address(rax, BytesPerWord));
-  } else {
-    pushptr(addr);
-  }
-
-  ExternalAddress buffer((address) b);
-  // pass msg argument
-  // avoid using pushptr, as it modifies scratch registers
-  // and our contract is not to modify anything
-  movptr(rax, buffer.addr());
-  push(rax);
-
-  // call indirectly to solve generation ordering problem
-  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
-  call(rax);
-  // Caller pops the arguments and restores rax, from the stack
-*/
 	if (!VerifyOops) {
 		nop();
 		return;
 	}
 	// Pass register number to verify_oop_subroutine
-	Address adjust(addr.base(),addr.disp()+BytesPerWord);	
 	char* b = new char[strlen(s) + 50];
 	sprintf(b, "verify_oop_addr: %s",  s);
 
@@ -2099,10 +2066,10 @@
 	sw(A1, SP, - 5*wordSize);	
 	sw(AT, SP, - 6*wordSize);	
 	sw(T9, SP, - 7*wordSize);	
+	lw(A1, addr);   // addr may use SP, so load from it before change SP
 	addiu(SP, SP, - 7 * wordSize);
 
 	move(A0, (int)b);
-	lw(A1, adjust);
 	// call indirectly to solve generation ordering problem
 	move(AT, (int)StubRoutines::verify_oop_subroutine_entry_address());        	
 	lw(T9, AT, 0);
@@ -2120,11 +2087,11 @@
 
 // used registers :  T5, T6
 void MacroAssembler::verify_oop_subroutine() {
-	// [sp - 1]: ra
-	// [sp + 0]: char* error message    A0
-	// [sp + 1]: oop   object to verify A1
-
-	Label exit, error, error1,error2,error3,error4;
+	// RA: ra
+	// A0: char* error message    
+	// A1: oop   object to verify 
+
+	Label exit, error;
 	// increment counter
 	move(T5, (int)StubRoutines::verify_oop_count_addr());
 	lw(AT, T5, 0);
@@ -2141,18 +2108,13 @@
 	move(AT, oop_mask);
 	andr(T5, A1, AT);
 	move(AT, oop_bits);
-	/*
-	//jerome_for_debug
 	bne(T5, AT, error);
 	delayed()->nop();
-	 */
+
 	// make sure klass is 'reasonable'
 	lw(T5, A1, oopDesc::klass_offset_in_bytes()); // get klass
-	/*
-	//jerome_for_debug
-	beq(T5, ZERO, error1);                        // if klass is NULL it is broken
+	beq(T5, ZERO, error);                        // if klass is NULL it is broken
 	delayed()->nop();
-	 */
 	// Check if the klass is in the right area of memory
 	const int klass_mask = Universe::verify_klass_mask();
 	const int klass_bits = Universe::verify_klass_bits();
@@ -2160,18 +2122,17 @@
 	move(AT, klass_mask);
 	andr(T6, T5, AT);
 	move(AT, klass_bits);
-	bne(T6, AT, error2);
+	bne(T6, AT, error);
 	delayed()->nop();
-
 	// make sure klass' klass is 'reasonable'
 	lw(T5, T5, oopDesc::klass_offset_in_bytes()); // get klass' klass
-	beq(T5, ZERO, error3);  // if klass' klass is NULL it is broken
+	beq(T5, ZERO, error);  // if klass' klass is NULL it is broken
 	delayed()->nop();
 
 	move(AT, klass_mask);
 	andr(T6, T5, AT);
 	move(AT, klass_bits);
-	bne(T6, AT, error4);
+	bne(T6, AT, error);
 	delayed()->nop();     // if klass not in right area of memory it is broken too.
 
 	// return if everything seems ok
@@ -2182,35 +2143,14 @@
 
 	// handle errors
 	bind(error);
-	lw(AT, ZERO, 16);	
-	sw(RA, SP, (-1) * wordSize);
-	sw(FP, SP, (-2) * wordSize);
-	//save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2) + 2);
-	//RegistersForDebugging::save_registers(this);
-	//move(A1, SP);
-	//addi(SP, SP, (-2) * wordSize);
 	pushad();
-	addi(SP, SP, (-3) * wordSize);
+	addi(SP, SP, (-1) * wordSize);
 	call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
 	delayed()->nop();
-	//addi(SP, SP, 2 * wordSize);
-	addiu(SP, SP, 3 * wordSize);
+	addiu(SP, SP, 1 * wordSize);
 	popad();	
-	//RegistersForDebugging::restore_registers(this, SP);
-	//restore();
-	lw(RA, SP, (-1) * wordSize);
-	lw(FP, SP, (-2) * wordSize);
 	jr(RA);
 	delayed()->nop();
-	//jerome_for_debug
-	bind(error1);
-	stop("error1");
-	bind(error2);
-	stop("error2");
-	bind(error3);
-	stop("error3");
-	bind(error4);
-	stop("error4");
 }
 
 void MacroAssembler::verify_tlab(Register t1, Register t2) {
--- a/hotspot/src/cpu/mips/vm/c1_CodeStubs_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_CodeStubs_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -33,42 +33,6 @@
 double ConversionStub::double_zero = 0.0;
 
 void ConversionStub::emit_code(LIR_Assembler* ce) {
-	/*
-	   __ bind(_entry);
-	   assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub");
-
-
-	   if (input()->is_single_xmm()) {
-	   __ comiss(input()->as_xmm_float_reg(),
-	   ExternalAddress((address)&float_zero));
-	   } else if (input()->is_double_xmm()) {
-	   __ comisd(input()->as_xmm_double_reg(),
-	   ExternalAddress((address)&double_zero));
-	   } else {
-	   LP64_ONLY(ShouldNotReachHere());
-	   __ push(rax);
-	   __ ftst();
-	   __ fnstsw_ax();
-	   __ sahf();
-	   __ pop(rax);
-	   }
-
-	   Label NaN, do_return;
-	   __ jccb(Assembler::parity, NaN);
-	   __ jccb(Assembler::below, do_return);
-
-	// input is > 0 -> return maxInt
-	// result register already contains 0x80000000, so subtracting 1 gives 0x7fffffff
-	__ decrement(result()->as_register());
-	__ jmpb(do_return);
-
-	// input is NaN -> return 0
-	__ bind(NaN);
-	__ xorptr(result()->as_register(), result()->as_register());
-
-	__ bind(do_return);
-	__ jmp(_continuation);
-	 */
 	__ bind(_entry);
 	assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub");
 }
@@ -471,7 +435,6 @@
 void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
 	ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
 	__ bind(_entry);
-	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id)));
 	__ call(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id), relocInfo::runtime_call_type);
 	__ delayed()->nop();
 	ce->add_call_info_here(_info);
@@ -488,7 +451,6 @@
 	if (_obj->is_cpu_register()) {
 		ce->store_parameter(_obj->as_register(), 0);
 	}
-	//__ call(RuntimeAddress(Runtime1::entry_for(_stub)));
 	__ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type);
 	__ delayed()->nop();
 	ce->add_call_info_here(_info);
--- a/hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -549,32 +549,28 @@
 	// Pop the stack before the safepoint code
 	__ leave();
  //FIXME I have no idea it is safe to use A0
-	__ lui(A0, Assembler::split_high((intptr_t)os::get_polling_page() 
+/*	__ lui(AT, Assembler::split_high((intptr_t)os::get_polling_page() 
 			+ (SafepointPollOffset % os::vm_page_size())));
 	__ relocate(relocInfo::poll_return_type);
-	__ lw(AT, A0, Assembler::split_low((intptr_t)os::get_polling_page() 
+	__ lw(AT, AT, Assembler::split_low((intptr_t)os::get_polling_page() 
 			+ (SafepointPollOffset % os::vm_page_size())));
+*/
 	__ jr(RA);
 	__ delayed()->nop();
 }
 
 //read protect mem to ZERO won't cause the exception only in godson-2e, So I modify ZERO to AT .@jerome,11/25,2006
 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
-	if (info != NULL) {
-		add_debug_info_for_branch(info);
-	}else{
-		ShouldNotReachHere();
-	} 
+  assert(info != NULL, "info must not be null for safepoint poll");
 	int offset = __ offset();
   Register r = tmp->as_register();
 	__ lui(r, Assembler::split_high((intptr_t)os::get_polling_page() 
 				+ (SafepointPollOffset % os::vm_page_size())));
+  add_debug_info_for_branch(info);
 	__ relocate(relocInfo::poll_type);
 	__ lw(AT, r, Assembler::split_low((intptr_t)os::get_polling_page() 
 				+ (SafepointPollOffset % os::vm_page_size())));
-	
 	return offset; 
-
 }
 
 void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
@@ -707,47 +703,40 @@
 
 
 void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
-  	assert(src->is_constant(), "should not call otherwise");
-   	assert(dest->is_stack(), "should not call otherwise");
-     	LIR_Const* c = src->as_constant_ptr();
-  	switch (c->type()) {
-    		case T_INT:  // fall through
-    		case T_FLOAT:
- 			__ move(AT, c->as_jint_bits());
-			__ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
-			break;
-
-    		case T_OBJECT:
-      			//__ movl(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jobject());
-      			if (c->as_jobject() == NULL) {
-				__ sw(ZERO, frame_map()->address_for_slot(dest->single_stack_ix()));
-			} else {
-				int oop_index = __ oop_recorder()->find_index(c->as_jobject());
-				RelocationHolder rspec = oop_Relocation::spec(oop_index);
-				__ relocate(rspec);
-				__ lui(AT, Assembler::split_high((int)c->as_jobject()));
-				__ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
-				__ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
-				}
-			break;
-    		case T_LONG:  // fall through
-    		case T_DOUBLE:
-      	//		__ movl(frame_map()->address_for_slot(dest->double_stack_ix(),
-	//					lo_word_offset_in_bytes), c->as_jint_lo_bits());
-      	//		__ movl(frame_map()->address_for_slot(dest->double_stack_ix(),
-         //                              hi_word_offset_in_bytes), c->as_jint_hi_bits());
-      			__ move(AT, c->as_jint_lo());
-			__ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
-							lo_word_offset_in_bytes));
-	 		__ move(AT, c->as_jint_hi());
-			__ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
-							hi_word_offset_in_bytes));
-			break;
-
-    		default:
-    			ShouldNotReachHere();
+	assert(src->is_constant(), "should not call otherwise");
+ 	assert(dest->is_stack(), "should not call otherwise");
+   	LIR_Const* c = src->as_constant_ptr();
+	switch (c->type()) {
+  	case T_INT:  // fall through
+  	case T_FLOAT:
+		  __ move(AT, c->as_jint_bits());
+		  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
+		  break;
+
+  	case T_OBJECT:
+    	if (c->as_jobject() == NULL) {
+			  __ sw(ZERO, frame_map()->address_for_slot(dest->single_stack_ix()));
+		  } else {
+			  int oop_index = __ oop_recorder()->find_index(c->as_jobject());
+			  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+			  __ relocate(rspec);
+			  __ lui(AT, Assembler::split_high((int)c->as_jobject()));
+			  __ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
+			  __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
+			}
+		  break;
+  	case T_LONG:  // fall through
+  	case T_DOUBLE:
+      __ move(AT, c->as_jint_lo_bits());
+		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
+						lo_word_offset_in_bytes));
+ 		  __ move(AT, c->as_jint_hi_bits());
+		  __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
+						hi_word_offset_in_bytes));
+		  break;
+  	default:
+  		ShouldNotReachHere();
   }
-
 }
 
 void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info ) {
@@ -760,9 +749,9 @@
 	switch (type) {
 		case T_LONG: // fall through
 		case T_DOUBLE:
-			__ move(AT, c->as_jint_hi());
+			__ move(AT, c->as_jint_hi_bits());
 			__ sw(AT, as_Address_hi(addr));
-			__ move(AT, c->as_jint_lo());
+			__ move(AT, c->as_jint_lo_bits());
 			__ sw(AT, as_Address_lo(addr));
 			break; 
 		case T_OBJECT:  // fall through
@@ -803,13 +792,9 @@
   if (dest->is_float_kind() && src->is_float_kind()) {
 		if (dest->is_single_fpu()) {
 			assert(src->is_single_fpu(), "must both be float");
-			//__ mfc1(AT, src->as_float_reg());
-			//__ mtc1(AT, dest->as_float_reg());
 			 __ mov_s(dest->as_float_reg(), src->as_float_reg());
 		} else {
 			assert(src->is_double_fpu(), "must bothe be double");
-			//__ dmfc1(AT, src->as_double_reg());
-			//__ dmtc1(AT, dest->as_double_reg());
 			__ mov_d( dest->as_double_reg(),src->as_double_reg());
 		}
   } else if (!dest->is_float_kind() && !src->is_float_kind()) {
@@ -852,27 +837,18 @@
     if (type == T_OBJECT || type == T_ARRAY) {
       __ verify_oop(src->as_register());
     }
- //   __ movl (dst, src->as_register());
-     __ sw(src->as_register(),dst);  
+    __ sw(src->as_register(),dst);  
   } else if (src->is_double_cpu()) {
     Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
     Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
-   // __ movl (dstLO, src->as_register_lo());
-   //__ movl (dstHI, src->as_register_hi());
      __ sw(src->as_register_lo(),dstLO);
      __ sw(src->as_register_hi(),dstHI);
   }else if (src->is_single_fpu()) {
-    assert(src->fpu_regnr() == 0, "argument must be on TOS");
     Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
-   // if (pop_fpu_stack)     __ fstp_s (dst_addr);
-    //else                   __ fst_s  (dst_addr);
-     __ swc1(src->as_float_reg(), dst_addr);
+    __ swc1(src->as_float_reg(), dst_addr);
 
   } else if (src->is_double_fpu()) {
-    //assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
     Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
-   // if (pop_fpu_stack)     __ fstp_d (dst_addr);
-  //  else                   __ fst_d  (dst_addr);
     __ swc1(src->as_double_reg(), dst_addr);
     __ swc1(src->as_double_reg() + 1, dst_addr.base(), dst_addr.disp() + 4);
 
@@ -890,7 +866,9 @@
        	Register disp_reg = NOREG;
 	int disp_value = to_addr->disp();
 
-	if (type == T_ARRAY || type == T_OBJECT) __ verify_oop(src->as_register());
+	if (type == T_ARRAY || type == T_OBJECT) {
+    __ verify_oop(src->as_register());
+  }
 
 	if (needs_patching) {
 		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
@@ -1050,13 +1028,13 @@
 
 void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
 	assert(src->is_stack(), "should not call otherwise");
-  	assert(dest->is_register(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
 	if (dest->is_single_cpu()) {
     //		__ movl(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
-    		__ lw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
-    	if (type == T_ARRAY || type == T_OBJECT) {
-      		__ verify_oop(dest->as_register());
-    	}
+    __ lw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    if (type == T_ARRAY || type == T_OBJECT) {
+      __ verify_oop(dest->as_register());
+    }
 	} else if (dest->is_double_cpu()) {
 		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(),lo_word_offset_in_bytes);
 		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
@@ -1066,12 +1044,11 @@
 		Address addr = frame_map()->address_for_slot(src->single_stack_ix());
 		__ lwc1(dest->as_float_reg(), addr);
 	} else if (dest->is_double_fpu())  {
-		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix());
-		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix());
+		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(),lo_word_offset_in_bytes);
+		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
 		__ lwc1(dest->as_double_reg(), src_addr_LO);
 		__ lwc1(dest->as_double_reg()+1, src_addr_HI);
-	}
-		else {
+	} else {
 		assert(dest->is_single_cpu(), "cannot be anything else but a single cpu");
 		assert(type!= T_ILLEGAL, "Bad type in stack2reg")
 		Address addr = frame_map()->address_for_slot(src->single_stack_ix());
@@ -1146,166 +1123,159 @@
 	int offset = code_offset();
 
 	switch(type) {
-	case T_BOOLEAN:
-	case T_BYTE:
-		{
-			//assert(to_reg.is_word(), "just check");
-			if (disp_reg == noreg) {
-				__ lb(dest->as_register(), src_reg, disp_value);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lb(dest->as_register(), AT, 0);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lb(dest->as_register(), AT, Assembler::split_low(disp_value));
-			}
-		}
-		break;
-
-	case T_CHAR:
-		{
-			//assert(to_reg.is_word(), "just check");
-			
-			if (disp_reg == noreg) {
-				__ lhu(dest->as_register(), src_reg, disp_value);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lhu(dest->as_register(), AT, 0);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lhu(dest->as_register(), AT, Assembler::split_low(disp_value));
-			}
-		}
-		break;
-
-	case T_SHORT:
-		{
-		//	assert(to_reg.is_word(), "just check");
-			
-			if (disp_reg == noreg) {
-				__ lh(dest->as_register(), src_reg, disp_value);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lh(dest->as_register(), AT, 0);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lh(dest->as_register(), AT, Assembler::split_low(disp_value));
-			}
-		}
-		break;
-
-	case T_INT:
-	case T_OBJECT:
-	case T_ARRAY:
-		{
-			//assert(to_reg.is_word(), "just check");
-			
-			if (disp_reg == noreg) {
-				__ lw(dest->as_register(), src_reg, disp_value);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lw(dest->as_register(), AT, 0);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lw(dest->as_register(), AT, Assembler::split_low(disp_value));
-			}
-		}
-		break;
-
-	case T_LONG:
-		{	
-		Register to_lo = dest->as_register_lo();
-     		Register to_hi = dest->as_register_hi();
-      		Register base = addr->base()->as_register();
-      		Register index = noreg;
-      		if (addr->index()->is_register()) {
+    case T_BOOLEAN:
+    case T_BYTE:
+    	{
+    		//assert(to_reg.is_word(), "just check");
+    		if (disp_reg == noreg) {
+    			__ lb(dest->as_register(), src_reg, disp_value);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lb(dest->as_register(), AT, 0);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lb(dest->as_register(), AT, Assembler::split_low(disp_value));
+    		}
+    	}
+    	break;
+    
+    case T_CHAR:
+    	{
+    		//assert(to_reg.is_word(), "just check");
+    		
+    		if (disp_reg == noreg) {
+    			__ lhu(dest->as_register(), src_reg, disp_value);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lhu(dest->as_register(), AT, 0);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lhu(dest->as_register(), AT, Assembler::split_low(disp_value));
+    		}
+    	}
+    	break;
+    
+    case T_SHORT:
+    	{
+    	//	assert(to_reg.is_word(), "just check");
+    		
+    		if (disp_reg == noreg) {
+    			__ lh(dest->as_register(), src_reg, disp_value);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lh(dest->as_register(), AT, 0);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lh(dest->as_register(), AT, Assembler::split_low(disp_value));
+    		}
+    	}
+    	break;
+    
+    case T_INT:
+    case T_OBJECT:
+    case T_ARRAY:
+    	{
+    		//assert(to_reg.is_word(), "just check");
+    		
+    		if (disp_reg == noreg) {
+    			__ lw(dest->as_register(), src_reg, disp_value);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lw(dest->as_register(), AT, 0);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lw(dest->as_register(), AT, Assembler::split_low(disp_value));
+    		}
+    	}
+    	break;
+    
+    case T_LONG:
+    	{	
+    	Register to_lo = dest->as_register_lo();
+       	Register to_hi = dest->as_register_hi();
+        	Register base = addr->base()->as_register();
+        	Register index = noreg;
+        	if (addr->index()->is_register()) {
         		index = addr->index()->as_register();
-      		}
-      		if ((base == to_lo && index == to_hi) ||(base == to_hi && index == to_lo)) {
-        	// addresses with 2 registers are only formed as a result of
-       		 // array access so this code will never have to deal with
-        	// patches or null checks.
-        	assert(info == NULL && patch == NULL, "must be");
-        //	__ leal(to_hi, as_Address(addr));
-        	__ lea(to_hi, as_Address(addr));
-        	//__ movl(to_lo, Address(to_hi));
-        	__ lw(to_lo, Address(to_hi));
-        	//__ movl(to_hi, Address(to_hi, BytesPerWord));
-        	__ lw(to_hi, Address(to_hi, BytesPerWord));
-      		} else if (base == to_lo || index == to_lo) {
-        	assert(base != to_hi, "can't be");
-        	assert(index == noreg || (index != base && index != to_hi), "can't handle this");
-        	//__ movl(to_hi, as_Address_hi(addr));
-        	__ lw(to_hi, as_Address_hi(addr));
-        	if (patch != NULL) {
-          		patching_epilog(patch, lir_patch_high, base, info);
-          		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
-          		patch_code = lir_patch_low;
         	}
-        	//__ movl(to_lo, as_Address_lo(addr));
-        	__ lw(to_lo, as_Address_lo(addr));
-      		} else {
-        	assert(index == noreg || (index != base && index != to_lo), "can't handle this");
-        	//__ movl(to_lo, as_Address_lo(addr));
-        	__ lw(to_lo, as_Address_lo(addr));
-        	if (patch != NULL) {
-          	patching_epilog(patch, lir_patch_low, base, info);
-          	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
-          	patch_code = lir_patch_high;
+        	if ((base == to_lo && index == to_hi) ||(base == to_hi && index == to_lo)) {
+            // addresses with 2 registers are only formed as a result of
+            // array access so this code will never have to deal with
+            // patches or null checks.
+            assert(info == NULL && patch == NULL, "must be");
+            __ lea(to_hi, as_Address(addr));
+            __ lw(to_lo, Address(to_hi));
+            __ lw(to_hi, Address(to_hi, BytesPerWord));
+        	} else if (base == to_lo || index == to_lo) {
+            assert(base != to_hi, "can't be");
+            assert(index == noreg || (index != base && index != to_hi), "can't handle this");
+            __ lw(to_hi, as_Address_hi(addr));
+            if (patch != NULL) {
+            	patching_epilog(patch, lir_patch_high, base, info);
+            	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+            	patch_code = lir_patch_low;
+            }
+        	  __ lw(to_lo, as_Address_lo(addr));
+        	} else {
+        	  assert(index == noreg || (index != base && index != to_lo), "can't handle this");
+        	  __ lw(to_lo, as_Address_lo(addr));
+            if (patch != NULL) {
+            	patching_epilog(patch, lir_patch_low, base, info);
+            	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+            	patch_code = lir_patch_high;
+            }
+        	  __ lw(to_hi, as_Address_hi(addr));
         	}
-        	//__ movl(to_hi, as_Address_hi(addr));
-        	__ lw(to_hi, as_Address_hi(addr));
-      		}
-      		break;
+        	break;
+      	}
+    case T_FLOAT:
+    	{
+    		//assert(to_reg.is_float(), "just check");
+    		if (disp_reg == noreg) {
+    			__ lwc1(dest->as_float_reg(), src_reg, disp_value);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lwc1(dest->as_float_reg(), AT, 0);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lwc1(dest->as_float_reg(), AT, Assembler::split_low(disp_value));
     		}
-	case T_FLOAT:
-		{
-			//assert(to_reg.is_float(), "just check");
-			if (disp_reg == noreg) {
-				__ lwc1(dest->as_float_reg(), src_reg, disp_value);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lwc1(dest->as_float_reg(), AT, 0);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lwc1(dest->as_float_reg(), AT, Assembler::split_low(disp_value));
-			}
-		}
-		break;
-
-	case T_DOUBLE:
-		{
-			//assert(to_reg.is_double(), "just check");
-
-			if (disp_reg == noreg) {
-				__ lwc1(dest->as_double_reg(), src_reg, disp_value);
-				__ lwc1(dest->as_double_reg()+1, src_reg, disp_value+4);
-			} else if (needs_patching) {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lwc1(dest->as_double_reg(), AT, 0);
-				__ lwc1(dest->as_double_reg()+1, AT, 4);
-			} else {
-				__ add(AT, src_reg, disp_reg);
-				offset = code_offset();
-				__ lwc1(dest->as_double_reg(), AT, Assembler::split_low(disp_value));
-				__ lwc1(dest->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
-			}
-		}
-		break;
-		
-	default:
-		ShouldNotReachHere();
+    	}
+    	break;
+    
+    case T_DOUBLE:
+    	{
+    		//assert(to_reg.is_double(), "just check");
+    
+    		if (disp_reg == noreg) {
+    			__ lwc1(dest->as_double_reg(), src_reg, disp_value);
+    			__ lwc1(dest->as_double_reg()+1, src_reg, disp_value+4);
+    		} else if (needs_patching) {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lwc1(dest->as_double_reg(), AT, 0);
+    			__ lwc1(dest->as_double_reg()+1, AT, 4);
+    		} else {
+    			__ add(AT, src_reg, disp_reg);
+    			offset = code_offset();
+    			__ lwc1(dest->as_double_reg(), AT, Assembler::split_low(disp_value));
+    			__ lwc1(dest->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
+    		}
+    	}
+    	break;
+    	
+    default:
+    	ShouldNotReachHere();
 	}
 
 	if (needs_patching) {
@@ -1319,29 +1289,12 @@
 void LIR_Assembler::prefetchr(LIR_Opr src) {
   LIR_Address* addr = src->as_address_ptr();
   Address from_addr = as_Address(addr);
-/*
-  if (VM_Version::supports_sse2()) {
-    __ prefetchnta(from_addr);
-  } else if (VM_Version::supports_sse()) {
-    __ prefetcht2(from_addr);
-  }
-*/
-  }
+}
 
 
 void LIR_Assembler::prefetchw(LIR_Opr src) {
- /*
-  * if (!VM_Version::supports_prefetchw()) {
-    prefetchr(src);
-    return;
-  }
-
-  LIR_Address* addr = src->as_address_ptr();
-  Address from_addr = as_Address(addr);
-
-  __ prefetchw(from_addr);
-*/
-  }
+}
+
 NEEDS_CLEANUP; // This could be static? 
 Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
   int elem_size = type2aelembytes(type);
@@ -2792,6 +2745,7 @@
 		}
 
     assert(obj != k_RInfo, "must be different");
+
     __ verify_oop(obj);
     __ beq(obj, ZERO, zero);
     __ delayed()->nop();
@@ -3168,56 +3122,35 @@
     else if (left->is_single_stack()||left->is_address()){
      assert(left == dest, "left and dest must be equal");
     Address laddr = (left->is_single_stack())? (frame_map()->address_for_slot(left->single_stack_ix())):(as_Address(left->as_address_ptr()));
-   /* if (left->is_single_stack()) {
-       laddr = frame_map()->address_for_slot(left->single_stack_ix());
-    } else if (left->is_address()) {
-      laddr = as_Address(left->as_address_ptr());
-    } else {
-      ShouldNotReachHere();
-    }
-*/
+
     if (right->is_single_cpu()) {
       Register rreg = right->as_register();
       switch (code) {
-        case lir_add: //__ addl(laddr, rreg); 
+        case lir_add: 
 	        __ lw(AT, laddr);
-		__ add(AT,AT,rreg);
+      		__ add(AT,AT,rreg);
 	        __ sw(AT, laddr);	
-		break;
+		      break;
         case lir_sub: 
-	//	__ subl(laddr, rreg); 
-		__ lw(AT, laddr);
-		__ sub(AT,AT,rreg);
+      		__ lw(AT, laddr);
+		      __ sub(AT,AT,rreg);
 	        __ sw(AT, laddr);	
-		
-		break;
+		      break;
         default:      ShouldNotReachHere();
       }
     } else if (right->is_constant()) {
 	    jint c = right->as_constant_ptr()->as_jint();
 	    switch (code) {
 		    case lir_add: {
-					  //  switch (c) {
-					  //    case  1: __ incl(laddr);    break;
-					  //   case -1: __ decl(laddr);    break;
-					  //  default: __ addl(laddr, c); break;
-					  //FIXME, If c is not a 16-imm, it will be wrong here 
 					  __ lw(AT, laddr); 
 					  __ addi(AT, AT, c); 
 					  __ sw(AT, laddr); 
-					  //}
 					  break;
 				  }
 		    case lir_sub: {
-					  //  switch (c) 
-					  // {
-					  // case  1: __ decl(laddr);    break;
-					  // case -1: __ incl(laddr);    break;
-					  //default: __ subl(laddr, c); break;
 					  __ lw(AT, laddr); 
 					  __ addi(AT, AT, -c);
 					  __ sw(AT, laddr);
-					  //}
 					  break;
 				  }
 		    default: ShouldNotReachHere();
@@ -3225,11 +3158,7 @@
     } else {
 	    ShouldNotReachHere();
     }
-
-
-    } 
-  
-  else {
+  } else {
 		ShouldNotReachHere();
 	}
 }
@@ -3278,7 +3207,6 @@
 			case lir_logic_and: 
 				__ andr (dstreg, reg, AT); 
 				break;
-		//	case lir_logic_orcc: // fall through
 			case lir_logic_or:  
 				__ orr(dstreg, reg, AT);
 				break;
@@ -3291,17 +3219,15 @@
 			// added support for stack operands
 			Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
 			switch (code) {
-				case lir_logic_and: //__ andl (reg, raddr); 
+				case lir_logic_and: 
 					__ lw(AT,raddr); 
 					__ andr (reg, reg,AT); 
 					break;
 				case lir_logic_or:  
-					// __ orl  (reg, raddr); 
 					__ lw(AT,raddr);	
 					__ orr (reg, reg,AT); 
 					break;
 				case lir_logic_xor:
-					// __ xorl (reg, raddr); 
 					__ lw(AT,raddr);
 					__ xorr(reg,reg,AT);
 					break;
@@ -3311,7 +3237,6 @@
 			Register rright = right->as_register();
 			switch (code) {
 				case lir_logic_and: __ andr (dstreg, reg, rright); break;
-						    //case lir_logic_orcc: // fall through
 				case lir_logic_or : __ orr  (dstreg, reg, rright); break;
 				case lir_logic_xor: __ xorr (dstreg, reg, rright); break;
 				default: ShouldNotReachHere();
@@ -3325,7 +3250,6 @@
 
 		if (right->is_constant()) {
 //			assert_different_registers(l_lo, l_hi, dst_lo, dst_hi);
-
 			int r_lo = right->as_constant_ptr()->as_jint_lo();
 			int r_hi = right->as_constant_ptr()->as_jint_hi();
 
@@ -3357,9 +3281,6 @@
 		} else {
 			Register r_lo = right->as_register_lo();
 			Register r_hi = right->as_register_hi();
-#if 0
-			assert_different_registers(l_lo, l_hi, dst_lo, dst_hi, r_lo, r_hi);
-#endif
 
 			switch (code) {
 				case lir_logic_and: 
@@ -3927,18 +3848,18 @@
 }
 
 void LIR_Assembler::store_parameter(jobject o,  int offset_from_esp_in_words) {
-   assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
-   int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
-   assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
-  // __ movl (Address(esp, offset_from_esp_in_bytes), o);
-   //__ move(AT, o);
-   int oop_index = __ oop_recorder()->find_index(o);
+  assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
+  int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
+  assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+ // __ movl (Address(esp, offset_from_esp_in_bytes), o);
+  //__ move(AT, o);
+  int oop_index = __ oop_recorder()->find_index(o);
 	RelocationHolder rspec = oop_Relocation::spec(oop_index);
 	__ relocate(rspec);
 	__ lui(AT, Assembler::split_high((int)o));
 	__ addiu(AT, AT, Assembler::split_low((int)o));
 
-   __ sw(AT, SP, offset_from_sp_in_bytes);
+  __ sw(AT, SP, offset_from_sp_in_bytes);
 
 }
 
--- a/hotspot/src/cpu/mips/vm/c1_Runtime1_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_Runtime1_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -87,7 +87,7 @@
 	lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); //by yyq
 	//FIXME , in x86 version , the second parameter is false, why true here? @jerome, 12/31, 06  
 	//  reset_last_Java_frame(thread, true);
-	reset_last_Java_frame(thread, true, true);
+	reset_last_Java_frame(thread, true, false);
 	// check for pending exceptions
 	{ 
 		Label L;
@@ -115,10 +115,8 @@
 		} else {
 			jmp(Runtime1::entry_for(Runtime1::forward_exception_id), 
 					relocInfo::runtime_call_type);
-			delayed()->nop(); 
+			delayed()->nop();
 		}
-
-
 		bind(L);
 	}
 	// get oop results if there are any and reset the values in the thread
@@ -137,15 +135,12 @@
 
 
 int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1) {
-	///pushl(arg1);
 	if (arg1 != A1) move(A1, arg1);
 	return call_RT(oop_result1, oop_result2, entry, 1);
 }
 
 
 int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2) {
-	///pushl(arg2);
-	///pushl(arg1);
 	if (arg1!=A1) move(A1, arg1);
 	if (arg2!=A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
 	return call_RT(oop_result1, oop_result2, entry, 2);
@@ -153,9 +148,6 @@
 
 
 int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2, Register arg3) {
-	///pushl(arg3);
-	///pushl(arg2);
-	///pushl(arg1);
 	if (arg1!=A1) move(A1, arg1);
 	if (arg2!=A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
 	if (arg3!=A3) move(A3, arg3); assert(arg3 != A1 && arg3 != A2, "smashed argument");			
@@ -218,7 +210,6 @@
 const int fpu_stack_as_doubles_size = 64;
 */
 const int float_regs_as_doubles_size_in_words = 16;
-//const int xmm_regs_as_doubles_size_in_words = 16;
 
 //FIXME, 
 // Stack layout for saving/restoring  all the registers needed during a runtime
@@ -269,75 +260,13 @@
 //FIXME, I have no idea which register should be saved . @jerome
 static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
 		bool save_fpu_registers = true, bool describe_fpu_registers = false) {
+
 	int frame_size = reg_save_frame_size + num_rt_args; // args + thread
 	sasm->set_frame_size(frame_size);
 
 	// record saved value locations in an OopMap
 	// locations are offsets from sp after runtime call; num_rt_args is number of arguments 
 	// in call, including thread
-	OopMap* map = new OopMap(frame_size, 0);
-	/*  map->set_callee_saved(VMRegImpl::stack2reg(eax_off + num_rt_args), eax->as_VMReg());
-	    map->set_callee_saved(VMRegImpl::stack2reg(ecx_off + num_rt_args), ecx->as_VMReg());
-	    map->set_callee_saved(VMRegImpl::stack2reg(edx_off + num_rt_args), edx->as_VMReg());
-	    map->set_callee_saved(VMRegImpl::stack2reg(ebx_off + num_rt_args), ebx->as_VMReg());
-	    map->set_callee_saved(VMRegImpl::stack2reg(esi_off + num_rt_args), esi->as_VMReg());
-	    map->set_callee_saved(VMRegImpl::stack2reg(edi_off + num_rt_args), edi->as_VMReg());
-	    */
-	map->set_callee_saved(VMRegImpl::stack2reg(V1_off + num_rt_args), V1->as_VMReg());
-	map->set_callee_saved(VMRegImpl::stack2reg(V0_off + num_rt_args), V0->as_VMReg());
-	map->set_callee_saved(VMRegImpl::stack2reg(T0_off + num_rt_args), T0->as_VMReg());
-	return map;
-}
-
-#if 0
-static void print_live_registers(StubAssembler* sasm)
-{
-	__ pushad();
-	__ addiu(SP, SP, -1 * wordSize);
-	for(int i = 0; i < 32; i++)
-	{
-		__ move(A0,(Register)(i));
-		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int));
-		__ delayed()->nop();
-	}
-
-	__ addiu(SP, SP, wordSize);
-	__ popad();
-}
-#endif
-
-//FIXME, Is it enough to save this registers  by yyq
-static OopMap* save_live_registers(StubAssembler* sasm, 
-                                   int num_rt_args,
-		                   bool save_fpu_registers = true, 
-                                   bool describe_fpu_registers = false) {
-  //const int reg_save_frame_size = return_off + 1 + num_rt_args;
-  __ block_comment("save_live_registers");
-  int frame_size = reg_save_frame_size + num_rt_args; // args + thread //by yyq
-  sasm->set_frame_size(frame_size);
-  // save all register state - int, fpu  
-  __ addi(SP, SP, -(reg_save_frame_size-2)* wordSize);
-  
-  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
-    __ sw(r, SP, (r->encoding() - T0->encoding() + T0_off) * wordSize);
-  }
-  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
-    __ sw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
-  }
-  __ sw(V0, SP, V0_off * wordSize);
-  __ sw(V1, SP, V1_off * wordSize);	
-  
-  // save all fp data registers in double-precision format for use in possible deoptimization;
-  // must first restore FPUStatusWord that was initialized by push_FPU_state 
-  // (fnsave instruction)
-  
-  // record saved value locations in an OopMap
-  // locations are offsets from sp after runtime call;
-  // num_rt_args is number of arguments in call including thread
-  
-  // locate the stack base for the register save area
-  //  const int base = SharedInfo::stack0 + num_rt_args;
-  
   OopMap* map = new OopMap(reg_save_frame_size, 0);
   
   map->set_callee_saved(VMRegImpl::stack2reg(V0_off + num_rt_args), V0->as_VMReg());
@@ -350,8 +279,29 @@
   for (Register r = S0; r != S7->successor(); r = r->successor() ) {
     map->set_callee_saved(VMRegImpl::stack2reg(S0_off + num_rt_args + i++), r->as_VMReg());
   }
+	return map;
+}
+
+//FIXME, Is it enough to save this registers  by yyq
+static OopMap* save_live_registers(StubAssembler* sasm, 
+                                   int num_rt_args,
+		                   bool save_fpu_registers = true, 
+                                   bool describe_fpu_registers = false) {
+  //const int reg_save_frame_size = return_off + 1 + num_rt_args;
+  __ block_comment("save_live_registers");
+  // save all register state - int, fpu  
+  __ addi(SP, SP, -(reg_save_frame_size - 2)* wordSize);
   
-  return map;
+  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
+    __ sw(r, SP, (r->encoding() - T0->encoding() + T0_off) * wordSize);
+  }
+  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
+    __ sw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
+  }
+  __ sw(V0, SP, V0_off * wordSize);
+  __ sw(V1, SP, V1_off * wordSize);	
+  
+  return generate_oop_map(sasm, num_rt_args, save_fpu_registers, describe_fpu_registers);
 }
 
 static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
@@ -383,7 +333,7 @@
    	__ lw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
    }
    __ lw(V1, SP, V1_off * wordSize);	
-   __ addiu(SP, SP, (reg_save_frame_size - 2) * wordSize);
+   __ addiu(SP, SP, (reg_save_frame_size - 2)* wordSize);
 }
 void Runtime1::initialize_pd() {
   // nothing to do
@@ -391,32 +341,24 @@
 
 // target: the entry point of the method that creates and posts the exception oop
 // has_argument: true if the exception needs an argument (passed on stack because registers must be preserved)
-//OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
 OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
 	// preserve all registers
-	int num_rt_args = has_argument ? 2 : 1;
-//	OopMap* oop_map = save_live_registers(sasm, num_rt_args);
 	OopMap* oop_map = save_live_registers(sasm, 0);
 
 	// now all registers are saved and can be used freely
 	// verify that no old value is used accidentally
 	//all reigster are saved , I think mips do not need this
-	// __ invalidate_registers(true, true, true, true, true, true);
 
 	// registers used by this stub
-	//  const Register temp_reg = ebx;
 	const Register temp_reg = T3; 
 	// load argument for exception that is passed as an argument into the stub
 	if (has_argument) {
-		//  __ movl(temp_reg, Address(ebp, 2*BytesPerWord));
 		__ lw(temp_reg, Address(FP, 2*BytesPerWord));
-		//__ pushl(temp_reg);
-	//	__ push(temp_reg);
 	}
 	int call_offset;
 	if (has_argument) 
 	 	call_offset = __ call_RT(noreg, noreg, target, temp_reg);
-        else
+  else
 	 	call_offset = __ call_RT(noreg, noreg, target);
 	
 	OopMapSet* oop_maps = new OopMapSet();
@@ -430,29 +372,22 @@
 //FIXME I do not know which reigster to use.should use T3 as real_return_addr @jerome
 void Runtime1::generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool save_fpu_registers) {
 	// incoming parameters
-	// const Register exception_oop = eax;
 	const Register exception_oop = V0;
-	//  const Register exception_pc = edx;
 	const Register exception_pc = V1;
 	// other registers used in this stub
-	// const Register real_return_addr = ebx;
 	const Register real_return_addr = T3;
-	// const Register thread = edi;
 	const Register thread = S6;
 
 	__ block_comment("generate_handle_exception");
 
 #ifdef TIERED
 	// C2 can leave the fpu stack dirty
-	// if (UseSSE < 2 ) {
 	__ empty_FPU_stack();
 	//}
 #endif // TIERED
 
-	// verify that only eax and edx is valid at this time
-	//for mips , I think this is not required
-	// __ invalidate_registers(false, true, true, false, true, true);
-	// verify that eax contains a valid exception
+	// verify that only V0 and V1 is valid at this time
+	// verify that V0 contains a valid exception
 	__ verify_not_null_oop(exception_oop);
 
 	// load address of JavaThread object for thread-local data
@@ -462,16 +397,12 @@
 	// check that fields in JavaThread for exception oop and issuing pc are 
 	// empty before writing to them
 	Label oop_empty;
-	//__ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
-	//__ jcc(Assembler::equal, oop_empty);
 	__ lw(AT,Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
 	__ beq(AT,ZERO,oop_empty); 
 	__ delayed()->nop(); 
 	__ stop("exception oop already set");
 	__ bind(oop_empty);
 	Label pc_empty;
-	//  __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
-	// __ jcc(Assembler::equal, pc_empty);
 	__ lw(AT,Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
 	__ beq(AT,ZERO,pc_empty); 
 	__ delayed()->nop(); 
@@ -481,52 +412,41 @@
 
 	// save exception oop and issuing pc into JavaThread
 	// (exception handler will load it from here)
-	//__ movl(Address(thread, JavaThread::exception_oop_offset()), exception_oop);
-	__ sw(exception_oop,Address(thread, in_bytes(JavaThread::exception_oop_offset())));
-	//__ movl(Address(thread, JavaThread::exception_pc_offset()), exception_pc);
-	__ sw(exception_pc,Address(thread, in_bytes(JavaThread::exception_pc_offset())));
+	__ sw(exception_oop, Address(thread, in_bytes(JavaThread::exception_oop_offset())));
+	__ sw(exception_pc, Address(thread, in_bytes(JavaThread::exception_pc_offset())));
 
 	// save real return address (pc that called this stub)
-	//  __ movl(real_return_addr, Address(ebp, 1*BytesPerWord));   
-	//__ lw(real_return_addr, Address(ebp, 1*BytesPerWord));   
 	__ lw(real_return_addr, FP, 1*BytesPerWord);   
-	// __ movl(Address(esp, temp_1_off * BytesPerWord), real_return_addr);
-	__ sw(real_return_addr,SP, temp_1_off * BytesPerWord);
+	__ sw(real_return_addr, SP, temp_1_off * BytesPerWord);
 
 	// patch throwing pc into return address (has bci & oop map)
-	//__ movl(Address(ebp, 1*BytesPerWord), exception_pc);       
-	__ sw(exception_pc,FP, 1*BytesPerWord);       
+	__ sw(exception_pc, FP, 1*BytesPerWord);       
 	// compute the exception handler. 
 	// the exception oop and the throwing pc are read from the fields in JavaThread
 	int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
 				exception_handler_for_pc));
 	oop_maps->add_gc_map(call_offset, oop_map);
-	// eax: handler address or NULL if no handler exists
+	// V0:  handler address or NULL if no handler exists
 	//      will be the deopt blob if nmethod was deoptimized while we looked up
 	//      handler regardless of whether handler existed in the nmethod.
 
-	// only eax is valid at this time, all other registers have been destroyed by the 
+	// only V0 is valid at this time, all other registers have been destroyed by the 
 	// runtime call
-	//  __ invalidate_registers(false, true, true, true, true, true);
 
 	// Do we have an exception handler in the nmethod?
 	Label no_handler;
 	Label done;
-	//  __ testl(eax, eax);
-	//  __ jcc(Assembler::zero, no_handler);
-	__ beq(exception_oop,ZERO,no_handler);
+	__ beq(V0, ZERO, no_handler);
 	__ delayed()->nop(); 
 	// exception handler found
 	// patch the return address -> the stub will directly return to the exception handler
-	// __ movl(Address(ebp, 1*BytesPerWord), eax); 
-	__ sw(exception_oop, FP, 1*BytesPerWord); 
+	__ sw(V0, FP, 1 * BytesPerWord); 
 
 	// restore registers
 	restore_live_registers(sasm, save_fpu_registers);
 
 	// return to exception handler
 	__ leave();
-	//__ ret(0);
 	__ jr(RA);
 	__ delayed()->nop(); 
 	__ bind(no_handler);
@@ -535,26 +455,19 @@
 	// there is no need to restore the registers
 
 	// restore the real return address that was saved before the RT-call
-	//__ movl(real_return_addr, Address(esp, temp_1_off * BytesPerWord));
-	//__ movl(Address(ebp, 1*BytesPerWord), real_return_addr);
-	__ lw(real_return_addr,SP, temp_1_off * BytesPerWord);
-	__ sw(real_return_addr, FP, 1*BytesPerWord); 
+	__ lw(real_return_addr, SP, temp_1_off * BytesPerWord);
+	__ sw(real_return_addr, FP, 1 * BytesPerWord); 
 	// load address of JavaThread object for thread-local data
 	__ get_thread(thread);
 	// restore exception oop into eax (convention for unwind code)
-	//  __ movl(exception_oop, Address(thread, JavaThread::exception_oop_offset()));
 	__ lw(exception_oop, thread, in_bytes(JavaThread::exception_oop_offset()));
 
 	// clear exception fields in JavaThread because they are no longer needed
 	// (fields must be cleared because they are processed by GC otherwise)
-	// __ movl(Address(thread, JavaThread::exception_oop_offset()), NULL_WORD);
-	//  __ movl(Address(thread, JavaThread::exception_pc_offset()), NULL_WORD);
 	__ sw(ZERO,thread, in_bytes(JavaThread::exception_oop_offset()));
 	__ sw(ZERO,thread, in_bytes(JavaThread::exception_pc_offset())); 
 	// pop the stub frame off
 	__ leave();
-	//__addiu(SP, FP, wordSize);
-	//__lw(FP, SP, (-1) * wordSize);
 	generate_unwind_exception(sasm);
 	__ stop("should not reach here");
 }
@@ -577,18 +490,14 @@
 	// check that fields in JavaThread for exception oop and issuing pc are empty
 	__ get_thread(thread);
 	Label oop_empty;
-	//  __ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
 	__ lw(AT, thread, in_bytes(JavaThread::exception_oop_offset())); 
-	//__ jcc(Assembler::equal, oop_empty);
 	__ beq(AT,ZERO,oop_empty); 
 	__ delayed()->nop(); 
 	__ stop("exception oop must be empty");
 	__ bind(oop_empty);
 
 	Label pc_empty;
-	// __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
 	__ lw(AT, thread, in_bytes(JavaThread::exception_pc_offset())); 
-	//__ jcc(Assembler::equal, pc_empty);
 	__ beq(AT,ZERO, pc_empty); 
 	__ delayed()->nop(); 
 	__ stop("exception pc must be empty");
@@ -601,12 +510,10 @@
 	__ addi(SP, FP, wordSize);	
 	__ lw(FP, SP, - 4);
 	// store return address (is on top of stack after leave)
-	// __ movl(exception_pc, Address(esp));
 	__ lw(exception_pc,SP,0);
 	__ verify_oop(exception_oop);
 
 	// save exception oop from eax to stack before call
-	// __ pushl(exception_oop);
 	__ push(exception_oop);
 	// search the exception handler address of the caller (using the return address)
 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, 
@@ -614,13 +521,10 @@
 	// eax: exception handler address of the caller
 
 	// only eax is valid at this time, all other registers have been destroyed by the call
-	// __ invalidate_registers(false, true, true, true, true, true);
 
 	// move result of call into correct register
-	//__ movl(handler_addr, eax);
 	__ move(handler_addr, V0);
 	// restore exception oop in eax (required convention of exception handler)
-	// __ popl(exception_oop);
 	__ super_pop(exception_oop);
 
 	__ verify_oop(exception_oop);
@@ -628,7 +532,6 @@
 	// get throwing pc (= return address).
 	// edx has been destroyed by the call, so it must be set again
 	// the pop is also necessary to simulate the effect of a ret(0)
-	// __ popl(exception_pc);
 	__  super_pop(exception_pc);
 	// verify that that there is really a valid exception in eax
 	__ verify_not_null_oop(exception_oop);
@@ -642,7 +545,6 @@
 	// eax: exception oop
 	// edx: throwing pc
 	// ebx: exception handler
-	//  __ jmp(handler_addr);
 	__ jr(handler_addr);
 	__ delayed()->nop();
 }
@@ -710,10 +612,8 @@
 #endif
     
 	__ lw (SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
-	//  __ reset_last_Java_frame(thread, true);
 	__ reset_last_Java_frame(thread, true,true);
 	// discard thread arg
-//	__ addi(SP, SP, 1 * wordSize);
 	// check for pending exceptions
 	{ 
 		Label L, skip;
@@ -723,11 +623,17 @@
 		__ delayed()->nop();
 		// exception pending => remove activation and forward to exception handler
 
-		//		__ beq(V0, ZERO, no_deopt);	// have we deoptimized?
 		__ bne(V0,ZERO, skip);	
 		__ delayed()->nop();	
-		///	__ beq(V0, ZERO, Runtime1::entry_for(Runtime1::forward_exception_id),
 		//			relocInfo::runtime_call_type);
+    __ pushad();
+    __ move(A0, 0x66bb);
+    __ addiu(SP, SP, -4);
+    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int));
+    __ delayed()->nop();
+    __ addiu(SP, SP, 4);
+    __ popad();
+
 		__ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), 
 				relocInfo::runtime_call_type); 
 		__ delayed()->nop(); 	
@@ -737,25 +643,19 @@
 		// JavaThread, so copy and clear pending exception.
 
 		// load and clear pending exception
-		// __ movl(eax, Address(thread, Thread::pending_exception_offset()));
 		__ lw(V0, Address(thread,in_bytes(Thread::pending_exception_offset())));
-		//__ movl(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
 		__ sw(ZERO,Address(thread, in_bytes(Thread::pending_exception_offset())));
 
 		// check that there is really a valid exception 
-		//__ verify_not_null_oop(eax);
 		__ verify_not_null_oop(V0);
 
 		// load throwing pc: this is the return address of the stub
-		// __ movl(edx, Address(esp, return_off * BytesPerWord));
 		__ lw(V1, Address(SP, return_off * BytesPerWord));
 
 
 #ifdef ASSERT
 		// check that fields in JavaThread for exception oop and issuing pc are empty
 		Label oop_empty;
-		// __ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
-		//__ jcc(Assembler::equal, oop_empty);
 		__ lw(AT, Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
 		__ beq(AT,ZERO,oop_empty); 
 		__ delayed()->nop(); 
@@ -763,8 +663,6 @@
 		__ bind(oop_empty);
 
 		Label pc_empty;
-		// __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
-		//__ jcc(Assembler::equal, pc_empty);
 		__ lw(AT, Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
 		__ beq(AT,ZERO,pc_empty); 
 		__ delayed()->nop(); 
@@ -773,16 +671,12 @@
 #endif
 
 		// store exception oop and throwing pc to JavaThread
-		//    __ movl(Address(thread, JavaThread::exception_oop_offset()), eax);
 		__ sw(V0,Address(thread, in_bytes(JavaThread::exception_oop_offset())));
-		//__ movl(Address(thread, JavaThread::exception_pc_offset()), edx);
 		__ sw(V1,Address(thread, in_bytes(JavaThread::exception_pc_offset())));
 
 		restore_live_registers(sasm);
 
 		__ leave();
-		// __ addl(esp, 4);  // remove return address from stack
-		//__ addi(SP,SP, 4);  // remove return address from stack
 
 		// Forward the exception directly to deopt blob. We can blow no
 		// registers and must leave throwing pc on the stack.  A patch may
@@ -820,7 +714,6 @@
 }
 
 
-//OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm, int* frame_size) {
 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
 	// for better readability
 	const bool must_gc_arguments = true;
@@ -847,22 +740,16 @@
         const Register thread = TREG;
         const Register exception_oop = V0;
         const Register exception_pc = V1;
-
         // load pending exception oop into eax
-       // __ movl(exception_oop, Address(thread, Thread::pending_exception_offset()));
         __ lw(exception_oop, thread, in_bytes(Thread::pending_exception_offset()));
         // clear pending exception
-        //__ movl(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
-        __ sw(ZERO,thread, in_bytes(Thread::pending_exception_offset()));
+        __ sw(ZERO, thread, in_bytes(Thread::pending_exception_offset()));
 
-        // load issuing PC (the return address for this stub) into edx
-        //__ movl(exception_pc, Address(ebp, 1*BytesPerWord));
+        // load issuing PC (the return address for this stub) into V1
         __ lw(exception_pc, FP, 1*BytesPerWord);
 
         // make sure that the vm_results are cleared (may be unnecessary)
-        //__ movl(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
         __ sw(ZERO,Address(thread, in_bytes(JavaThread::vm_result_offset())));
-        //__ movl(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
         __ sw(ZERO,Address(thread, in_bytes(JavaThread::vm_result_2_offset())));
 
         // verify that that there is really a valid exception in eax
@@ -870,7 +757,7 @@
 
 
         oop_maps = new OopMapSet();
-        OopMap* oop_map = generate_oop_map(sasm, 1);
+        OopMap* oop_map = generate_oop_map(sasm, 0);
         generate_handle_exception(sasm, oop_maps, oop_map);
         __ stop("should not reach here");
       }
@@ -968,15 +855,14 @@
 
 #ifdef TIERED
 //FIXME, I hava no idea which register to use
-   case counter_overflow_id:
+    case counter_overflow_id:
       {
-//        Register bci = eax;
         Register bci = T5;
         __ enter();
         OopMap* map = save_live_registers(sasm, 0);
         // Retrieve bci
         __ lw(bci, Address(ebp, 2*BytesPerWord)); 
-	int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci);
+	      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci);
         oop_maps = new OopMapSet();
         oop_maps->add_gc_map(call_offset, map);
         restore_live_registers(sasm);
@@ -989,42 +875,42 @@
 
 
 
-   case new_type_array_id:
-   case new_object_array_id:
+    case new_type_array_id:
+    case new_object_array_id:
       { 
-	// i use T2 as length register, T4 as klass register, V0 as result register. 
-	// MUST accord with NewTypeArrayStub::emit_code, NewObjectArrayStub::emit_code
-	Register length   = T2; // Incoming
-	Register klass    = T4; // Incoming
-	Register obj      = V0; // Result
-
-	if (id == new_type_array_id) {
+        // i use T2 as length register, T4 as klass register, V0 as result register. 
+        // MUST accord with NewTypeArrayStub::emit_code, NewObjectArrayStub::emit_code
+        Register length   = T2; // Incoming
+        Register klass    = T4; // Incoming
+        Register obj      = V0; // Result
+        
+        if (id == new_type_array_id) {
           __ set_info("new_type_array", dont_gc_arguments);
-	} else {
+        } else {
           __ set_info("new_object_array", dont_gc_arguments);
-	}
-
-	if (UseTLAB && FastTLABRefill) {
-	  Register arr_size = T0;
+        }
+        
+        if (UseTLAB && FastTLABRefill) {
+          Register arr_size = T0;
           Register t1       = T1; 
           Register t2       = T3;
           Label slow_path;
           assert_different_registers(length, klass, obj, arr_size, t1, t2);
-
+        
           // check that array length is small enough for fast path
           __ move(AT, C1_MacroAssembler::max_array_allocation_length);
           __ slt(AT, AT, length);
-	  __ bne(AT, ZERO, slow_path);
+          __ bne(AT, ZERO, slow_path);
           __ delayed()->nop();
-
-	  // if we got here then the TLAB allocation failed, so try
+        
+          // if we got here then the TLAB allocation failed, so try
           // refilling the TLAB or allocating directly from eden.
           Label retry_tlab, try_eden;
           //T0,T1,T5,T8 have changed! 
           __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves ebx & edx
-
+        
           __ bind(retry_tlab);
-
+        
           // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
           __ lw(t1, klass, klassOopDesc::header_size() * HeapWordSize 
                            + Klass::layout_helper_offset_in_bytes());	 
@@ -1036,8 +922,8 @@
           __ add(arr_size, AT, arr_size);
           __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
           __ andi(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
-
- 
+        
+        
           __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
           __ initialize_header(obj, klass, length,t1,t2);
           __ lbu(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize 
@@ -1050,7 +936,7 @@
           __ verify_oop(obj);
           __ jr(RA);
           __ delayed()->nop();
-
+        
           __ bind(try_eden);
           // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
           __ lw(t1, klass, klassOopDesc::header_size() * HeapWordSize 
@@ -1063,10 +949,10 @@
           __ add(arr_size, AT, arr_size);
           __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
           __ andi(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
-
-
+        
+        
           __ eden_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
-
+        
           __ initialize_header(obj, klass, length,t1,t2);
           __ lbu(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize 
                                     + Klass::layout_helper_offset_in_bytes() 
@@ -1074,29 +960,28 @@
           __ andi(t1, t1, Klass::_lh_header_size_mask);
           __ sub(arr_size, arr_size, t1);  // body length
           __ add(t1, t1, obj);             // body start
-
+        
           __ initialize_body(t1, arr_size, 0, t2);
           __ verify_oop(obj);
           __ jr(RA);
           __ delayed()->nop();
           __ bind(slow_path);
         }
-
-
+       
+      
         __ enter();
         OopMap* map = save_live_registers(sasm, 0);
         int call_offset;
         if (id == new_type_array_id) {
           call_offset = __ call_RT(obj, noreg, 
-                                   CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+                                    CAST_FROM_FN_PTR(address, new_type_array), klass, length);
         } else {
           call_offset = __ call_RT(obj, noreg, 
-				   CAST_FROM_FN_PTR(address, new_object_array), klass, length);
-	}
-
+                                   CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+        }
+      
         oop_maps = new OopMapSet();
         oop_maps->add_gc_map(call_offset, map);
-	      //FIXME 
         restore_live_registers_except_V0(sasm);
         __ verify_oop(obj);
         __ leave();	
@@ -1105,7 +990,7 @@
       }
       break;
 
-	case new_multi_array_id:
+    case new_multi_array_id:
       { 
 	      StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
 	     //refer to c1_LIRGenerate_mips.cpp:do_NewmultiArray 
@@ -1120,7 +1005,6 @@
 	      oop_maps = new OopMapSet();
 	      oop_maps->add_gc_map(call_offset, map);
 	      //FIXME 
-	      // 	restore_live_registers_except_eax(sasm);
 	      restore_live_registers_except_V0(sasm);
 	      // V0: new multi array
 	      __ verify_oop(V0);
@@ -1134,66 +1018,34 @@
 
 	      // The object is passed on the stack and we haven't pushed a
 	      // frame yet so it's one work away from top of stack.
-	      // __ movl(eax, Address(esp, 1 * BytesPerWord));
-	    //jerome_for_debug 
-	//      __ lw(V0, Address(SP, (-2)* BytesPerWord));
-	 //     __ move(AT, (int)&jerome1); 
-	  //    __ sw(V0, AT, 0); 
-	   // __ lw(V0, Address(SP,  (-1)* BytesPerWord));
-	    //  __ move(AT, (int)&jerome2); 
-	     // __ sw(V0, AT, 0); 
-	   // __ lw(V0, Address(SP, 0 * BytesPerWord));
-	    //  __ move(AT, (int)&jerome3); 
-	     // __ sw(V0, AT, 0); 
-	   // __ lw(V0, Address(SP, 1 * BytesPerWord));
-	    //  __ move(AT, (int)&jerome4); 
-	     // __ sw(V0, AT, 0); 
-	   // __ lw(V0, Address(SP, 2 * BytesPerWord));
-	    //  __ move(AT, (int)&jerome5); 
-	     // __ sw(V0, AT, 0); 
-	     // __ move(AT, (int)&jerome6); 
-	     // __ sw(T0, AT, 0); 
-
-//reference to LIRGenerator::do_RegisterFinalizer, call_runtime
-	      //__ lw(V0, Address(SP, 0 * BytesPerWord));
+        //reference to LIRGenerator::do_RegisterFinalizer, call_runtime
 	      __ move(V0, A0); 
 	      __ verify_oop(V0);
 	      // load the klass and check the has finalizer flag
 	      Label register_finalizer;
-	      // Register t = esi;
 	      Register t = T5;
-	      //__ movl(t, Address(eax, oopDesc::klass_offset_in_bytes()));
 	      __ lw(t, Address(V0, oopDesc::klass_offset_in_bytes()));
-	      //__ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
 	      __ lw(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
-	      //__ testl(t, JVM_ACC_HAS_FINALIZER);
-	      //__ jcc(Assembler::notZero, register_finalizer);
 	      __ move(AT, JVM_ACC_HAS_FINALIZER); 
 	      __ andr(AT, AT, t); 
 	    
-	      //__ andi(AT,AT, JVM_ACC_HAS_FINALIZER); 
 	      __ bne(AT,ZERO, register_finalizer);	
 	      __ delayed()->nop();	
-	      //__ ret(0);
 	      __ jr(RA); 
 	      __ delayed()->nop(); 
 	      __ bind(register_finalizer);
 	      __ enter();
-	     // OopMap* map = save_live_registers(sasm, 2 /*num_rt_args */);
 	      OopMap* map = save_live_registers(sasm, 0 /*num_rt_args */);
 
-	      //__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
-	                         //SharedRuntime::register_finalizer), eax);
 	      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
 				      SharedRuntime::register_finalizer), V0);
 	      oop_maps = new OopMapSet();
-              oop_maps->add_gc_map(call_offset, map);
+        oop_maps->add_gc_map(call_offset, map);
 
 	      // Now restore all the live registers
 	      restore_live_registers(sasm);
 
 	      __ leave();
-	      //__ ret(0);
 	      __ jr(RA);
 	      __ delayed()->nop();
       }
@@ -1203,7 +1055,7 @@
 	case throw_range_check_failed_id:
       { StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
 	      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
-				      throw_range_check_exception),true);
+              throw_range_check_exception),true);
       }
       break;
 
@@ -1284,25 +1136,16 @@
 			// A0:klass_RInfo		sub
 			// A1:k->encoding() super
 			__ set_info("slow_subtype_check", dont_gc_arguments);
-			///        __ pushl(edi);
-			///        __ pushl(esi);
-			///        __ pushl(ecx);
-			///        __ pushl(eax);
-			///    __ movl(esi, Address(esp, (super_off - 1) * BytesPerWord)); // super
-			///    __ movl(eax, Address(esp, (sub_off   - 1) * BytesPerWord)); // sub
 			__ sw(T0, SP, (-1) * wordSize);
 			__ sw(T1, SP, (-2) * wordSize);
 			__ addiu(SP, SP, (-2) * wordSize);
 
-			///__ movl(edi,Address(esi,sizeof(oopDesc) 
 			//+ Klass::secondary_supers_offset_in_bytes()));
 			__ lw(AT, A0, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
-			///        __ movl(ecx,Address(edi,arrayOopDesc::length_offset_in_bytes()));
 			__ lw(T1, AT, arrayOopDesc::length_offset_in_bytes());
 			__ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
 
 			Label miss, hit, loop;
-			///        __ repne_scan();
 			//			T1:count, AT:array, A1:sub maybe supper
 			__ bind(loop);
 			__ beq(T1, ZERO, miss);
@@ -1315,9 +1158,7 @@
 			__ addiu(AT, AT, 4);
 
 			__ bind(hit);
-			///__ movl(Address(esi,sizeof(oopDesc) 
 			//+ Klass::secondary_super_cache_offset_in_bytes()), eax);
-			///__ movl(Address(esp, (super_off   - 1) * BytesPerWord), 1); // result
 			__ sw(A1, A0, sizeof(oopDesc) 
 					+ Klass::secondary_super_cache_offset_in_bytes());
 			__ addiu(V0, ZERO, 1);
@@ -1329,7 +1170,6 @@
 
 
 			__ bind(miss);
-			/// __ movl(Address(esp, (super_off   - 1) * BytesPerWord), 0); // result
 			__ move(V0, ZERO);
 			__ addiu(SP, SP, 2 * wordSize);
 			__ lw(T0, SP, (-1) * wordSize);
@@ -1339,67 +1179,53 @@
 		}
 		break;
 
-        case monitorenter_nofpu_id:
-              save_fpu_registers = false;
-              // fall through
+  case monitorenter_nofpu_id:
+    save_fpu_registers = false;// fall through
 
 	case monitorenter_id:
-	      {     /* 
-		       StubFrame f(sasm, "monitorenter", dont_gc_arguments, V0, T6);
-		      // V0: object
-		      // T6: lock address
-		      __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), V0, T6);
-		      */	
-		      StubFrame f(sasm, "monitorenter", dont_gc_arguments);
-		   //   OopMap* map = save_live_registers(sasm, 3, save_fpu_registers);
-		      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
+    {
+	    StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+	    OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
 
-		      //f.load_argument(1, eax); // eax: object
-		      f.load_argument(1, V0); // eax: object
-		      //f.load_argument(0, ebx); // ebx: lock address
-		      f.load_argument(0, T6); // ebx: lock address
-		      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
-					      monitorenter), V0, T6);
+	    f.load_argument(1, V0); // V0: object
+	    f.load_argument(0, T6); // T6: lock address
+	    int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+		       monitorenter), V0, T6);
 
-		      oop_maps = new OopMapSet();
-		      oop_maps->add_gc_map(call_offset, map);
-		      restore_live_registers(sasm, save_fpu_registers);
-	      }
-	      break;
+	    oop_maps = new OopMapSet();
+	    oop_maps->add_gc_map(call_offset, map);
+	    restore_live_registers(sasm, save_fpu_registers);
+	  }
+	  break;
 
 	case monitorexit_nofpu_id:
-	      save_fpu_registers = false;
+	  save_fpu_registers = false;
 	      // fall through
 	case monitorexit_id:
-	      { 
-		      StubFrame f(sasm, "monitorexit", dont_gc_arguments);
-		     // OopMap* map = save_live_registers(sasm, 2, save_fpu_registers);
-		      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
-
-		      //f.load_argument(0, eax); // eax: lock address
-		      f.load_argument(0, T6); // eax: lock address
-		      // note: really a leaf routine but must setup last java sp
-		      //       => use call_RT for now (speed can be improved by
-		      //       doing last java sp setup manually)
-		      //  int call_offset = __ call_RT(noreg, noreg, 
-		      //  CAST_FROM_FN_PTR(address, monitorexit), eax);
-		      int call_offset = __ call_RT(noreg, noreg, 
-				      CAST_FROM_FN_PTR(address, monitorexit), T6);
-		      oop_maps = new OopMapSet();
-		      oop_maps->add_gc_map(call_offset, map);
-		      restore_live_registers(sasm, save_fpu_registers);
-
-	      }
-	      break;
+    { 
+      StubFrame f(sasm, "monitorexit", dont_gc_arguments);
+      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
+  
+      f.load_argument(0, T6); // eax: lock address
+      // note: really a leaf routine but must setup last java sp
+      //       => use call_RT for now (speed can be improved by
+      //       doing last java sp setup manually)
+      int call_offset = __ call_RT(noreg, noreg, 
+  	                                CAST_FROM_FN_PTR(address, monitorexit), T6);
+      oop_maps = new OopMapSet();
+      oop_maps->add_gc_map(call_offset, map);
+      restore_live_registers(sasm, save_fpu_registers);
+  
+    }
+    break;
 	      //  case init_check_patching_id:
 	case access_field_patching_id:
-	      { 
-		      StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
-		      // we should set up register map
-		      oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, 
-					      access_field_patching));
-	      }
-	      break;
+    { 
+      StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
+      // we should set up register map
+      oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+    }
+    break;
 
 	case load_klass_patching_id:
 		{ 
@@ -1421,34 +1247,30 @@
 			int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
 						Runtime1::post_jvmti_exception_throw), V0);
 			oop_maps = new OopMapSet();
-//			oop_maps->add_gc_map(call_offset, true, map);
 			oop_maps->add_gc_map(call_offset,  map);
 			restore_live_registers(sasm);
 		}
 		break;
 	case dtrace_object_alloc_id:
-		{ // eax: object
+		{ 
 			// V0:object 
 			StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
 			// we can't gc here so skip the oopmap but make sure that all
 			// the live registers get saved.
-			//save_live_registers(sasm, 1);
 			save_live_registers(sasm, 0);
 
-			//__ pushl(eax);
 			__ push_reg(V0);
 			__ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
 					relocInfo::runtime_call_type);
-			//__ popl(eax);
 			__ super_pop(V0);
 
 			restore_live_registers(sasm);
 		}
 		break;
 	case fpu2long_stub_id:
-	{
+	  {
                    //FIXME, I hava no idea how to port this	
-	}
+	  }
 	default:
 		{ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
 			__ move(A1, (int)id);
--- a/hotspot/src/cpu/mips/vm/sharedRuntime_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -280,11 +280,7 @@
 // a frame with no abi restrictions. Since we must observe abi restrictions
 // (like the placement of the register window) the slots must be biased by
 // the following value.
-/*
-static int reg2offset(VMReg r) {
-  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
-}
-*/
+
 static int reg2offset_in(VMReg r) { 
 	// Account for saved ebp and return address
 	// This should really be in_preserve_stack_slots
@@ -297,257 +293,56 @@
 
 // ---------------------------------------------------------------------------
 // Read the array of BasicTypes from a signature, and compute where the
-// arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
-// quantities.  Values less than VMRegImpl::stack0 are registers, those above
-// refer to 4-byte stack slots.  All stack slots are based off of the window
-// top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
+// quantities.  Values less than SharedInfo::stack0 are registers, those above
+// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp).
 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.  Register
-// values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
-// integer registers.  Values 64-95 are the (32-bit only) float registers.
-// Each 32-bit quantity is given its own number, so the integer registers
-// (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
-// an O0-low and an O0-high.  Essentially, all int register numbers are doubled.
-
-// Register results are passed in O0-O5, for outgoing call arguments.  To
-// convert to incoming arguments, convert all O's to I's.  The regs array
-// refer to the low and hi 32-bit words of 64-bit registers or stack slots.
-// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
-// 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
-// passed (used as a placeholder for the other half of longs and doubles in
-// the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
-// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
-// Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
-// == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
-// same VMRegPair.
+// up to RegisterImpl::number_of_registers) are the 32-bit
+// integer registers.
+
+// Pass first five oop/int args in registers T0, A0 - A3.
+// Pass float/double/long args in stack.
+// Doubles have precedence, so if you pass a mix of floats and doubles
+// the doubles will grab the registers before the floats will.
 
 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 // either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
-// units regardless of build.
+// units regardless of build. Of course for i486 there is no 64 bit build
 
 
 // ---------------------------------------------------------------------------
-// The compiled Java calling convention.  The Java convention always passes
-// 64-bit values in adjacent aligned locations (either registers or stack),
-// floats in float registers and doubles in aligned float pairs.  Values are
-// packed in the registers.  There is no backing varargs store for values in
-// registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
-// passed in I's, because longs in I's get their heads chopped off at
-// interrupt).
-/*
-int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
-                                           VMRegPair *regs,
-                                           int total_args_passed,
-                                           int is_outgoing) {
-  assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
-
-  // Convention is to pack the first 6 int/oop args into the first 6 registers
-  // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
-  // into F0-F7, extras spill to the stack.  Then pad all register sets to
-  // align.  Then put longs and doubles into the same registers as they fit,
-  // else spill to the stack.
-  const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
-  const int flt_reg_max = 8;
-  //
-  // Where 32-bit 1-reg longs start being passed
-  // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
-  // So make it look like we've filled all the G regs that c2 wants to use.
-  Register g_reg = TieredCompilation ? noreg : G1;
-
-  // Count int/oop and float args.  See how many stack slots we'll need and
-  // where the longs & doubles will go.
-  int int_reg_cnt   = 0;
-  int flt_reg_cnt   = 0;
-  // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
-  // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
-  int stk_reg_pairs = 0;
-  for (int i = 0; i < total_args_passed; i++) {
-    switch (sig_bt[i]) {
-    case T_LONG:                // LP64, longs compete with int args
-      assert(sig_bt[i+1] == T_VOID, "");
-#ifdef _LP64
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
-#endif
-      break;
-    case T_OBJECT:
-    case T_ARRAY:
-    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
-#ifndef _LP64
-      else                            stk_reg_pairs++;
-#endif
-      break;
-    case T_INT:
-    case T_SHORT:
-    case T_CHAR:
-    case T_BYTE:
-    case T_BOOLEAN:
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
-      else                            stk_reg_pairs++;
-      break;
-    case T_FLOAT:
-      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
-      else                            stk_reg_pairs++;
-      break;
-    case T_DOUBLE:
-      assert(sig_bt[i+1] == T_VOID, "");
-      break;
-    case T_VOID:
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-
-  // This is where the longs/doubles start on the stack.
-  stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
-
-  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
-  int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
-
-  // int stk_reg = frame::register_save_words*(wordSize>>2);
-  // int stk_reg = SharedRuntime::out_preserve_stack_slots();
-  int stk_reg = 0;
-  int int_reg = 0;
-  int flt_reg = 0;
-
-  // Now do the signature layout
-  for (int i = 0; i < total_args_passed; i++) {
-    switch (sig_bt[i]) {
-    case T_INT:
-    case T_SHORT:
-    case T_CHAR:
-    case T_BYTE:
-    case T_BOOLEAN:
-#ifndef _LP64
-    case T_OBJECT:
-    case T_ARRAY:
-    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
-#endif // _LP64
-      if (int_reg < int_reg_max) {
-        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
-        regs[i].set1(r->as_VMReg());
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
-      }
-      break;
-
-#ifdef _LP64
-    case T_OBJECT:
-    case T_ARRAY:
-    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
-      if (int_reg < int_reg_max) {
-        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
-        regs[i].set2(r->as_VMReg());
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
-        stk_reg_pairs += 2;
-      }
-      break;
-#endif // _LP64
-
-    case T_LONG:
-      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
-#ifdef _LP64
-        if (int_reg < int_reg_max) {
-          Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
-          regs[i].set2(r->as_VMReg());
-        } else {
-          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
-          stk_reg_pairs += 2;
-        }
-#else
-#ifdef COMPILER2
-        // For 32-bit build, can't pass longs in O-regs because they become
-        // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
-        // spare and available.  This convention isn't used by the Sparc ABI or
-        // anywhere else. If we're tiered then we don't use G-regs because c1
-        // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
-        // G0: zero
-        // G1: 1st Long arg
-        // G2: global allocated to TLS
-        // G3: used in inline cache check
-        // G4: 2nd Long arg
-        // G5: used in inline cache check
-        // G6: used by OS
-        // G7: used by OS
-
-        if (g_reg == G1) {
-          regs[i].set2(G1->as_VMReg()); // This long arg in G1
-          g_reg = G4;                  // Where the next arg goes
-        } else if (g_reg == G4) {
-          regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
-          g_reg = noreg;               // No more longs in registers
-        } else {
-          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
-          stk_reg_pairs += 2;
-        }
-#else // COMPILER2
-        if (int_reg_pairs + 1 < int_reg_max) {
-          if (is_outgoing) {
-            regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
-          } else {
-            regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
-          }
-          int_reg_pairs += 2;
-        } else {
-          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
-          stk_reg_pairs += 2;
-        }
-#endif // COMPILER2
-#endif // _LP64
-      break;
-
-    case T_FLOAT:
-      if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
-      else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
-      break;
-    case T_DOUBLE:
-      assert(sig_bt[i+1] == T_VOID, "expecting half");
-      if (flt_reg_pairs + 1 < flt_reg_max) {
-        regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
-        flt_reg_pairs += 2;
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
-        stk_reg_pairs += 2;
-      }
-      break;
-    case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
-    default:
-      ShouldNotReachHere();
-    }
-  }
-
-  // retun the amount of stack space these arguments will need.
-  return stk_reg_pairs;
-
-}
-*/
+// The compiled Java calling convention.
+// Pass first five oop/int args in registers T0, A0 - A3.
+// Pass float/double/long args in stack.
+// Doubles have precedence, so if you pass a mix of floats and doubles
+// the doubles will grab the registers before the floats will.
 
 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                            VMRegPair *regs,
                                            int total_args_passed,
                                            int is_outgoing) {
-	uint    stack = 0;          // Starting stack position for args on stack
-
-
-	// Pass first oop/int args in registers T0 .
+	// Starting stack position for args on stack
+  uint    stack = 0;
+
+	// Pass first five oop/int args in registers T0, A0 - A3.
 	uint reg_arg0 = 9999;
 	uint reg_arg1 = 9999;
 	uint reg_arg2 = 9999;
 	uint reg_arg3 = 9999;
 	uint reg_arg4 = 9999;
-//	uint reg_arg1 = 9999;
 
  
-// Pass doubles & longs &float  ligned on the stack.  First count stack slots for doubles
+  // Pass doubles, longs & floats aligned on the stack.  First count stack slots for doubles
 	int i;
 	for( i = 0; i < total_args_passed; i++) {
 		if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
 			stack += 2;
 		}
 	}
-	int dstack = 0;             // Separate counter for placing doubles
- for( i = 0; i < total_args_passed; i++) {
+	int dstack = 0;  // Separate counter for placing doubles
+  for( i = 0; i < total_args_passed; i++) {
     // From the type and the argument number (count) compute the location
     switch( sig_bt[i] ) {
     case T_SHORT:
@@ -561,30 +356,24 @@
 	    if( reg_arg0 == 9999 )  {
 		    reg_arg0 = i;
 		    regs[i].set1(T0->as_VMReg());
-	    }
-	    else if( reg_arg1 == 9999 )  {
+	    } else if( reg_arg1 == 9999 ) {
 		    reg_arg1 = i;
 		    regs[i].set1(A0->as_VMReg());
-	    }
-	   else if( reg_arg2 == 9999 )  {
+	    } else if( reg_arg2 == 9999 ) {
 		    reg_arg2 = i;
 		    regs[i].set1(A1->as_VMReg());
-	    }else if( reg_arg3 == 9999 )  {
+	    }else if( reg_arg3 == 9999 ) {
 		    reg_arg3 = i;
 		    regs[i].set1(A2->as_VMReg());
-	    }else if( reg_arg4 == 9999 )  {
+	    }else if( reg_arg4 == 9999 ) {
 		    reg_arg4 = i;
 		    regs[i].set1(A3->as_VMReg());
-	    }
-	    else 
-	    {
+	    } else {
 		    regs[i].set1(VMRegImpl::stack2reg(stack++));
 	    }
 	    break;
     case T_FLOAT:
 	    regs[i].set1(VMRegImpl::stack2reg(stack++));
-	   // regs[i].set2(VMRegImpl::stack2reg(dstack));
-	   // dstack += 2;
 	    break;
     case T_LONG:      
 	    assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
@@ -603,7 +392,6 @@
 		 break;
     }
  }
-
   // return value can be odd number of VMRegImpl stack slots make multiple of 2
   return round_to(stack, 2);
 }
@@ -1950,18 +1738,14 @@
 	// restoring them except ebp. ebp is the only callee save register
 	// as far as the interpreter and the compiler(s) are concerned.
 
-        //refer to register_mips.hpp:IC_Klass
-	//  const Register ic_reg = eax;
+  //refer to register_mips.hpp:IC_Klass
 	const Register ic_reg = T1;
-	//  const Register receiver = ecx;
 	const Register receiver = T0;
 	Label hit;
 	Label exception_pending;
 
 	__ verify_oop(receiver);
-	// __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
 	__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 
-	//__ jcc(Assembler::equal, hit);
 	__ beq(AT, ic_reg, hit); 
 	__ delayed()->nop(); 
 	__ jmp(ic_miss, relocInfo::runtime_call_type);
@@ -1983,9 +1767,7 @@
 		// Since hashCode is usually polymorphic at call sites we can't do
 		// this optimization at the call site without a lot of work.
 		Label slowCase;
-		// Register receiver = ecx;
 		Register receiver = T0;
-		//Register result = eax;
 		Register result = V0;
 		__ lw ( result, receiver, oopDesc::mark_offset_in_bytes()); 
 		// check if locked
@@ -2115,10 +1897,6 @@
 	//
 	OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 
-	// Mark location of ebp
-	// map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, 
-	// ebp->as_VMReg());
-
 	// We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx
 	// Are free to temporaries if we have to do  stack to steck moves.
 	// All inbound args are referenced based on ebp and all outbound args via esp.
@@ -2161,11 +1939,9 @@
 	if (method->is_static()) {
 
 		//  load opp into a register
-		// __ movl(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
 		int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
 					Klass::cast(method->method_holder())->java_mirror()));
 
-		//printf("oop_index =0x%x \n", oop_index);		
 		
 		RelocationHolder rspec = oop_Relocation::spec(oop_index);
 		__ relocate(rspec);
@@ -2176,7 +1952,6 @@
 		
 	//	__ verify_oop(oop_handle_reg);
 		// Now handlize the static class mirror it's known not-null.
-		// __ movl(Address(esp, klass_offset), oop_handle_reg);
 		__ sw( oop_handle_reg, SP, klass_offset); 
 		map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
 		
@@ -2212,7 +1987,7 @@
 		__ lui(AT, Assembler::split_high(save_pc));
 		__ addiu(AT, AT, Assembler::split_low(save_pc));
 	}
-        __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
  
 
 	// We have all of the arguments setup at this point. We must not touch any register
@@ -2266,25 +2041,15 @@
 */
 	{ 
 		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
-		//  __ movl(eax, JNIHandles::make_local(method()));
 		int oop_index = __ oop_recorder()->find_index( JNIHandles::make_local(method()));
 		RelocationHolder rspec = oop_Relocation::spec(oop_index);
 		__ relocate(rspec);
 		__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
 		__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
 
-
-	 //        __ push(A1);	
-	  //       __ push(A2);	
-	   //      __ push(A3);	
-
 		__ call_VM_leaf(
 				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 
-				//   thread, eax);
 		   thread, T6); 
-	//	__ pop(A3);	
-	//	__ pop(A2);	
-	//	__ pop(A1);	
 
 	}
 
@@ -3582,35 +3347,35 @@
   __ get_thread(thread);
 #endif
 
-   __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
-
-   __ sw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+  __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
+
+  __ sw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
   // push the one argument (relative to the oopmap)
-//	__ addiu(SP, SP, - 2*wordSize);
-   __ addiu(SP, SP, - wordSize);
-   __ move(AT, -8);
-   __ andr(SP, SP, AT);	
-   __ move(A0, thread);
-	
-   __ relocate(relocInfo::internal_pc_type); 
-   {	
-	int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
-	__ lui(AT, Assembler::split_high(save_pc));
-	__ addiu(AT, AT, Assembler::split_low(save_pc));
-   }
-   __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
-
-   __ lui(T9, Assembler::split_high((int)Deoptimization::fetch_unroll_info));
-   __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::fetch_unroll_info));
-   __ jalr(T9);
-   __ delayed()->nop();
-   oop_maps->add_gc_map(__ pc() - start, map);
-// pop the arg so now frame offsets (slots) don't include any arg.
-   __ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
-   __ reset_last_Java_frame(thread, false, true);
+  //	__ addiu(SP, SP, - 2*wordSize);
+  __ addiu(SP, SP, - wordSize);
+  __ move(AT, -8);
+  __ andr(SP, SP, AT);	
+  __ move(A0, thread);
+ 
+  __ relocate(relocInfo::internal_pc_type); 
+  {	
+    int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+    __ lui(AT, Assembler::split_high(save_pc));
+    __ addiu(AT, AT, Assembler::split_low(save_pc));
+  }
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+  __ lui(T9, Assembler::split_high((int)Deoptimization::fetch_unroll_info));
+  __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::fetch_unroll_info));
+  __ jalr(T9);
+  __ delayed()->nop();
+  oop_maps->add_gc_map(__ pc() - start, map);
+  // pop the arg so now frame offsets (slots) don't include any arg.
+  __ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+  __ reset_last_Java_frame(thread, false, true);
 
   // Load UnrollBlock into S7
-   __ move(unroll, V0);
+  __ move(unroll, V0);
 
   // Store frame locals in registers or memory
 
@@ -3708,9 +3473,6 @@
   // frame and the stack walking of interpreter_sender will get the unextended sp
   // value and not the "real" sp value.
   __ move(sender_sp, SP); 
-// Address sp_temp(unroll, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
-//__ movl(sp_temp, esp);
-//__ sw(SP, sp_temp);
   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
   __ sub(SP, SP, AT);
 			
@@ -3724,7 +3486,6 @@
    __ addi(FP, SP, (-2) * wordSize);	// save old & set new FP
    __ sub(SP, SP, T2); 			// Prolog!
   // This value is corrected by layout_activation_impl
-  //__ movl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
    __ sw(ZERO, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
    __ sw(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
    __ move(sender_sp, SP);	// pass to next frame
@@ -3737,13 +3498,10 @@
   // Re-push self-frame
    __ sw(FP, SP, (-2) * wordSize);
    __ addi(FP, SP, - 2 * wordSize);	// save old & set new ebp
- //  __ addi(SP, SP, -(reg_save_frame_size) * wordSize);
    __ addi(SP, SP, -(frame_size_in_words - additional_words) * wordSize);
 	
    // Restore frame locals after moving the frame
-//	__ sw(V0, SP, V0_off * wordSize);
   __ sw(V0, SP, RegisterSaver::v0Offset() * wordSize);
-//	__ sw(V1, SP, V1_off * wordSize);
   __ sw(V1, SP, RegisterSaver::v1Offset() * wordSize);
   __ swc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local
   __ swc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
@@ -3766,27 +3524,19 @@
   }
   __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
 	
-	//__ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
   __ lui(T9, Assembler::split_high((int)Deoptimization::unpack_frames));
   __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::unpack_frames));
   __ jalr(T9);			
   __ delayed()->nop();
   // Set an oopmap for the call site
- // oop_maps->add_gc_map(__ offset(), true, new OopMap(reg_save_frame_size + 2, 0));
   oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
 
-//	__ addi(SP, SP, 2 * wordSize);
- //FIXME here, do we need it? 
-//  __ push(V0);	
 	
   __ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
- // __ reset_last_Java_frame(thread, false, true);
   __ reset_last_Java_frame(thread, false, false);
   
   // Collect return values
-//	__ lw(V0, SP, V0_off * wordSize);
   __ lw(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
-//	__ lw(V1, SP, V1_off * wordSize);
   __ lw(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
 //FIXME, 
   // Clear floating point stack before returning to interpreter
@@ -4007,11 +3757,12 @@
   // restore return values to their stack-slots with the new SP.
   __ move(A0, thread);
   __ move(A1, Deoptimization::Unpack_uncommon_trap);
-  __ addiu(SP, SP, -8);
+  __ addiu(SP, SP, -2 * wordSize);
   __ lui(T9, Assembler::split_high((int)Deoptimization::unpack_frames));
   __ addiu(T9, T9, Assembler::split_high((int)Deoptimization::unpack_frames));
   __ jalr(T9);
   __ delayed()->nop();
+  __ addiu(SP, SP, 2 * wordSize); //by yyq
   // Set an oopmap for the call site
   oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
 
@@ -4036,25 +3787,12 @@
 //------------------------------generate_handler_blob-------------------
 //
 // Generate a special Compile2Runtime blob that saves all registers, and sets
-// up an OopMap.
+// up an OopMap and calls safepoint code to stop the compiled code for
+// a safepoint.
 //
 // This blob is jumped to (via a breakpoint and the signal handler) from a
-// safepoint in compiled code.  On entry to this blob, O7 contains the
-// address in the original nmethod at which we should resume normal execution.
-// Thus, this blob looks like a subroutine which must preserve lots of
-// registers and return normally.  Note that O7 is never register-allocated,
-// so it is guaranteed to be free here.
-//
-
-// The hardest part of what this blob must do is to save the 64-bit %o
-// registers in the 32-bit build.  A simple 'save' turn the %o's to %i's and
-// an interrupt will chop off their heads.  Making space in the caller's frame
-// first will let us save the 64-bit %o's before save'ing, but we cannot hand
-// the adjusted FP off to the GC stack-crawler: this will modify the caller's
-// SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
-// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
-// Tricky, tricky, tricky...
-
+// safepoint in compiled code. 
+ 
 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
 
   // Account for thread arg in our frame
@@ -4072,18 +3810,15 @@
   CodeBuffer  buffer ("handler_blob", 1024, 512);
   MacroAssembler* masm = new MacroAssembler( &buffer);
   
-  ///const Register java_thread = edi; // callee-saved for VC++
-  const Register thread = TREG; // callee-saved for VC++
+  const Register thread = TREG; 
   address start   = __ pc();  
   address call_pc = NULL;  
 
   // If cause_return is true we are at a poll_return and there is
-  // the return address on the stack to the caller on the nmethod
-  // that is safepoint. We can leave this return on the stack and
+  // the return address in RA to the caller on the nmethod
+  // that is safepoint. We can leave this return in RA and
   // effectively complete the return and safepoint in the caller.
-  // Otherwise we push space for a return address that the safepoint
-  // handler will install later to make the stack walking sensible.
-	// i dont think we need this in godson.
+  // Otherwise we load exception pc to RA.
 #ifndef OPT_THREAD
   __ get_thread(thread);
 #endif
@@ -4097,7 +3832,7 @@
   // address of the call in order to generate an oopmap. Hence, we do all the
   // work outselvs.
 
-  //i pass thread argument and setup last_Java_sp
+  // pass thread argument and setup last_Java_sp
   __ addiu(SP, SP, -wordSize); 
 
   __ move(A0, thread);
@@ -4121,7 +3856,6 @@
   // oop-registers and debug-info registers as callee-saved.  This
   // will allow deoptimization at this safepoint to find all possible
   // debug-info recordings, as well as let GC find all oops.
-
   oop_maps->add_gc_map(__ offset(),  map);
   __ addiu(SP, SP, wordSize);
 
@@ -4177,14 +3911,12 @@
 
 	int frame_size_words;
 	//we put the thread in A0 
-	enum frame_layout { 
-		extra_words = 0 };
 
 	OopMapSet *oop_maps = new OopMapSet();
 	OopMap* map = NULL;
 
 	int start = __ offset();
-	map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
+	map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
 	
  	
 	int frame_complete = __ offset();
@@ -4208,15 +3940,14 @@
 
 	__ lui(T9, Assembler::split_high((int)destination));
 	__ addiu(T9, T9, Assembler::split_low((int)destination));
-   	__ jalr(T9);
-   	__ delayed()->nop();
+  __ jalr(T9);
+  __ delayed()->nop();
 
 	// Set an oopmap for the call site.
 	// We need this not only for callee-saved registers, but also for volatile
 	// registers that the compiler might be keeping live across a safepoint.
-	//printf("resolve blob start = 0x%x, offset=0x%x \n", (int)start, (int)(__ offset())); 
 	oop_maps->add_gc_map( __ offset() - start, map);
-	// eax contains the address we are going to jump to assuming no exception got installed
+	// V0 contains the address we are going to jump to assuming no exception got installed
 	__ get_thread(thread);
 	__ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
 	// clear last_Java_sp
@@ -4233,8 +3964,7 @@
 	__ sw(V0, SP, RegisterSaver::v0_Offset() * wordSize);
 	RegisterSaver::restore_live_registers(masm);
 
-	// We are back the the original state on entry and ready to go.
-	//  __ jmp(eax);
+	// We are back the the original state on entry and ready to go the callee method.
 	__ jr(V0);
 	__ delayed()->nop();
 	// Pending exception after the safepoint
@@ -4247,9 +3977,7 @@
 	//forward_exception_entry need return address on the stack 
 	__ push(RA);
 	__ get_thread(thread);
-	//  __ movl(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
 	__ sw(ZERO, thread, in_bytes(JavaThread::vm_result_offset())); 
-	// __ movl(eax, Address(thread, Thread::pending_exception_offset()));
 	__ lw(V0, thread, in_bytes(Thread::pending_exception_offset()));
 	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
 	__ delayed() -> nop();
--- a/hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -63,120 +63,6 @@
 
 class StubGenerator: public StubCodeGenerator {
  private:
-#if 0
-  void inc_counter_np_(int& counter) {
-    __ incrementl(ExternalAddress((address)&counter));
-  }
-
-  // Call stubs are used to call Java from C
-  //
-  // Linux Arguments:
-  //    c_rarg0:   call wrapper address                   address
-  //    c_rarg1:   result                                 address
-  //    c_rarg2:   result type                            BasicType
-  //    c_rarg3:   method                                 methodOop
-  //    c_rarg4:   (interpreter) entry point              address
-  //    c_rarg5:   parameters                             intptr_t*
-  //    16(rbp): parameter size (in words)              int
-  //    24(rbp): thread                                 Thread*
-  //
-  //     [ return_from_Java     ] <--- rsp
-  //     [ argument word n      ]
-  //      ...
-  // -12 [ argument word 1      ]
-  // -11 [ saved r15            ] <--- rsp_after_call
-  // -10 [ saved r14            ]
-  //  -9 [ saved r13            ]
-  //  -8 [ saved r12            ]
-  //  -7 [ saved rbx            ]
-  //  -6 [ call wrapper         ]
-  //  -5 [ result               ]
-  //  -4 [ result type          ]
-  //  -3 [ method               ]
-  //  -2 [ entry point          ]
-  //  -1 [ parameters           ]
-  //   0 [ saved rbp            ] <--- rbp
-  //   1 [ return address       ]
-  //   2 [ parameter size       ]
-  //   3 [ thread               ]
-  //
-  // Windows Arguments:
-  //    c_rarg0:   call wrapper address                   address
-  //    c_rarg1:   result                                 address
-  //    c_rarg2:   result type                            BasicType
-  //    c_rarg3:   method                                 methodOop
-  //    48(rbp): (interpreter) entry point              address
-  //    56(rbp): parameters                             intptr_t*
-  //    64(rbp): parameter size (in words)              int
-  //    72(rbp): thread                                 Thread*
-  //
-  //     [ return_from_Java     ] <--- rsp
-  //     [ argument word n      ]
-  //      ...
-  //  -8 [ argument word 1      ]
-  //  -7 [ saved r15            ] <--- rsp_after_call
-  //  -6 [ saved r14            ]
-  //  -5 [ saved r13            ]
-  //  -4 [ saved r12            ]
-  //  -3 [ saved rdi            ]
-  //  -2 [ saved rsi            ]
-  //  -1 [ saved rbx            ]
-  //   0 [ saved rbp            ] <--- rbp
-  //   1 [ return address       ]
-  //   2 [ call wrapper         ]
-  //   3 [ result               ]
-  //   4 [ result type          ]
-  //   5 [ method               ]
-  //   6 [ entry point          ]
-  //   7 [ parameters           ]
-  //   8 [ parameter size       ]
-  //   9 [ thread               ]
-  //
-  //    Windows reserves the callers stack space for arguments 1-4.
-  //    We spill c_rarg0-c_rarg3 to this space.
-
-  // Call stub stack layout word offsets from rbp
-  enum call_stub_layout {
-#ifdef _WIN64
-    rsp_after_call_off = -7,
-    r15_off            = rsp_after_call_off,
-    r14_off            = -6,
-    r13_off            = -5,
-    r12_off            = -4,
-    rdi_off            = -3,
-    rsi_off            = -2,
-    rbx_off            = -1,
-    rbp_off            =  0,
-    retaddr_off        =  1,
-    call_wrapper_off   =  2,
-    result_off         =  3,
-    result_type_off    =  4,
-    method_off         =  5,
-    entry_point_off    =  6,
-    parameters_off     =  7,
-    parameter_size_off =  8,
-    thread_off         =  9
-#else
-    rsp_after_call_off = -12,
-    mxcsr_off          = rsp_after_call_off,
-    r15_off            = -11,
-    r14_off            = -10,
-    r13_off            = -9,
-    r12_off            = -8,
-    rbx_off            = -7,
-    call_wrapper_off   = -6,
-    result_off         = -5,
-    result_type_off    = -4,
-    method_off         = -3,
-    entry_point_off    = -2,
-    parameters_off     = -1,
-    rbp_off            =  0,
-    retaddr_off        =  1,
-    parameter_size_off =  2,
-    thread_off         =  3
-#endif
-  };
-#endif
 
   // ABI mips o32
 	// This fig is not MIPS ABI. It is call Java from C ABI.
@@ -215,177 +101,162 @@
 
     // same as in generate_catch_exception()!
 
-	// stub code
-	/* do not call enter because we have not a consistent view about whether or not we should 
-	   save ra and fp register.
-	   */
-	// save ra and fp
-	__ sw(RA, SP, (-1) * wordSize);
-	__ sw(FP, SP, (-2) * wordSize);
-
-	__ sw(BCP, SP, (-3) * wordSize);
-	__ sw(LVP, SP, (-4) * wordSize);
-	__ sw(TSR, SP, (-5) * wordSize);
-
-	__ sw(S1, SP, (-6) * wordSize);
-	__ sw(S3, SP, (-7) * wordSize);
-	__ sw(S4, SP, (-8) * wordSize);
-	__ sw(S5, SP, (-9) * wordSize);
-	__ sw(S6, SP, (-10) * wordSize);
+    // stub code
+    // save ra and fp
+    __ sw(RA, SP, (-1) * wordSize);
+    __ sw(FP, SP, (-2) * wordSize);
+    
+    __ sw(BCP, SP, (-3) * wordSize);
+    __ sw(LVP, SP, (-4) * wordSize);
+    __ sw(TSR, SP, (-5) * wordSize);
+    
+    __ sw(S1, SP, (-6) * wordSize);
+    __ sw(S3, SP, (-7) * wordSize);
+    __ sw(S4, SP, (-8) * wordSize);
+    __ sw(S5, SP, (-9) * wordSize);
+    __ sw(S6, SP, (-10) * wordSize);
 
 #ifdef OPT_THREAD
-	__ get_thread(TREG);
+	  __ get_thread(TREG);
 #endif
-		
-
-	// lw parameter_size
-	__ lw(T0, SP, 6 * wordSize);
-	// I think 14 is the max gap between argument and callee saved register
-	//__ move(FP, SP);
-	__ addi(FP, SP, (-2) * wordSize);
-	__ addi(SP, SP, (-10) * wordSize);
-
-	// save parameter
-//	__ sw(A0, FP, 0 * wordSize);
-//	__ sw(A1, FP, 1 * wordSize);
-//	__ sw(A2, FP, 2 * wordSize);
-//	__ sw(A3, FP, 3 * wordSize);
-	__ sw(A0, FP, 2 * wordSize);
-	__ sw(A1, FP, 3 * wordSize);
-	__ sw(A2, FP, 4 * wordSize);
-	__ sw(A3, FP, 5 * wordSize);
+    	
+    
+    // lw parameter_size
+    __ lw(T0, SP, 6 * wordSize);
+    // I think 14 is the max gap between argument and callee saved register
+    __ addi(FP, SP, (-2) * wordSize);
+    __ addi(SP, SP, (-10) * wordSize);
+    
+    __ sw(A0, FP, 2 * wordSize);
+    __ sw(A1, FP, 3 * wordSize);
+    __ sw(A2, FP, 4 * wordSize);
+    __ sw(A3, FP, 5 * wordSize);
 
 
 #ifdef ASSERT
-	// make sure we have no pending exceptions
-	{ Label L;
-		// load thread
-//		__ lw(T2, FP, 7 * wordSize);
-		__ lw(T2, FP, 9 * wordSize);
-		__ lw(T3, T2, in_bytes(Thread::pending_exception_offset()));
-		__ beq(T3, ZERO, L); 
-		__ delayed()->nop();
-		/* FIXME: I do not know how to realize stop in mips arch, do it in the future */
-		__ stop("StubRoutines::call_stub: entered with pending exception");
-		__ bind(L);
-	}
+    // make sure we have no pending exceptions
+    { 
+      Label L;
+    	__ lw(T2, FP, 9 * wordSize);
+    	__ lw(T3, T2, in_bytes(Thread::pending_exception_offset()));
+    	__ beq(T3, ZERO, L); 
+    	__ delayed()->nop();
+    	/* FIXME: I do not know how to realize stop in mips arch, do it in the future */
+    	__ stop("StubRoutines::call_stub: entered with pending exception");
+    	__ bind(L);
+    }
 #endif
 
-	// pass parameters if any
-	Label parameters_done;
-	// judge if the parameter_size equals 0
-	__ beq(T0, ZERO, parameters_done);
-	__ delayed()->nop();
-	__ sll(AT,T0,Interpreter::logStackElementSize());
-	__ sub(SP, SP, AT); 
-	__ move(AT, -StackAlignmentInBytes); 
-	__ andr(SP, SP , AT); 
-  // Copy Java parameters in reverse order (receiver last)
-	// Note that the argument order is inverted in the process
-	// source is edx[ecx: N-1..0]
-	// dest   is esp[ebx: 0..N-1]
-	Label loop;
-//	__ lw(T2, FP, 5 * wordSize);   // parameter pointer in T2,refernce to the stack arch
-	__ lw(T2, FP, 7 * wordSize);   // parameter pointer in T2,refernce to the stack arch
-	__ move(T4, ZERO);
-	__ bind(loop);
-	if (TaggedStackInterpreter) {
-	__ sll(T5, T0, 3);   
-	__ add(T5, T5, T2);	    
-	__ lw(AT, T5,  -2*wordSize);	
-	__ sll(T5,T4,3); 
-	__ add(T5,T5, SP); 
-	__ sw(AT, T5, Interpreter::expr_tag_offset_in_bytes(0)); 
-	}
-
-	// get parameter
-	__ sll(T5, T0, 2);   
-	__ add(T5, T5, T2);	    
-	__ lw(AT, T5,  -wordSize);	
-	__ sll(T5,T4,2); 
-	__ add(T5,T5, SP); 
-	__ sw(AT, T5, Interpreter::expr_offset_in_bytes(0)); 
-	__ addi(T4,T4,1); 
-	__ addi(T0,T0,-1); 
-	__ bne(T0, ZERO, loop);
-	__ delayed()->nop();
-	// advance to next parameter
-
-	// call Java function
-	__ bind(parameters_done);
-
-	// receiver in V0, methodOop in T7
-
-	//    __ lw(T7, FP, 3 * wordSize);        // get methodOop
-	__ move(T7, A3);
-//	__ lw(T9, FP, 4 * wordSize);       	// get entry_point
-	__ lw(T9, FP, 6 * wordSize);       	// get entry_point
-	__ move(T5,SP);             //set sender sp 
-	__ jalr(T9);
-	__ delayed()->nop();
-	return_address = __ pc();
-
-	Label common_return;
-	__ bind(common_return);
-
-	// store result depending on type
-	// (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
-//	__ lw(T0, FP, 1 * wordSize); 	// result --> T0
-	__ lw(T0, FP, 3 * wordSize); 	// result --> T0
-	Label is_long, is_float, is_double, exit;
-//	__ lw(T2, FP, 2 * wordSize);	// result_type --> T2
-	__ lw(T2, FP, 4 * wordSize);	// result_type --> T2
-	__ addi(T3, T2, (-1) * T_LONG);
-	__ beq(T3, ZERO, is_long);
-	__ delayed()->addi(T3, T2, (-1) * T_FLOAT);
-	__ beq(T3, ZERO, is_float);
-	__ delayed()->addi(T3, T2, (-1) * T_DOUBLE);
-	__ beq(T3, ZERO, is_double);
-	__ delayed()->nop();
-
-	// handle T_INT case
-	__ sw(V0, T0, 0 * wordSize);
-	__ bind(exit);
-
-	// restore 
-	__ addi(SP, FP, 2 * wordSize );
-	__ lw(RA, SP, -1 * wordSize);
-	__ lw(FP, SP, -2 * wordSize);
-	__ lw(BCP, SP, -3 * wordSize);
-	__ lw(LVP, SP, -4 * wordSize);
-	__ lw(TSR, SP, -5 * wordSize);
-
-	__ lw(S1, SP, (-6) * wordSize);
-	__ lw(S3, SP, (-7) * wordSize);
-	__ lw(S4, SP, (-8) * wordSize);
-	__ lw(S5, SP, (-9) * wordSize);
-	__ lw(S6, SP, (-10) * wordSize);
-	// return
-	__ jr(RA);
-	__ delayed()->nop();
-
-	// handle return types different from T_INT
-	__ bind(is_long);
-	__ sw(V0, T0, 0 * wordSize);
-	__ sw(V1, T0, 1 * wordSize);
-	__ b(exit);
-	__ delayed()->nop();
-
-	__ bind(is_float);
-	__ swc1(F0, T0, 0 * wordSize);
-	__ b(exit);
-	__ delayed()->nop();
-
-	__ bind(is_double);
-	__ swc1(F0, T0, 0 * wordSize);
-	__ swc1(F1, T0, 1 * wordSize);
-	__ b(exit);
-	__ delayed()->nop();
-	//FIXME, 1.6 x86 version add operation of fpu here
-	StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
-	__ b(common_return);
-	__ delayed()->nop(); 
-	return start;
+    // pass parameters if any
+    Label parameters_done;
+    // judge if the parameter_size equals 0
+    __ beq(T0, ZERO, parameters_done);
+    __ delayed()->nop();
+    __ sll(AT,T0,Interpreter::logStackElementSize());
+    __ sub(SP, SP, AT); 
+    __ move(AT, -StackAlignmentInBytes); 
+    __ andr(SP, SP , AT); 
+    // Copy Java parameters in reverse order (receiver last)
+    // Note that the argument order is inverted in the process
+    // source is edx[ecx: N-1..0]
+    // dest   is esp[ebx: 0..N-1]
+    Label loop;
+    __ lw(T2, FP, 7 * wordSize);   // parameter pointer in T2,refernce to the stack arch
+    __ move(T4, ZERO);
+    __ bind(loop);
+    if (TaggedStackInterpreter) {
+    __ sll(T5, T0, 3);   
+    __ add(T5, T5, T2);	    
+    __ lw(AT, T5,  -2*wordSize);	
+    __ sll(T5,T4,3); 
+    __ add(T5,T5, SP); 
+    __ sw(AT, T5, Interpreter::expr_tag_offset_in_bytes(0)); 
+    }
+    
+    // get parameter
+    __ sll(T5, T0, 2);   
+    __ add(T5, T5, T2);	    
+    __ lw(AT, T5,  -wordSize);	
+    __ sll(T5,T4,2); 
+    __ add(T5,T5, SP); 
+    __ sw(AT, T5, Interpreter::expr_offset_in_bytes(0)); 
+    __ addi(T4,T4,1); 
+    __ addi(T0,T0,-1); 
+    __ bne(T0, ZERO, loop);
+    __ delayed()->nop();
+    // advance to next parameter
+    
+    // call Java function
+    __ bind(parameters_done);
+    
+    // receiver in V0, methodOop in T7
+    
+    __ move(T7, A3);
+    __ lw(T9, FP, 6 * wordSize);       	// get entry_point
+    __ move(T5,SP);             //set sender sp 
+    __ jalr(T9);
+    __ delayed()->nop();
+    return_address = __ pc();
+    
+    Label common_return;
+    __ bind(common_return);
+    
+    // store result depending on type
+    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+    __ lw(T0, FP, 3 * wordSize); 	// result --> T0
+    Label is_long, is_float, is_double, exit;
+    __ lw(T2, FP, 4 * wordSize);	// result_type --> T2
+    __ addi(T3, T2, (-1) * T_LONG);
+    __ beq(T3, ZERO, is_long);
+    __ delayed()->addi(T3, T2, (-1) * T_FLOAT);
+    __ beq(T3, ZERO, is_float);
+    __ delayed()->addi(T3, T2, (-1) * T_DOUBLE);
+    __ beq(T3, ZERO, is_double);
+    __ delayed()->nop();
+    
+    // handle T_INT case
+    __ sw(V0, T0, 0 * wordSize);
+    __ bind(exit);
+    
+    // restore 
+    __ addi(SP, FP, 2 * wordSize );
+    __ lw(RA, SP, -1 * wordSize);
+    __ lw(FP, SP, -2 * wordSize);
+    __ lw(BCP, SP, -3 * wordSize);
+    __ lw(LVP, SP, -4 * wordSize);
+    __ lw(TSR, SP, -5 * wordSize);
+    
+    __ lw(S1, SP, (-6) * wordSize);
+    __ lw(S3, SP, (-7) * wordSize);
+    __ lw(S4, SP, (-8) * wordSize);
+    __ lw(S5, SP, (-9) * wordSize);
+    __ lw(S6, SP, (-10) * wordSize);
+    // return
+    __ jr(RA);
+    __ delayed()->nop();
+    
+    // handle return types different from T_INT
+    __ bind(is_long);
+    __ sw(V0, T0, 0 * wordSize);
+    __ sw(V1, T0, 1 * wordSize);
+    __ b(exit);
+    __ delayed()->nop();
+    
+    __ bind(is_float);
+    __ swc1(F0, T0, 0 * wordSize);
+    __ b(exit);
+    __ delayed()->nop();
+    
+    __ bind(is_double);
+    __ swc1(F0, T0, 0 * wordSize);
+    __ swc1(F1, T0, 1 * wordSize);
+    __ b(exit);
+    __ delayed()->nop();
+    //FIXME, 1.6 x86 version add operation of fpu here
+    StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
+    __ b(common_return);
+    __ delayed()->nop(); 
+    return start;
   }
 
   // Return point for a Java call if there's an exception thrown in
@@ -408,7 +279,6 @@
 
 	  // get thread directly
 #ifndef OPT_THREAD
-	  //__ lw(thread, FP, 7 * wordSize);
 	  __ lw(thread, FP, 9 * wordSize);
 #endif
 
@@ -425,8 +295,6 @@
 	  // set pending exception
 	  __ verify_oop(V0);
 	  __ sw(V0, thread, in_bytes(Thread::pending_exception_offset()));
-	//  __ move(AT, (int)&jerome1); 
-	 // __ sw(V0, AT, 0); 
 	  __ move(AT, (int)__FILE__);
 	  __ sw(AT, thread, in_bytes(Thread::exception_file_offset   ()));
 	  __ move(AT, (int)__LINE__);
@@ -506,190 +374,12 @@
 		// T9: exception handler
 		// V1: throwing pc
 		__ verify_oop(V0);
-/*	__ move(AT, (int)&jerome1 );
-	__ sw(SP, AT, 0); 	
-	__ move(AT, (int)&jerome2 );
-	__ sw(FP, AT, 0); 	
-	__ move(AT, (int)&jerome3 );
-	__ sw(RA, AT, 0); 	
-	__ move(AT, (int)&jerome4 );
-	__ sw(T9, AT, 0); 	
-	__ move(AT, (int)&jerome5 );
-	__ sw(ZERO, AT, 0); 	
-	__ move(AT, (int)&jerome6 );
-	__ sw(ZERO, AT, 0); 	
-	__ move(AT, (int)&jerome7 );
-	__ sw(ZERO, AT, 0); 	
-	__ move(AT, (int)&jerome10 );
-	__ sw(ZERO, AT, 0); 	
-	
-	__ pushad();
-	
-	//__ enter();
-	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
-				relocInfo::runtime_call_type);
-	__ delayed()->nop();
-	
-	//__ leave();
-	__ popad();
-
-
-  */	
 		__ jr(T9);
 		__ delayed()->nop();
 
 		return start;
   }
 
-  // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
-  //
-  // Arguments :
-  //    c_rarg0: exchange_value
-  //    c_rarg0: dest
-  //
-  // Result:
-  //    *dest <- ex, return (orig *dest)
-#if 0
-  address generate_atomic_xchg() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
-    address start = __ pc();
-
-    __ movl(rax, c_rarg0); // Copy to eax we need a return value anyhow
-    __ xchgl(rax, Address(c_rarg1, 0)); // automatic LOCK
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
-  //
-  // Arguments :
-  //    c_rarg0: exchange_value
-  //    c_rarg1: dest
-  //
-  // Result:
-  //    *dest <- ex, return (orig *dest)
-  address generate_atomic_xchg_ptr() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
-    address start = __ pc();
-
-    __ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
-    __ xchgptr(rax, Address(c_rarg1, 0)); // automatic LOCK
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest,
-  //                                         jint compare_value)
-  //
-  // Arguments :
-  //    c_rarg0: exchange_value
-  //    c_rarg1: dest
-  //    c_rarg2: compare_value
-  //
-  // Result:
-  //    if ( compare_value == *dest ) {
-  //       *dest = exchange_value
-  //       return compare_value;
-  //    else
-  //       return *dest;
-  address generate_atomic_cmpxchg() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
-    address start = __ pc();
-
-    __ movl(rax, c_rarg2);
-   if ( os::is_MP() ) __ lock();
-    __ cmpxchgl(c_rarg0, Address(c_rarg1, 0));
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for jint atomic::atomic_cmpxchg_long(jlong exchange_value,
-  //                                             volatile jlong* dest,
-  //                                             jlong compare_value)
-  // Arguments :
-  //    c_rarg0: exchange_value
-  //    c_rarg1: dest
-  //    c_rarg2: compare_value
-  //
-  // Result:
-  //    if ( compare_value == *dest ) {
-  //       *dest = exchange_value
-  //       return compare_value;
-  //    else
-  //       return *dest;
-  address generate_atomic_cmpxchg_long() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
-    address start = __ pc();
-
-    __ movq(rax, c_rarg2);
-   if ( os::is_MP() ) __ lock();
-    __ cmpxchgq(c_rarg0, Address(c_rarg1, 0));
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for jint atomic::add(jint add_value, volatile jint* dest)
-  //
-  // Arguments :
-  //    c_rarg0: add_value
-  //    c_rarg1: dest
-  //
-  // Result:
-  //    *dest += add_value
-  //    return *dest;
-  address generate_atomic_add() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_add");
-    address start = __ pc();
-
-    __ movl(rax, c_rarg0);
-   if ( os::is_MP() ) __ lock();
-    __ xaddl(Address(c_rarg1, 0), c_rarg0);
-    __ addl(rax, c_rarg0);
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
-  //
-  // Arguments :
-  //    c_rarg0: add_value
-  //    c_rarg1: dest
-  //
-  // Result:
-  //    *dest += add_value
-  //    return *dest;
-  address generate_atomic_add_ptr() {
-    StubCodeMark mark(this, "StubRoutines", "atomic_add_ptr");
-    address start = __ pc();
-
-    __ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
-   if ( os::is_MP() ) __ lock();
-    __ xaddptr(Address(c_rarg1, 0), c_rarg0);
-    __ addptr(rax, c_rarg0);
-    __ ret(0);
-
-    return start;
-  }
-
-  // Support for intptr_t OrderAccess::fence()
-  //
-  // Arguments :
-  //
-  // Result:
-  address generate_orderaccess_fence() {
-    StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
-    address start = __ pc();
-    __ mfence();
-    __ ret(0);
-
-    return start;
-  }
-#endif
   // Support for intptr_t get_previous_fp()
   //
   // This routine is used to find the previous frame pointer for the
@@ -709,225 +399,6 @@
     __ delayed()->nop();
     return start;
   }
-#if 0
-  //----------------------------------------------------------------------------------------------------
-  // Support for void verify_mxcsr()
-  //
-  // This routine is used with -Xcheck:jni to verify that native
-  // JNI code does not return to Java code without restoring the
-  // MXCSR register to our expected state.
-
-  address generate_verify_mxcsr() {
-    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
-    address start = __ pc();
-
-    const Address mxcsr_save(rsp, 0);
-
-    if (CheckJNICalls) {
-      Label ok_ret;
-      __ push(rax);
-      __ subptr(rsp, wordSize);      // allocate a temp location
-      __ stmxcsr(mxcsr_save);
-      __ movl(rax, mxcsr_save);
-      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
-      __ cmpl(rax, *(int *)(StubRoutines::x86::mxcsr_std()));
-      __ jcc(Assembler::equal, ok_ret);
-
-      __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
-
-      __ ldmxcsr(ExternalAddress(StubRoutines::x86::mxcsr_std()));
-
-      __ bind(ok_ret);
-      __ addptr(rsp, wordSize);
-      __ pop(rax);
-    }
-
-    __ ret(0);
-
-    return start;
-  }
-
-  address generate_f2i_fixup() {
-    StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
-    Address inout(rsp, 5 * wordSize); // return address + 4 saves
-
-    address start = __ pc();
-
-    Label L;
-
-    __ push(rax);
-    __ push(c_rarg3);
-    __ push(c_rarg2);
-    __ push(c_rarg1);
-
-    __ movl(rax, 0x7f800000);
-    __ xorl(c_rarg3, c_rarg3);
-    __ movl(c_rarg2, inout);
-    __ movl(c_rarg1, c_rarg2);
-    __ andl(c_rarg1, 0x7fffffff);
-    __ cmpl(rax, c_rarg1); // NaN? -> 0
-    __ jcc(Assembler::negative, L);
-    __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
-    __ movl(c_rarg3, 0x80000000);
-    __ movl(rax, 0x7fffffff);
-    __ cmovl(Assembler::positive, c_rarg3, rax);
-
-    __ bind(L);
-    __ movptr(inout, c_rarg3);
-
-    __ pop(c_rarg1);
-    __ pop(c_rarg2);
-    __ pop(c_rarg3);
-    __ pop(rax);
-
-    __ ret(0);
-
-    return start;
-  }
-
-  address generate_f2l_fixup() {
-    StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
-    Address inout(rsp, 5 * wordSize); // return address + 4 saves
-    address start = __ pc();
-
-    Label L;
-
-    __ push(rax);
-    __ push(c_rarg3);
-    __ push(c_rarg2);
-    __ push(c_rarg1);
-
-    __ movl(rax, 0x7f800000);
-    __ xorl(c_rarg3, c_rarg3);
-    __ movl(c_rarg2, inout);
-    __ movl(c_rarg1, c_rarg2);
-    __ andl(c_rarg1, 0x7fffffff);
-    __ cmpl(rax, c_rarg1); // NaN? -> 0
-    __ jcc(Assembler::negative, L);
-    __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
-    __ mov64(c_rarg3, 0x8000000000000000);
-    __ mov64(rax, 0x7fffffffffffffff);
-    __ cmov(Assembler::positive, c_rarg3, rax);
-
-    __ bind(L);
-    __ movptr(inout, c_rarg3);
-
-    __ pop(c_rarg1);
-    __ pop(c_rarg2);
-    __ pop(c_rarg3);
-    __ pop(rax);
-
-    __ ret(0);
-
-    return start;
-  }
-
-  address generate_d2i_fixup() {
-    StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
-    Address inout(rsp, 6 * wordSize); // return address + 5 saves
-
-    address start = __ pc();
-
-    Label L;
-
-    __ push(rax);
-    __ push(c_rarg3);
-    __ push(c_rarg2);
-    __ push(c_rarg1);
-    __ push(c_rarg0);
-
-    __ movl(rax, 0x7ff00000);
-    __ movq(c_rarg2, inout);
-    __ movl(c_rarg3, c_rarg2);
-    __ mov(c_rarg1, c_rarg2);
-    __ mov(c_rarg0, c_rarg2);
-    __ negl(c_rarg3);
-    __ shrptr(c_rarg1, 0x20);
-    __ orl(c_rarg3, c_rarg2);
-    __ andl(c_rarg1, 0x7fffffff);
-    __ xorl(c_rarg2, c_rarg2);
-    __ shrl(c_rarg3, 0x1f);
-    __ orl(c_rarg1, c_rarg3);
-    __ cmpl(rax, c_rarg1);
-    __ jcc(Assembler::negative, L); // NaN -> 0
-    __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
-    __ movl(c_rarg2, 0x80000000);
-    __ movl(rax, 0x7fffffff);
-    __ cmov(Assembler::positive, c_rarg2, rax);
-
-    __ bind(L);
-    __ movptr(inout, c_rarg2);
-
-    __ pop(c_rarg0);
-    __ pop(c_rarg1);
-    __ pop(c_rarg2);
-    __ pop(c_rarg3);
-    __ pop(rax);
-
-    __ ret(0);
-
-    return start;
-  }
-
-  address generate_d2l_fixup() {
-    StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
-    Address inout(rsp, 6 * wordSize); // return address + 5 saves
-
-    address start = __ pc();
-
-    Label L;
-
-    __ push(rax);
-    __ push(c_rarg3);
-    __ push(c_rarg2);
-    __ push(c_rarg1);
-    __ push(c_rarg0);
-
-    __ movl(rax, 0x7ff00000);
-    __ movq(c_rarg2, inout);
-    __ movl(c_rarg3, c_rarg2);
-    __ mov(c_rarg1, c_rarg2);
-    __ mov(c_rarg0, c_rarg2);
-    __ negl(c_rarg3);
-    __ shrptr(c_rarg1, 0x20);
-    __ orl(c_rarg3, c_rarg2);
-    __ andl(c_rarg1, 0x7fffffff);
-    __ xorl(c_rarg2, c_rarg2);
-    __ shrl(c_rarg3, 0x1f);
-    __ orl(c_rarg1, c_rarg3);
-    __ cmpl(rax, c_rarg1);
-    __ jcc(Assembler::negative, L); // NaN -> 0
-    __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
-    __ mov64(c_rarg2, 0x8000000000000000);
-    __ mov64(rax, 0x7fffffffffffffff);
-    __ cmovq(Assembler::positive, c_rarg2, rax);
-
-    __ bind(L);
-    __ movq(inout, c_rarg2);
-
-    __ pop(c_rarg0);
-    __ pop(c_rarg1);
-    __ pop(c_rarg2);
-    __ pop(c_rarg3);
-    __ pop(rax);
-
-    __ ret(0);
-
-    return start;
-  }
-
-  address generate_fp_mask(const char *stub_name, int64_t mask) {
-    StubCodeMark mark(this, "StubRoutines", stub_name);
-
-    __ align(16);
-    address start = __ pc();
-
-    __ emit_data64( mask, relocInfo::none );
-    __ emit_data64( mask, relocInfo::none );
-
-    return start;
-  }
-#endif
   // The following routine generates a subroutine to throw an
   // asynchronous UnknownError when an unsafe access gets a fault that
   // could not be reasonably prevented by the programmer.  (Example:
@@ -935,15 +406,12 @@
   address generate_handler_for_unsafe_access() {
 		StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
 		address start = __ pc();
-		//  __ pushl(0);                      // hole for return address-to-be
 		__ pushad();                      // push registers
 		//  Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
 		__ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
 		__ delayed()->nop(); 
-		//__ movl(next_pc, eax);            // stuff next address 
 		__ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); 
 		__ popad();
-		// __ ret(0);                        // jump to next address
 		__ jr(RA);
 		__ delayed()->nop();  
 		return start;
@@ -967,95 +435,11 @@
   address generate_verify_oop() {
 	  StubCodeMark mark(this, "StubRoutines", "verify_oop");
 	  address start = __ pc();
-
 	  __ verify_oop_subroutine(); 
-
+    address end = __ pc();
 	  return start;
   }
-/*
-  static address disjoint_byte_copy_entry;
-  static address disjoint_short_copy_entry;
-  static address disjoint_int_copy_entry;
-  static address disjoint_long_copy_entry;
-  static address disjoint_oop_copy_entry;
 
-  static address byte_copy_entry;
-  static address short_copy_entry;
-  static address int_copy_entry;
-  static address long_copy_entry;
-  static address oop_copy_entry;
-
-  static address checkcast_copy_entry;
-
-  //
-  // Verify that a register contains clean 32-bits positive value
-  // (high 32-bits are 0) so it could be used in 64-bits shifts.
-  //
-  //  Input:
-  //    Rint  -  32-bits value
-  //    Rtmp  -  scratch
-  //
-  void assert_clean_int(Register Rint, Register Rtmp) {
-#ifdef ASSERT
-    Label L;
-    assert_different_registers(Rtmp, Rint);
-    __ movslq(Rtmp, Rint);
-    __ cmpq(Rtmp, Rint);
-    __ jcc(Assembler::equal, L);
-    __ stop("high 32-bits of int value are not 0");
-    __ bind(L);
-#endif
-  }
-*/
-  //  Generate overlap test for array copy stubs
-  //
-  //  Input:
-  //     c_rarg0 - from
-  //     c_rarg1 - to
-  //     c_rarg2 - element count
-  //
-  //  Output:
-  //     rax   - &from[element count - 1]
-  //
-/*
-  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
-    assert(no_overlap_target != NULL, "must be generated");
-    array_overlap_test(no_overlap_target, NULL, sf);
-  }
-  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
-    array_overlap_test(NULL, &L_no_overlap, sf);
-  }
-  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
-    const Register from     = c_rarg0;
-    const Register to       = c_rarg1;
-    const Register count    = c_rarg2;
-    const Register end_from = rax;
-
-    __ cmpptr(to, from);
-    __ lea(end_from, Address(from, count, sf, 0));
-    if (NOLp == NULL) {
-      ExternalAddress no_overlap(no_overlap_target);
-      __ jump_cc(Assembler::belowEqual, no_overlap);
-      __ cmpptr(to, end_from);
-      __ jump_cc(Assembler::aboveEqual, no_overlap);
-    } else {
-      __ jcc(Assembler::belowEqual, (*NOLp));
-      __ cmpptr(to, end_from);
-      __ jcc(Assembler::aboveEqual, (*NOLp));
-    }
-  }
-*/
- //
-  //  Generate overlap test for array copy stubs
-  //
-  //  Input:
-  //     4(esp)    -  array1
-  //     8(esp)    -  array2
-  //    12(esp)    -  element count
-  //
-  //  Note: this code can only use %eax, %ecx, and %edx
-  //
-  
   //
   //  Generate overlap test for array copy stubs
   //
@@ -1079,257 +463,17 @@
 		  case 3: sf = Address::times_8; break;
 	  }
 
-	  //  __ movl(eax, Address(esp, 4));  // from
-	  //  __ movl(edx, Address(esp, 8));  // to
-	  //  __ movl(ecx, Address(esp, 12));  // count
-	  //   __ cmpl(edx, eax);
-
-	  //  __ leal(eax, Address(eax, ecx, sf, -elem_size));  // from + (count - 1) * elem_size
 	  __ sll(T5, A2, sf); 
 	  __ add(T5, T5, A0); 
 	  __ lea(T4, Address(T5, -elem_size)); 
-	  // __ jcc(Assembler::belowEqual, no_overlap_target);
 	  __ sub(AT, A1,A0); 
 	  __ blez(AT, no_overlap_target); 
 	  __ delayed()->nop(); 
-	  // __ cmpl(edx, eax);
 	  __ sub(AT, A1, T4); 
-	  // __ jcc(Assembler::above, no_overlap_target);
 	  __ bgtz(AT, no_overlap_target); 
 	  __ delayed()->nop(); 
 
   }
-	/*
-  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
-  //
-  // Outputs:
-  //    rdi - rcx
-  //    rsi - rdx
-  //    rdx - r8
-  //    rcx - r9
-  //
-  // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter
-  // are non-volatile.  r9 and r10 should not be used by the caller.
-  //
-  void setup_arg_regs(int nargs = 3) {
-    const Register saved_rdi = r9;
-    const Register saved_rsi = r10;
-    assert(nargs == 3 || nargs == 4, "else fix");
-#ifdef _WIN64
-    assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
-           "unexpected argument registers");
-    if (nargs >= 4)
-      __ mov(rax, r9);  // r9 is also saved_rdi
-    __ movptr(saved_rdi, rdi);
-    __ movptr(saved_rsi, rsi);
-    __ mov(rdi, rcx); // c_rarg0
-    __ mov(rsi, rdx); // c_rarg1
-    __ mov(rdx, r8);  // c_rarg2
-    if (nargs >= 4)
-      __ mov(rcx, rax); // c_rarg3 (via rax)
-#else
-    assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
-           "unexpected argument registers");
-#endif
-  }
-
-  void restore_arg_regs() {
-    const Register saved_rdi = r9;
-    const Register saved_rsi = r10;
-#ifdef _WIN64
-    __ movptr(rdi, saved_rdi);
-    __ movptr(rsi, saved_rsi);
-#endif
-  }
-
-  // Generate code for an array write pre barrier
-  //
-  //     addr    -  starting address
-  //     count    -  element count
-  //
-  //     Destroy no registers!
-  //
-  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
-    BarrierSet* bs = Universe::heap()->barrier_set();
-    switch (bs->kind()) {
-      case BarrierSet::G1SATBCT:
-      case BarrierSet::G1SATBCTLogging:
-        {
-          __ pusha();                      // push registers
-          if (count == c_rarg0) {
-            if (addr == c_rarg1) {
-              // exactly backwards!!
-              __ xchgptr(c_rarg1, c_rarg0);
-            } else {
-              __ movptr(c_rarg1, count);
-              __ movptr(c_rarg0, addr);
-            }
-
-          } else {
-            __ movptr(c_rarg0, addr);
-            __ movptr(c_rarg1, count);
-          }
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
-          __ popa();
-        }
-        break;
-      case BarrierSet::CardTableModRef:
-      case BarrierSet::CardTableExtension:
-      case BarrierSet::ModRef:
-        break;
-      default:
-        ShouldNotReachHere();
-
-    }
-  }
-
-  //
-  // Generate code for an array write post barrier
-  //
-  //  Input:
-  //     start    - register containing starting address of destination array
-  //     end      - register containing ending address of destination array
-  //     scratch  - scratch register
-  //
-  //  The input registers are overwritten.
-  //  The ending address is inclusive.
-  void  gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
-    assert_different_registers(start, end, scratch);
-    BarrierSet* bs = Universe::heap()->barrier_set();
-    switch (bs->kind()) {
-      case BarrierSet::G1SATBCT:
-      case BarrierSet::G1SATBCTLogging:
-
-        {
-          __ pusha();                      // push registers (overkill)
-          // must compute element count unless barrier set interface is changed (other platforms supply count)
-          assert_different_registers(start, end, scratch);
-          __ lea(scratch, Address(end, wordSize));
-          __ subptr(scratch, start);
-          __ shrptr(scratch, LogBytesPerWord);
-          __ mov(c_rarg0, start);
-          __ mov(c_rarg1, scratch);
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
-          __ popa();
-        }
-        break;
-      case BarrierSet::CardTableModRef:
-      case BarrierSet::CardTableExtension:
-        {
-          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-          Label L_loop;
-
-           __ shrptr(start, CardTableModRefBS::card_shift);
-           __ shrptr(end, CardTableModRefBS::card_shift);
-           __ subptr(end, start); // number of bytes to copy
-
-          intptr_t disp = (intptr_t) ct->byte_map_base;
-          if (__ is_simm32(disp)) {
-            Address cardtable(noreg, noreg, Address::no_scale, disp);
-            __ lea(scratch, cardtable);
-          } else {
-            ExternalAddress cardtable((address)disp);
-            __ lea(scratch, cardtable);
-          }
-
-          const Register count = end; // 'end' register contains bytes count now
-          __ addptr(start, scratch);
-        __ BIND(L_loop);
-          __ movb(Address(start, count, Address::times_1), 0);
-          __ decrement(count);
-          __ jcc(Assembler::greaterEqual, L_loop);
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-
-    }
-  }
-
-
-  // Copy big chunks forward
-  //
-  // Inputs:
-  //   end_from     - source arrays end address
-  //   end_to       - destination array end address
-  //   qword_count  - 64-bits element count, negative
-  //   to           - scratch
-  //   L_copy_32_bytes - entry label
-  //   L_copy_8_bytes  - exit  label
-  //
-  void copy_32_bytes_forward(Register end_from, Register end_to,
-                             Register qword_count, Register to,
-                             Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
-    DEBUG_ONLY(__ stop("enter at entry label, not here"));
-    Label L_loop;
-    __ align(16);
-  __ BIND(L_loop);
-    if(UseUnalignedLoadStores) {
-      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
-      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
-      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
-      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
-
-    } else {
-      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
-      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
-      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
-      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
-      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
-      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
-      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
-      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
-    }
-  __ BIND(L_copy_32_bytes);
-    __ addptr(qword_count, 4);
-    __ jcc(Assembler::lessEqual, L_loop);
-    __ subptr(qword_count, 4);
-    __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
-  }
-
-
-  // Copy big chunks backward
-  //
-  // Inputs:
-  //   from         - source arrays address
-  //   dest         - destination array address
-  //   qword_count  - 64-bits element count
-  //   to           - scratch
-  //   L_copy_32_bytes - entry label
-  //   L_copy_8_bytes  - exit  label
-  //
-  void copy_32_bytes_backward(Register from, Register dest,
-                              Register qword_count, Register to,
-                              Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
-    DEBUG_ONLY(__ stop("enter at entry label, not here"));
-    Label L_loop;
-    __ align(16);
-  __ BIND(L_loop);
-    if(UseUnalignedLoadStores) {
-      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
-      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
-      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
-      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
-
-    } else {
-      __ movq(to, Address(from, qword_count, Address::times_8, 24));
-      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
-      __ movq(to, Address(from, qword_count, Address::times_8, 16));
-      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
-      __ movq(to, Address(from, qword_count, Address::times_8,  8));
-      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
-      __ movq(to, Address(from, qword_count, Address::times_8,  0));
-      __ movq(Address(dest, qword_count, Address::times_8,  0), to);
-    }
-  __ BIND(L_copy_32_bytes);
-    __ subptr(qword_count, 4);
-    __ jcc(Assembler::greaterEqual, L_loop);
-    __ addptr(qword_count, 4);
-    __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
-  }
-*/
 
   //
   //  Generate store check for array
@@ -1341,7 +485,7 @@
   //  The 2 input registers are overwritten
   //
  
-   //
+  //
   //  Generate store check for array
   //
   //  Input:
@@ -1359,176 +503,113 @@
 		assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 		Label l_0;
 
-		// __ leal(ecx, Address(edi, ecx, Address::times_4, -4));
 		__ sll(AT, T5, Address::times_4); 
 		__ add(AT, T4, AT); 
 		__ lea(T5, Address(AT, -4)); 
 
-		// __ shrl(edi, CardTableModRefBS::card_shift);
 		__ shr(T4, CardTableModRefBS::card_shift); 
-		//__ shrl(ecx, CardTableModRefBS::card_shift);
 		__ shr(T5, CardTableModRefBS::card_shift);
 
-		// __ subl(ecx, edi);
 		__ sub(T5, T5, T4);
 		__ bind(l_0);
-		//    __ movb(Address(edi, ecx, Address::times_1, (int)ct->byte_map_base), 0);
 		__ add(AT, T4, T5); 
 		__ sw(ZERO, AT, (int)ct->byte_map_base); 
-		//__ decl(ecx);
 		__ addi(T5, T5, -4);  
-		// __ jcc(Assembler::greaterEqual, l_0);
 		__ bgez(T5, l_0);
 		__ delayed()->nop(); 
 	}
 
-// Arguments:
-//   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
-//             ignored
-//   name    - stub name string
-//
-// Inputs:
-//   c_rarg0   - source array address
-//   c_rarg1   - destination array address
-//   c_rarg2   - element count, treated as ssize_t, can be zero
-//
-// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
-// we let the hardware handle it.  The one to eight bytes within words,
-// dwords or qwords that span cache line boundaries will still be loaded
-// and stored atomically.
-//
-// Side Effects:
-//   disjoint_byte_copy_entry is set to the no-overlap entry point
-//   used by generate_conjoint_byte_copy().
-//
-address generate_disjoint_byte_copy(bool aligned, const char *name) {
-				StubCodeMark mark(this, "StubRoutines", name);
-				__ align(CodeEntryAlignment);
-				address start = __ pc();
-				Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
-
-				//    __ pushl(esi);
-				__ push(T3);
-				__ push(T4);
-				__ push(T5);
-				__ push(T8);
-				//   __ movl(ecx, Address(esp, 4+12));      // count
-				__ move(T5, A2);  
-				//  __ pushl(edi);
-				// __ movl(esi, Address(esp, 8+ 4));      // from
-				__ move(T3, A0); 
-				//  __ movl(edi, Address(esp, 8+ 8));      // to
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_byte_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_byte_copy().
+  //
+  address generate_disjoint_byte_copy(bool aligned, const char *name) {
+	  StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+		Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
 
-				__ move(T4, A1);
-				// copy from low to high
-				//__ movl(eax, ecx);            // original count in eax
-				__ move(T8, T5);             // original count in T5
-				//__ cmpl(ecx, 3);
-				__ addi(AT, T5, -3 ); 
-				//__ jcc(Assembler::belowEqual, l_4);                   // <= 3 bytes
-				__ blez(AT, l_4);  
-				__ delayed()->nop();	
-				if (!aligned) {
-								// align source address at dword address boundary
-								// __ movl(ecx, 4);
-								__ move(T5, 4); 
-								//	__ subl(ecx, esi);
-								__ sub(T5, T5, T3); 
-		  //__ andl(ecx, 3);              // prefix byte count
-		  __ andi(T5, T5, 3); 
-		  //	__ jcc(Assembler::equal, l_1);                   // no prefix
-		  __ beq(T5, ZERO, l_1); 
-		  __ delayed()->nop();	
-		  //	__ subl(eax, ecx);            // byte count less prefix
-		  __ sub(T8,T8,T5); 
-		  // copy prefix
-		  __ bind(l_0);
-		  //     __ movb(edx, Address(esi));
-		  __ lb(AT, T3, 0); 
-		  //  __ movb(Address(edi), edx);
-		  __ sb(AT, T4, 0); 
-		  // __ incl(esi);
-		  __ addi(T3, T3, 1); 
-		  // __ incl(edi);
-		  __ addi(T4, T4, 1); 
-		  //__ decl(ecx);
-		  __ addi(T5 ,T5, 1);  
-		  // __ jcc(Assembler::notEqual,l_0);
-		  __ bne(T5, ZERO, l_0); 
-		  __ delayed()->nop(); 
-		  __ bind(l_1);
-		  //   __ movl(ecx, eax);            // byte count less prefix
-		  __ move(T5, T8); 
-	  }
-	  //__ shrl(ecx, 2);              // dword count
-	  __ shr(T5, 2); 
-	  // __ jcc(Assembler::equal, l_4);                   // no dwords to move
-	  __ beq(T5, ZERO, l_4); 
-	  __ delayed()->nop(); 
-	  /*  // __ cmpl(ecx, 32);
-	      __ addi(AT, T5, -32); 
-	  // __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
-	  __ blez(ZERO, l_2);  
-	  __ delayed()->nop(); 
-	  // copy aligned dwords
-	  __ rep_movl();
-
-	  //__ jmp(l_4);
-	  __ b(l_4);  
-	  */ 
-	  // copy aligned dwords
-	  __ bind(l_2);
-	  //__ subl(edi, esi);            // edi := to - from
-	  // __ sub(T4, T4, T3);  
-	  __ align(16);
-	  __ bind(l_3);
-	  //__ movl(edx, Address(esi));
-	  __ lw(AT, T3, 0);   
-	  // __ movl(Address(edi, esi, Address::times_1), edx);
-	  __ sw(AT, T4, 0 ); 
-	  //__ addl(esi, 4);
-	  __ addi(T3, T3, 4); 
-	  __ addi(T4, T4, 4); 
-	  // __ decl(ecx);
-	  __ addi(T5, T5, -1); 
-	  //__ jcc(Assembler::notEqual, l_3);
-	  __ bne(T5, ZERO, l_3); 
-	  __ delayed()->nop(); 
-	  // __ addl(edi, esi);            // restore edi to "to" pointer
-	  __ bind(l_4);
-	  // __ movl(ecx, eax);
-	  __ move(T5, T8); 
-	  //__ andl(ecx, 3);              // suffix byte count
-	  __ andi(T5, T5, 3); 
-	  // __ jcc(Assembler::equal, l_6);                   // no suffix
-	  __ beq(T5, ZERO, l_6);  
-	  __ delayed()->nop(); 
-	  // copy suffix
-	  __ bind(l_5);
-	  //    __ movb(edx, Address(esi));
-	  __ lb(AT, T3, 0); 
-	  //  __ movb(Address(edi),edx);
-	  __ sb(AT, T4, 0); 
-	  //__ incl(esi);
-	  __ addi(T3, T3, 1);  
-	  //__ incl(edi);
-	  __ addi(T4, T4, 1);  
-	  //   __ decl(ecx);
-	  __ addi(T5, T5, -1); 
-	  //__ jcc(Assembler::notEqual, l_5);
-	  __ bne(T5, ZERO, l_5 ); 
-	  __ delayed()->nop(); 
-	  __ bind(l_6);
-	  //  __ popl(edi);
-	  // __ popl(esi);
-	  __ pop(T8); 
-	  __ pop(T5); 
-	  __ pop(T4); 
-	  __ pop(T3); 
-	  //__ ret(0);
-	  __ jr(RA); 
-	  __ delayed()->nop(); 
-	  return start;
+		__ push(T3);
+		__ push(T4);
+		__ push(T5);
+		__ push(T8);
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+		__ move(T8, T5);             // original count in T5
+		__ addi(AT, T5, -3 ); 
+		__ blez(AT, l_4);  
+		__ delayed()->nop();	
+		if (!aligned) {
+						// align source address at dword address boundary
+		  __ move(T5, 4); 
+			__ sub(T5, T5, T3); 
+      __ andi(T5, T5, 3); 
+      __ beq(T5, ZERO, l_1); 
+      __ delayed()->nop();	
+      __ sub(T8,T8,T5); 
+      __ bind(l_0);
+      __ lb(AT, T3, 0); 
+      __ sb(AT, T4, 0); 
+      __ addi(T3, T3, 1); 
+      __ addi(T4, T4, 1); 
+      __ addi(T5 ,T5, 1);  
+      __ bne(T5, ZERO, l_0); 
+      __ delayed()->nop(); 
+      __ bind(l_1);
+      __ move(T5, T8); 
+    }
+    __ shr(T5, 2); 
+    __ beq(T5, ZERO, l_4);     // no dwords to move
+    __ delayed()->nop(); 
+    // copy aligned dwords
+    __ bind(l_2);
+    __ align(16);
+    __ bind(l_3);
+    __ lw(AT, T3, 0);   
+    __ sw(AT, T4, 0 ); 
+    __ addi(T3, T3, 4); 
+    __ addi(T4, T4, 4); 
+    __ addi(T5, T5, -1); 
+    __ bne(T5, ZERO, l_3); 
+    __ delayed()->nop(); 
+    __ bind(l_4);
+    __ move(T5, T8); 
+    __ andi(T5, T5, 3); 
+    __ beq(T5, ZERO, l_6);  
+    __ delayed()->nop(); 
+    // copy suffix
+    __ bind(l_5);
+    __ lb(AT, T3, 0); 
+    __ sb(AT, T4, 0); 
+    __ addi(T3, T3, 1);  
+    __ addi(T4, T4, 1);  
+    __ addi(T5, T5, -1); 
+    __ bne(T5, ZERO, l_5 ); 
+    __ delayed()->nop(); 
+    __ bind(l_6);
+    __ pop(T8); 
+    __ pop(T5); 
+    __ pop(T4); 
+    __ pop(T3); 
+    __ jr(RA); 
+    __ delayed()->nop(); 
+    return start;
   }
 
   // Arguments:
@@ -1563,84 +644,46 @@
 		array_overlap_test(nooverlap_target, 0);
 
 		// copy from high to low
-		//   __ pushl(esi);
-		//   __ movl(ecx, Address(esp, 4+12));      // count
-		//  __ pushl(edi);
-		// __ movl(esi, Address(esp, 8+ 4));      // from
-		// __ movl(edi, Address(esp, 8+ 8));      // to
 		__ move(T5, A2);  
 		__ move(T3, A0); 
 		__ move(T4, A1);
-		// __ leal(esi, Address(esi, ecx, Address::times_1, -4));   // from + count - 4
 		__ add(AT, T3, T5);  
 		__ lea(T3, Address(AT, -4));
-		//	__ std();
-		//   __ leal(edi, Address(edi, ecx, Address::times_1, -4));   // to + count - 4
 		__ add(AT, T4, T5);  
 		__ lea(T4, Address(AT, -4));
-		//	__ movl(eax, ecx);
 		__ move(T8, T5); 
-		//	__ cmpl(ecx, 3);
 		__ addi(AT, T5, -3); 
-		//	__ jcc(Assembler::belowEqual, l_3);                   // <= 3 bytes
 		__ blez(AT, l_3); 
 		__ delayed()->nop();	
-		//	__ shrl(ecx, 2);              // dword count
 		__ shr(T5, 2); 
-		/*	__ cmpl(ecx, 32);
-				__ jcc(Assembler::above, l_2);                   // > 32 dwords
-		 */     // copy dwords aligned or not in a loop
-		// __ subl(edi, esi);
 		__ align(16);
 		__ bind(l_1);
-		// __ movl(edx, Address(esi));
 		__ lw(AT, T3, 0);   
-		//__ movl(Address(edi, esi, Address::times_1), edx);
 		__ sw(AT, T4, 0); 
-		//__ subl(esi, 4);
 		__ addi(T3, T3, -4);    
 		__ addi(T4, T4, -4);    
-		//__ decl(ecx);
 		__ addi(T5, T5, -1);  
-		//__ jcc(Assembler::notEqual, l_1);
 		__ bne(T5, ZERO, l_1); 
 		__ delayed()->nop(); 
-		//__ addl(edi, esi);
-		// __ jmp(l_3);
 		__ b(l_3);  
 		__ delayed()->nop(); 
 		// copy dwords aligned or not with repeat move
 		__ bind(l_2);
-		//    __ rep_movl();
 		__ bind(l_3);
 		// copy suffix (0-3 bytes)
-		//   __ andl(eax, 3);              // suffix byte count
 		__ andi(T8, T8, 3); 
-		//__ jcc(Assembler::equal, l_5);                   // no suffix
 		__ beq(T8, ZERO, l_5); 
 		__ delayed()->nop(); 
-		//   __ subl(edi, esi);
-		//   __ sub(T4, T4, T3);  
-		//__ addl(esi, 3);
 		__ addi(T3, T3, 3); 
 		__ bind(l_4);
-		// __ movb(edx, Address(esi));
 		__ lb(AT, T3, 0);  
-		//  __ movb(Address(esi, edi, Address::times_1), edx);
 		__ sb(AT, T4, 0); 
-		//__ decl(esi);
-		//__ decl(eax);
 		__ addi(T3, T3, -1);  
 		__ addi(T4, T4, -1);  
 		__ addi(T5, T5, -1); 
-		// __ jcc(Assembler::notEqual, l_4);
 		__ bne(T5, ZERO, l_4); 
 		__ delayed()->nop(); 
 		__ bind(l_5);
-		// __ cld();
-		//__ popl(edi);
-		//__ popl(esi);
-		//__ ret(0);
 		__ pop(T8);	
 		__ pop(T5);	
 		__ pop(T4);	
@@ -1675,12 +718,6 @@
 		__ align(CodeEntryAlignment);
 		address start = __ pc();
 
-	/*     __ pushl(esi);
-	       __ movl(ecx, Address(esp, 4+12));      // count
-	       __ pushl(edi);
-	       __ movl(esi, Address(esp, 8+ 4));      // from
-	       __ movl(edi, Address(esp, 8+ 8));      // to
-	       */
 		__ push(T3);	
 		__ push(T4);	
 		__ push(T5);	
@@ -1690,76 +727,44 @@
 		__ move(T4, A1);
 
 		if (!aligned) {
-		//  __ testl(ecx, ecx);
-		// __ jcc(Assembler::equal, l_5); // nothing to do
 			__ beq(T5, ZERO, l_5);
 			__ delayed()->nop(); 
 			// align source address at dword address boundary
-			//__ movl(eax, esi);            // original from
-			__ move(T8, T3); 
-			//__ andl(eax, 3);              // either 0 or 2
-			__ andi(T8, T8, 3); 
-			//	__ jcc(Assembler::equal, l_1);                   // no prefix
-			__ beq(T8, ZERO, l_1); 
+			__ move(T8, T3); // original from
+			__ andi(T8, T8, 3); // either 0 or 2
+			__ beq(T8, ZERO, l_1); // no prefix
 			__ delayed()->nop();
 			// copy prefix
-			// __ movw(edx, Address(esi));
 			__ lh(AT, T3, 0);
-			//	__ movw(Address(edi), edx);
 			__ sh(AT, T4, 0); 
-			//	__ addl(esi, eax);            // eax == 2
 			__ add(T3, T3, T8); 
-			//	__ addl(edi, eax);
 			__ add(T4, T4, T8);
-			//  __ decl(ecx);
 			__ addi(T5, T5, -1); 
 			__ bind(l_1);
 		}
-		//  __ movl(eax, ecx);            // word count less prefix
 		__ move(T8, T5);            // word count less prefix
-		//    __ sarl(ecx, 1);              // dword count
 		__ sra(T5, T5, 1); 
-		//__ jcc(Assembler::equal, l_4);                   // no dwords to move
 		__ beq(T5, ZERO, l_4); 
 		__ delayed()->nop(); 
-		/*   __ cmpl(ecx, 32);
-				 __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
-		// copy aligned dwords
-		__ rep_movl();
-		__ jmp(l_4 );
-		 */      // copy aligned dwords
+    // copy aligned dwords
 		__ bind(l_2);
-		// __ subl(edi, esi);
 		__ align(16);
 		__ bind(l_3);
-		//__ movl(edx, Address(esi));
 		__ lw(AT, T3, 0);   
-		//__ movl(Address(edi, esi, Address::times_1), edx);
 		__ sw(AT, T4, 0 ); 
-		//  __ addl(esi, 4);
 		__ addi(T3, T3, 4); 
 		__ addi(T4, T4, 4); 
-		// __ decl(ecx);
 		__ addi(T5, T5, -1); 
-		//    __ jcc(Assembler::notEqual, l_3);
 		__ bne(T5, ZERO, l_3); 
 		__ delayed()->nop(); 
-		//    __ addl(edi, esi);
 		__ bind(l_4);
-		//    __ andl(eax, 1);              // suffix count
 		__ andi(T8, T8, 1); 
-		//   __ jcc(Assembler::equal, l_5);                   // no suffix
 		__ beq(T8, ZERO, l_5);  
 		__ delayed()->nop(); 
 		// copy suffix
-		//     __ movw(edx, Address(esi));
 		__ lh(AT, T3, 0); 
-		//  __ movw(Address(edi), edx);
 		__ sh(AT, T4, 0); 
 		__ bind(l_5);
-		//   __ popl(edi);
-		//    __ popl(esi);
-		//   __ ret(0);
 		__ pop(T8);	
 		__ pop(T5);	
 		__ pop(T4);	
--- a/hotspot/src/cpu/mips/vm/templateTable_mips.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/cpu/mips/vm/templateTable_mips.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -3249,13 +3249,8 @@
 	__ bne(T1, ZERO, notByte);
 	__ delayed()->nop();
 
-//	__ lb(FSR, SP, 0);
-//	__ addi(SP, SP, wordSize);
 	__ pop(btos);
 	if (!is_static) {
-		//		__ lw(T3, SP, addent);
-		//		addent += 1 * wordSize;
-		//		__ verify_oop(T3);
 		pop_and_check_object(T3); 
 	}
 	__ add(AT, T3, T2);
@@ -3273,13 +3268,8 @@
 	__ bne(T1, AT, notInt);
 	__ delayed()->nop();
 
-//	__ lw(FSR, SP, 0);
-//	__ addi(SP, SP, wordSize);
 	__ pop(itos);
 	if (!is_static) {
-		//		__ lw(T3, SP, addent);
-		//		addent += 1 * wordSize;
-		//		__ verify_oop(T3);
 		pop_and_check_object(T3); 
 	}
 	__ add(AT, T3, T2);
@@ -3296,15 +3286,11 @@
 	__ bne(T1, AT, notObj);
 	__ delayed()->nop();
 
-//	__ lw(FSR, SP, 0);
-//	__ addi(SP, SP, wordSize);
 	__ pop(atos);
 	if (!is_static) {
-		//		__ lw(T3, SP, addent);
-		//		addent += 1 * wordSize;
-		//		__ verify_oop(T3);
 		pop_and_check_object(T3); 
 	}
+
 	__ add(AT, T3, T2);
 	__ sw(FSR, AT, 0);
 	__ store_check(T3);
--- a/hotspot/src/share/vm/runtime/frame.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/share/vm/runtime/frame.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -1202,7 +1202,7 @@
 
 
 void frame::oops_do_internal(OopClosure* f, RegisterMap* map, bool use_interpreter_oop_map_cache) {
-         if (is_interpreted_frame())    { oops_interpreted_do(f, map, use_interpreter_oop_map_cache);
+  if (is_interpreted_frame())    { oops_interpreted_do(f, map, use_interpreter_oop_map_cache);
   } else if (is_entry_frame())          { oops_entry_do      (f, map);
   } else if (CodeCache::contains(pc())) { oops_code_blob_do  (f, map);
   } else {
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Fri Oct 15 20:37:51 2010 +0000
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Sat Oct 23 21:08:56 2010 +0000
@@ -584,11 +584,6 @@
   address target_pc = NULL;
 
   if (Interpreter::contains(pc)) {
-#if 0
-#ifdef LOONGSONDEBUG
-	printf("handle exception in interpreter\n");
-#endif 
-#endif
 #ifdef CC_INTERP
     // C++ interpreter doesn't throw implicit exceptions
     ShouldNotReachHere();
@@ -601,12 +596,6 @@
     }
 #endif // !CC_INTERP
   } else {
-#if 0
-#ifdef LOONGSONDEBUG
-	printf("handle exception in compiled\n");
-#endif 
-#endif
-
     switch (exception_kind) {
       case STACK_OVERFLOW: {
         // Stack overflow only occurs upon frame setup; the callee is
@@ -626,11 +615,6 @@
           // exception and begin dispatching it in the caller. Since
           // the caller was at a call site, it's safe to destroy all
           // caller-saved registers, as these entry points do.
-#if 0
-#ifdef LOONGSONDEBUG
-		printf("vtableStubs contains pc\n");
-#endif 
-#endif
 
          VtableStub* vt_stub = VtableStubs::stub_containing(pc);
 
@@ -644,12 +628,7 @@
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
         } else {
-#if 0
-#ifdef LOONGSONDEBUG
-		printf("vtableStubs not contains pc\n");
-#endif 
-#endif
-         CodeBlob* cb = CodeCache::find_blob(pc);
+          CodeBlob* cb = CodeCache::find_blob(pc);
 
           // If code blob is NULL, then return NULL to signal handler to report the SEGV error.
           if (cb == NULL) return NULL;